diff --git a/cmake/GtsamBuildTypes.cmake b/cmake/GtsamBuildTypes.cmake index 4647b51a1..e9521c257 100644 --- a/cmake/GtsamBuildTypes.cmake +++ b/cmake/GtsamBuildTypes.cmake @@ -61,18 +61,18 @@ mark_as_advanced(GTSAM_CMAKE_C_FLAGS_TIMING GTSAM_CMAKE_CXX_FLAGS_TIMING GTSAM_C # Apply the gtsam specific build flags as normal variables. This makes it so that they only # apply to the gtsam part of the build if gtsam is built as a subproject -set(CMAKE_C_FLAGS ${GTSAM_CMAKE_C_FLAGS}) -set(CMAKE_CXX_FLAGS ${GTSAM_CMAKE_CXX_FLAGS}) -set(CMAKE_C_FLAGS_DEBUG ${GTSAM_CMAKE_C_FLAGS_DEBUG}) -set(CMAKE_CXX_FLAGS_DEBUG ${GTSAM_CMAKE_CXX_FLAGS_DEBUG}) -set(CMAKE_C_FLAGS_RELWITHDEBINFO ${GTSAM_CMAKE_C_FLAGS_RELWITHDEBINFO}) -set(CMAKE_CXX_FLAGS_RELWITHDEBINFO ${GTSAM_CMAKE_CXX_FLAGS_RELWITHDEBINFO}) -set(CMAKE_C_FLAGS_RELEASE ${GTSAM_CMAKE_C_FLAGS_RELEASE}) -set(CMAKE_CXX_FLAGS_RELEASE ${GTSAM_CMAKE_CXX_FLAGS_RELEASE}) -set(CMAKE_C_FLAGS_PROFILING ${GTSAM_CMAKE_C_FLAGS_PROFILING}) -set(CMAKE_CXX_FLAGS_PROFILING ${GTSAM_CMAKE_CXX_FLAGS_PROFILING}) -set(CMAKE_C_FLAGS_TIMING ${GTSAM_CMAKE_C_FLAGS_TIMING}) -set(CMAKE_CXX_FLAGS_TIMING ${GTSAM_CMAKE_CXX_FLAGS_TIMING}) +set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${GTSAM_CMAKE_C_FLAGS}") +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${GTSAM_CMAKE_CXX_FLAGS}") +set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} ${GTSAM_CMAKE_C_FLAGS_DEBUG}") +set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} ${GTSAM_CMAKE_CXX_FLAGS_DEBUG}") +set(CMAKE_C_FLAGS_RELWITHDEBINFO "${CMAKE_C_FLAGS_RELWITHDEBINFO} ${GTSAM_CMAKE_C_FLAGS_RELWITHDEBINFO}") +set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} ${GTSAM_CMAKE_CXX_FLAGS_RELWITHDEBINFO}") +set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} ${GTSAM_CMAKE_C_FLAGS_RELEASE}") +set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} ${GTSAM_CMAKE_CXX_FLAGS_RELEASE}") +set(CMAKE_C_FLAGS_PROFILING "${CMAKE_C_FLAGS_PROFILING} ${GTSAM_CMAKE_C_FLAGS_PROFILING}") +set(CMAKE_CXX_FLAGS_PROFILING "${CMAKE_CXX_FLAGS_PROFILING} ${GTSAM_CMAKE_CXX_FLAGS_PROFILING}") +set(CMAKE_C_FLAGS_TIMING "${CMAKE_C_FLAGS_TIMING} ${GTSAM_CMAKE_C_FLAGS_TIMING}") +set(CMAKE_CXX_FLAGS_TIMING "${CMAKE_CXX_FLAGS_TIMING} ${GTSAM_CMAKE_CXX_FLAGS_TIMING}") set(CMAKE_SHARED_LINKER_FLAGS_TIMING ${GTSAM_CMAKE_SHARED_LINKER_FLAGS_TIMING}) set(CMAKE_MODULE_LINKER_FLAGS_TIMING ${GTSAM_CMAKE_MODULE_LINKER_FLAGS_TIMING}) diff --git a/gtsam/base/DSFVector.h b/gtsam/base/DSFVector.h index 3c91d68c5..d73accf11 100644 --- a/gtsam/base/DSFVector.h +++ b/gtsam/base/DSFVector.h @@ -13,13 +13,16 @@ * @file DSFVector.h * @date Jun 25, 2010 * @author Kai Ni - * @brief A faster implementation for DSF, which uses vector rather than btree. As a result, the size of the forest is prefixed. + * @brief A faster implementation for DSF, which uses vector rather than btree. */ #pragma once +#include #include + #include + #include #include #include @@ -41,27 +44,26 @@ private: boost::shared_ptr v_;///< Stores parent pointers, representative iff v[i]==i public: - /// constructor that allocate new memory, allows for keys 0...numNodes-1 + /// Constructor that allocates new memory, allows for keys 0...numNodes-1. DSFBase(const size_t numNodes); - /// constructor that uses the existing memory + /// Constructor that uses an existing, pre-allocated vector. DSFBase(const boost::shared_ptr& v_in); - /// find the label of the set in which {key} lives + /// Find the label of the set in which {key} lives. size_t find(size_t key) const; - /// Merge two sets + /// Merge the sets containing i1 and i2. 
Does nothing if i1 and i2 are already in the same set.
   void merge(const size_t& i1, const size_t& i2);
 
-  /// @deprecated old name
+#ifdef GTSAM_ALLOW_DEPRECATED_SINCE_V4
   inline size_t findSet(size_t key) const {return find(key);}
-
-  /// @deprecated old name
   inline void makeUnionInPlace(const size_t& i1, const size_t& i2) {return merge(i1,i2);}
+#endif
 };
 
 /**
- * DSFVector additionaly keeps a vector of keys to support more expensive operations
+ * DSFVector additionally keeps a vector of keys to support more expensive operations
  * @addtogroup base
  */
 class GTSAM_EXPORT DSFVector: public DSFBase {
@@ -70,27 +72,27 @@ private:
   std::vector<size_t> keys_; ///< stores keys to support more expensive operations
 
 public:
-  /// constructor that allocate new memory, uses sequential keys 0...numNodes-1
+  /// Constructor that allocates new memory, uses sequential keys 0...numNodes-1.
   DSFVector(const size_t numNodes);
 
-  /// constructor that allocates memory, uses given keys
+  /// Constructor that allocates memory, uses given keys.
   DSFVector(const std::vector<size_t>& keys);
 
-  /// constructor that uses the existing memory
+  /// Constructor that uses existing vectors.
   DSFVector(const boost::shared_ptr<V>& v_in, const std::vector<size_t>& keys);
 
-  // all operations below loop over all keys and hence are *at least* O(n)
+  // All operations below loop over all keys and hence are *at least* O(n)
 
-  /// find whether there is one and only one occurrence for the given {label}
+  /// Find whether there is one and only one occurrence for the given {label}.
   bool isSingleton(const size_t& label) const;
 
-  /// get the nodes in the tree with the given label
+  /// Get the nodes in the tree with the given label
   std::set<size_t> set(const size_t& label) const;
 
-  /// return all sets, i.e. a partition of all elements
+  /// Return all sets, i.e. a partition of all elements.
   std::map<size_t, std::set<size_t> > sets() const;
 
-  /// return all sets, i.e. a partition of all elements
+  /// Return all sets, i.e. a partition of all elements.
   std::map<size_t, std::vector<size_t> > arrays() const;
 };
diff --git a/gtsam/base/SymmetricBlockMatrix.cpp b/gtsam/base/SymmetricBlockMatrix.cpp
index 7cca63092..5eaa2df1c 100644
--- a/gtsam/base/SymmetricBlockMatrix.cpp
+++ b/gtsam/base/SymmetricBlockMatrix.cpp
@@ -51,32 +51,42 @@ SymmetricBlockMatrix SymmetricBlockMatrix::LikeActiveViewOf(
 }
 
 /* ************************************************************************* */
-VerticalBlockMatrix SymmetricBlockMatrix::choleskyPartial(
-    DenseIndex nFrontals) {
-  // Do dense elimination
-  if (blockStart() != 0)
-    throw std::invalid_argument(
-        "Can only do Cholesky when the SymmetricBlockMatrix is not a restricted view, i.e. when blockStart == 0.");
-  if (!gtsam::choleskyPartial(matrix_, offset(nFrontals)))
+Matrix SymmetricBlockMatrix::block(DenseIndex I, DenseIndex J) const {
+  if (I == J) {
+    return diagonalBlock(I);
+  } else if (I < J) {
+    return aboveDiagonalBlock(I, J);
+  } else {
+    return aboveDiagonalBlock(J, I).transpose();
+  }
+}
+
+/* ************************************************************************* */
+void SymmetricBlockMatrix::choleskyPartial(DenseIndex nFrontals) {
+  gttic(VerticalBlockMatrix_choleskyPartial);
+  DenseIndex topleft = variableColOffsets_[blockStart_];
+  if (!gtsam::choleskyPartial(matrix_, offset(nFrontals) - topleft, topleft))
     throw CholeskyFailed();
+}
 
-  // Split conditional
+/* ************************************************************************* */
+VerticalBlockMatrix SymmetricBlockMatrix::split(DenseIndex nFrontals) {
+  gttic(VerticalBlockMatrix_split);
 
-  // Create one big conditionals with many frontal variables.
-  gttic(Construct_conditional);
-  const size_t varDim = offset(nFrontals);
-  VerticalBlockMatrix Ab = VerticalBlockMatrix::LikeActiveViewOf(*this, varDim);
-  Ab.full() = matrix_.topRows(varDim);
-  Ab.full().triangularView<Eigen::StrictlyLower>().setZero();
-  gttoc(Construct_conditional);
+  // Construct a VerticalBlockMatrix that contains [R Sd]
+  const size_t n1 = offset(nFrontals);
+  VerticalBlockMatrix RSd = VerticalBlockMatrix::LikeActiveViewOf(*this, n1);
+
+  // Copy into it.
+  RSd.full() = matrix_.topRows(n1);
+  RSd.full().triangularView<Eigen::StrictlyLower>().setZero();
 
-  gttic(Remaining_factor);
   // Take lower-right block of Ab_ to get the remaining factor
   blockStart() = nFrontals;
-  gttoc(Remaining_factor);
 
-  return Ab;
+  return RSd;
 }
+
 /* ************************************************************************* */
 
 } //\ namespace gtsam
diff --git a/gtsam/base/SymmetricBlockMatrix.h b/gtsam/base/SymmetricBlockMatrix.h
index 1f81ca1f9..da3a9a8b8 100644
--- a/gtsam/base/SymmetricBlockMatrix.h
+++ b/gtsam/base/SymmetricBlockMatrix.h
@@ -1,6 +1,6 @@
 /* ----------------------------------------------------------------------------
 
-* GTSAM Copyright 2010, Georgia Tech Research Corporation, 
+* GTSAM Copyright 2010, Georgia Tech Research Corporation,
 * Atlanta, Georgia 30332-0415
 * All Rights Reserved
 * Authors: Frank Dellaert, et al. (see THANKS for the full author list)
@@ -19,7 +19,6 @@
 
 #include
 #include
-#include <gtsam/base/SymmetricBlockMatrixBlockExpr.h>
 #include
 #include
 #include
@@ -53,8 +52,8 @@ namespace gtsam {
   {
   public:
     typedef SymmetricBlockMatrix This;
-    typedef SymmetricBlockMatrixBlockExpr<SymmetricBlockMatrix> Block;
-    typedef SymmetricBlockMatrixBlockExpr<const SymmetricBlockMatrix> constBlock;
+    typedef Eigen::Block<Matrix> Block;
+    typedef Eigen::Block<const Matrix> constBlock;
 
   protected:
     Matrix matrix_; ///< The full matrix
@@ -105,12 +104,12 @@ namespace gtsam {
         throw std::invalid_argument("Requested to create a SymmetricBlockMatrix with dimensions that do not sum to the total size of the provided matrix.");
       assertInvariants();
     }
-    
+
     /// Copy the block structure, but do not copy the matrix data. If blockStart() has been
     /// modified, this copies the structure of the corresponding matrix view. In the destination
    /// SymmetricBlockMatrix, blockStart() will be 0.
     static SymmetricBlockMatrix LikeActiveViewOf(const SymmetricBlockMatrix& other);
-    
+
     /// Copy the block structure, but do not copy the matrix data. If blockStart() has been
     /// modified, this copies the structure of the corresponding matrix view. In the destination
     /// SymmetricBlockMatrix, blockStart() will be 0.
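A minimal usage sketch of the new two-step elimination above (illustrative only, not part of the patch; the block dimensions `dims` and matrix `H` are invented, and any symmetric positive-definite matrix would do):

#include <gtsam/base/SymmetricBlockMatrix.h>
#include <gtsam/base/VerticalBlockMatrix.h>

using namespace gtsam;

void exampleEliminateOneBlock() {
  // Two variable blocks (dims 2 and 3) plus the augmented rhs column.
  const std::vector<DenseIndex> dims = {2, 3, 1};
  const Matrix H = Matrix::Identity(6, 6);  // stands in for an augmented Hessian
  SymmetricBlockMatrix ABC(dims, H);

  // Old API: VerticalBlockMatrix Ab = ABC.choleskyPartial(1);
  // New API: factor in place, then optionally split off [R Sd].
  ABC.choleskyPartial(1);                  // matrix now holds [R Sd; 0 L]
  VerticalBlockMatrix RSd = ABC.split(1);  // ABC's active view now refers to L
}

Note how the member choleskyPartial() forwards the offset of blockStart() as the new topleft argument of gtsam::choleskyPartial(), so the in-place version also works on a restricted view, which the old code explicitly rejected with an exception.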
@@ -123,71 +122,165 @@ namespace gtsam {
     DenseIndex cols() const { return rows(); }
 
     /// Block count
-    DenseIndex nBlocks() const { assertInvariants(); return variableColOffsets_.size() - 1 - blockStart_; }
+    DenseIndex nBlocks() const { return nActualBlocks() - blockStart_; }
 
-    /// Access the block with vertical block index \c i_block and horizontal block index \c j_block.
-    /// Note that the actual block accessed in the underlying matrix is relative to blockStart().
-    Block operator()(DenseIndex i_block, DenseIndex j_block) {
-      return Block(*this, i_block, j_block);
+    /// Number of dimensions for variable on this diagonal block.
+    DenseIndex getDim(DenseIndex block) const {
+      return calcIndices(block, block, 1, 1)[2];
     }
 
-    /// Access the block with vertical block index \c i_block and horizontal block index \c j_block.
-    /// Note that the actual block accessed in the underlying matrix is relative to blockStart().
-    constBlock operator()(DenseIndex i_block, DenseIndex j_block) const {
-      return constBlock(*this, i_block, j_block);
+    /// @name Block getter methods.
+    /// @{
+
+    /// Get a copy of a block (anywhere in the matrix).
+    /// This method makes a copy - use the methods below if performance is critical.
+    Matrix block(DenseIndex I, DenseIndex J) const;
+
+    /// Return the J'th diagonal block as a self adjoint view.
+    Eigen::SelfAdjointView<Block, Eigen::Upper> diagonalBlock(DenseIndex J) {
+      return block_(J, J).selfadjointView<Eigen::Upper>();
     }
 
-    /// Access the range of blocks starting with vertical block index \c i_startBlock, ending with
-    /// vertical block index \c i_endBlock, starting with horizontal block index \c j_startBlock,
-    /// and ending with horizontal block index \c j_endBlock. End block indices are exclusive. Note
-    /// that the actual blocks accessed in the underlying matrix are relative to blockStart().
-    Block range(DenseIndex i_startBlock, DenseIndex i_endBlock, DenseIndex j_startBlock, DenseIndex j_endBlock) {
-      assertInvariants();
-      return Block(*this, i_startBlock, j_startBlock, i_endBlock - i_startBlock, j_endBlock - j_startBlock);
+    /// Return the J'th diagonal block as a self adjoint view.
+    Eigen::SelfAdjointView<constBlock, Eigen::Upper> diagonalBlock(DenseIndex J) const {
+      return block_(J, J).selfadjointView<Eigen::Upper>();
     }
 
-    /// Access the range of blocks starting with vertical block index \c i_startBlock, ending with
-    /// vertical block index \c i_endBlock, starting with horizontal block index \c j_startBlock,
-    /// and ending with horizontal block index \c j_endBlock. End block indices are exclusive. Note
-    /// that the actual blocks accessed in the underlying matrix are relative to blockStart().
-    constBlock range(DenseIndex i_startBlock, DenseIndex i_endBlock, DenseIndex j_startBlock, DenseIndex j_endBlock) const {
-      assertInvariants();
-      return constBlock(*this, i_startBlock, j_startBlock, i_endBlock - i_startBlock, j_endBlock - j_startBlock);
+    /// Get the diagonal of the J'th diagonal block.
+    Vector diagonal(DenseIndex J) const {
+      return block_(J, J).diagonal();
     }
 
-    /** Return the full matrix, *not* including any portions excluded by firstBlock(). */
-    Block full()
-    {
-      return Block(*this, 0, nBlocks(), 0);
+    /// Get block above the diagonal (I, J).
+    constBlock aboveDiagonalBlock(DenseIndex I, DenseIndex J) const {
+      assert(I < J);
+      return block_(I, J);
     }
 
-    /** Return the full matrix, *not* including any portions excluded by firstBlock(). */
-    constBlock full() const
-    {
-      return constBlock(*this, 0, nBlocks(), 0);
+    /// Return the square sub-matrix that contains blocks(i:j, i:j).
+    Eigen::SelfAdjointView<constBlock, Eigen::Upper> selfadjointView(
+        DenseIndex I, DenseIndex J) const {
+      assert(J > I);
+      return block_(I, I, J - I, J - I).selfadjointView<Eigen::Upper>();
     }
 
-    /** Access to full matrix, including any portions excluded by firstBlock() to other operations. */
-    Eigen::SelfAdjointView<const Matrix, Eigen::Upper> matrix() const
-    {
-      return matrix_;
+    /// Return the square sub-matrix that contains blocks(i:j, i:j) as a triangular view.
+    Eigen::TriangularView<constBlock, Eigen::Upper> triangularView(DenseIndex I,
+                                                                   DenseIndex J) const {
+      assert(J > I);
+      return block_(I, I, J - I, J - I).triangularView<Eigen::Upper>();
     }
 
-    /** Access to full matrix, including any portions excluded by firstBlock() to other operations. */
-    Eigen::SelfAdjointView<Matrix, Eigen::Upper> matrix()
-    {
-      return matrix_;
+    /// Get a range [i,j) from the matrix. Indices are in block units.
+    constBlock aboveDiagonalRange(DenseIndex i_startBlock,
+                                  DenseIndex i_endBlock,
+                                  DenseIndex j_startBlock,
+                                  DenseIndex j_endBlock) const {
+      assert(i_startBlock < j_startBlock);
+      assert(i_endBlock <= j_startBlock);
+      return block_(i_startBlock, j_startBlock, i_endBlock - i_startBlock,
+                    j_endBlock - j_startBlock);
     }
 
-    /// Return the absolute offset in the underlying matrix of the start of the specified \c block.
-    DenseIndex offset(DenseIndex block) const
-    {
-      assertInvariants();
-      DenseIndex actualBlock = block + blockStart_;
-      checkBlock(actualBlock);
-      return variableColOffsets_[actualBlock];
+    /// Get a range [i,j) from the matrix. Indices are in block units.
+    Block aboveDiagonalRange(DenseIndex i_startBlock, DenseIndex i_endBlock,
+                             DenseIndex j_startBlock, DenseIndex j_endBlock) {
+      assert(i_startBlock < j_startBlock);
+      assert(i_endBlock <= j_startBlock);
+      return block_(i_startBlock, j_startBlock, i_endBlock - i_startBlock,
+                    j_endBlock - j_startBlock);
     }
 
+    /// @}
+    /// @name Block setter methods.
+    /// @{
+
+    /// Set a diagonal block. Only the upper triangular portion of `xpr` is evaluated.
+    template <typename XprType>
+    void setDiagonalBlock(DenseIndex I, const XprType& xpr) {
+      block_(I, I).triangularView<Eigen::Upper>() = xpr.template triangularView<Eigen::Upper>();
+    }
+
+    /// Set an off-diagonal block. Only the upper triangular portion of `xpr` is evaluated.
+    template <typename XprType>
+    void setOffDiagonalBlock(DenseIndex I, DenseIndex J, const XprType& xpr) {
+      assert(I != J);
+      if (I < J) {
+        block_(I, J) = xpr;
+      } else {
+        block_(J, I) = xpr.transpose();
+      }
+    }
+
+    /// Increment the diagonal block by the values in `xpr`. Only reads the upper triangular part of `xpr`.
+    template <typename XprType>
+    void updateDiagonalBlock(DenseIndex I, const XprType& xpr) {
+      // TODO(gareth): Eigen won't let us add triangular or self-adjoint views
+      // here, so we do it manually.
+      auto dest = block_(I, I);
+      assert(dest.rows() == xpr.rows());
+      assert(dest.cols() == xpr.cols());
+      for (DenseIndex col = 0; col < dest.cols(); ++col) {
+        for (DenseIndex row = 0; row <= col; ++row) {
+          dest(row, col) += xpr(row, col);
+        }
+      }
+    }
+
+    /// Update an off diagonal block.
+    /// NOTE(emmett): This assumes noalias().
+    template <typename XprType>
+    void updateOffDiagonalBlock(DenseIndex I, DenseIndex J, const XprType& xpr) {
+      assert(I != J);
+      if (I < J) {
+        block_(I, J).noalias() += xpr;
+      } else {
+        block_(J, I).noalias() += xpr.transpose();
+      }
+    }
+
+    /// @}
+    /// @name Accessing the full matrix.
+    /// @{
+
+    /// Get self adjoint view.
+    Eigen::SelfAdjointView<Block, Eigen::Upper> selfadjointView() {
+      return full().selfadjointView<Eigen::Upper>();
+    }
+
+    /// Get self adjoint view.
+    Eigen::SelfAdjointView<constBlock, Eigen::Upper> selfadjointView() const {
+      return full().selfadjointView<Eigen::Upper>();
+    }
+
+    /// Set the entire active matrix. Only reads the upper triangular part of `xpr`.
+    template <typename XprType>
+    void setFullMatrix(const XprType& xpr) {
+      full().triangularView<Eigen::Upper>() = xpr.template triangularView<Eigen::Upper>();
+    }
+
+    /// Set the entire active matrix zero.
+    void setZero() {
+      full().triangularView<Eigen::Upper>().setZero();
+    }
+
+    /// Negate the entire active matrix.
+    void negate() {
+      full().triangularView<Eigen::Upper>() *= -1.0;
+    }
+
+    /// Invert the entire active matrix in place.
+    void invertInPlace() {
+      const auto identity = Matrix::Identity(rows(), rows());
+      full().triangularView<Eigen::Upper>() =
+          selfadjointView()
+              .llt()
+              .solve(identity)
+              .triangularView<Eigen::Upper>();
+    }
+
+    /// @}
+
     /// Retrieve or modify the first logical block, i.e. the block referenced by block index 0.
     /// Blocks before it will be inaccessible, except by accessing the underlying matrix using
     /// matrix().
@@ -197,11 +290,86 @@
     /// it will be inaccessible, except by accessing the underlying matrix using matrix().
     DenseIndex blockStart() const { return blockStart_; }
 
-    /// Do partial Cholesky in-place and return the eliminated block matrix, leaving the remaining
-    /// symmetric matrix in place.
-    VerticalBlockMatrix choleskyPartial(DenseIndex nFrontals);
+    /**
+     * Given the augmented Hessian [A1'A1 A1'A2 A1'b
+     *                              A2'A1 A2'A2 A2'b
+     *                               b'A1  b'A2  b'b]
+     * on x1 and x2, does partial Cholesky in-place to obtain [R Sd;0 L] such that
+     *   R'R  = A1'A1
+     *   R'Sd = [A1'A2 A1'b]
+     *   L'L is the augmented Hessian on the separator x2
+     * R and Sd can be interpreted as a GaussianConditional |R*x1 + S*x2 - d|^2
+     */
+    void choleskyPartial(DenseIndex nFrontals);
+
+    /**
+     * After partial Cholesky, we can optionally split off R and Sd, to be interpreted as
+     * a GaussianConditional |R*x1 + S*x2 - d|^2. We leave the symmetric lower block L in place,
+     * and adjust block_start so now *this refers to it.
+     */
+    VerticalBlockMatrix split(DenseIndex nFrontals);
 
   protected:
+
+    /// Number of offsets in the full matrix.
+    DenseIndex nOffsets() const {
+      return variableColOffsets_.size();
+    }
+
+    /// Number of actual blocks in the full matrix.
+    DenseIndex nActualBlocks() const {
+      return nOffsets() - 1;
+    }
+
+    /// Get an offset for a block index (in the active view).
+    DenseIndex offset(DenseIndex block) const {
+      assert(block >= 0);
+      const DenseIndex actual_index = block + blockStart();
+      assert(actual_index < nOffsets());
+      return variableColOffsets_[actual_index];
+    }
+
+    /// Get an arbitrary block from the matrix. Indices are in block units.
+    constBlock block_(DenseIndex iBlock, DenseIndex jBlock,
+                      DenseIndex blockRows = 1, DenseIndex blockCols = 1) const {
+      const std::array<DenseIndex, 4> indices =
+          calcIndices(iBlock, jBlock, blockRows, blockCols);
+      return matrix_.block(indices[0], indices[1], indices[2], indices[3]);
+    }
+
+    /// Get an arbitrary block from the matrix. Indices are in block units.
+    Block block_(DenseIndex iBlock, DenseIndex jBlock, DenseIndex blockRows = 1,
+                 DenseIndex blockCols = 1) {
+      const std::array<DenseIndex, 4> indices =
+          calcIndices(iBlock, jBlock, blockRows, blockCols);
+      return matrix_.block(indices[0], indices[1], indices[2], indices[3]);
+    }
+
+    /// Get the full matrix as a block.
+    constBlock full() const {
+      return block_(0, 0, nBlocks(), nBlocks());
+    }
+
+    /// Get the full matrix as a block.
+    Block full() {
+      return block_(0, 0, nBlocks(), nBlocks());
+    }
+
+    /// Compute the indices into the underlying matrix for a given block.
+ std::array calcIndices(DenseIndex iBlock, DenseIndex jBlock, + DenseIndex blockRows, + DenseIndex blockCols) const { + assert(blockRows >= 0); + assert(blockCols >= 0); + + // adjust indices to account for start and size of blocks + const DenseIndex denseI = offset(iBlock); + const DenseIndex denseJ = offset(jBlock); + const DenseIndex denseRows = offset(iBlock + blockRows) - denseI; + const DenseIndex denseCols = offset(jBlock + blockCols) - denseJ; + return {{denseI, denseJ, denseRows, denseCols}}; + } + void assertInvariants() const { assert(matrix_.rows() == matrix_.cols()); @@ -209,21 +377,6 @@ namespace gtsam { assert(blockStart_ < (DenseIndex)variableColOffsets_.size()); } - void checkBlock(DenseIndex block) const - { - static_cast(block); //Disable unused varibale warnings. - assert(matrix_.rows() == matrix_.cols()); - assert(matrix_.cols() == variableColOffsets_.back()); - assert(block >= 0); - assert(block < (DenseIndex)variableColOffsets_.size()-1); - assert(variableColOffsets_[block] < matrix_.cols() && variableColOffsets_[block+1] <= matrix_.cols()); - } - - DenseIndex offsetUnchecked(DenseIndex block) const - { - return variableColOffsets_[block + blockStart_]; - } - template void fillOffsets(ITERATOR firstBlockDim, ITERATOR lastBlockDim, bool appendOneDimension) { diff --git a/gtsam/base/SymmetricBlockMatrixBlockExpr.h b/gtsam/base/SymmetricBlockMatrixBlockExpr.h deleted file mode 100644 index dd999ae6c..000000000 --- a/gtsam/base/SymmetricBlockMatrixBlockExpr.h +++ /dev/null @@ -1,337 +0,0 @@ -/* ---------------------------------------------------------------------------- - -* GTSAM Copyright 2010, Georgia Tech Research Corporation, -* Atlanta, Georgia 30332-0415 -* All Rights Reserved -* Authors: Frank Dellaert, et al. (see THANKS for the full author list) - -* See LICENSE for the license information - -* -------------------------------------------------------------------------- */ - -/** -* @file SymmetricBlockMatrixBlockExpr.h -* @brief Matrix expression for a block of a SymmetricBlockMatrix -* @author Richard Roberts -* @date Nov 20, 2013 -*/ -#pragma once - -#include - -namespace gtsam { template class SymmetricBlockMatrixBlockExpr; } -namespace gtsam { class SymmetricBlockMatrix; } - -// traits class for Eigen expressions -namespace Eigen -{ - namespace internal - { - template - struct traits > : - public traits::type> - { - }; - } -} - -namespace gtsam -{ - /// A matrix expression that references a single block of a SymmetricBlockMatrix. Depending on - /// the position of the block, this expression will behave either as a regular matrix block, a - /// transposed matrix block, or a symmetric matrix block. The only reason this class is templated - /// on SymmetricBlockMatrixType is to allow for both const and non-const references. 
- template - class SymmetricBlockMatrixBlockExpr : public Eigen::EigenBase > - { - protected: - SymmetricBlockMatrixType& xpr_; ///< The referenced SymmetricBlockMatrix - DenseIndex densei_; ///< The scalar indices of the referenced block - DenseIndex densej_; ///< The scalar indices of the referenced block - DenseIndex denseRows_; ///< The scalar size of the referenced block - DenseIndex denseCols_; ///< The scalar size of the referenced block - enum BlockType { Plain, SelfAdjoint, Transposed } blockType_; ///< The type of the referenced block, as determined by the block position - typedef SymmetricBlockMatrixBlockExpr This; - - public: - // Typedefs and constants used in Eigen - typedef typename const_selector::Scalar&, typename Eigen::internal::traits::Scalar>::type ScalarRef; - typedef typename Eigen::internal::traits::Scalar Scalar; - typedef typename Eigen::internal::traits::Index Index; - static const Index ColsAtCompileTime = Eigen::Dynamic; - static const Index RowsAtCompileTime = Eigen::Dynamic; - - typedef typename const_selector::type - DenseMatrixType; - - typedef Eigen::Map > OffDiagonal; - typedef Eigen::SelfAdjointView, Eigen::Upper> SelfAdjointView; - typedef Eigen::TriangularView, Eigen::Upper> TriangularView; - - protected: - mutable Eigen::Block myBlock_; - template friend class SymmetricBlockMatrixBlockExpr; - - public: - /// Create a SymmetricBlockMatrixBlockExpr from the specified block of a SymmetricBlockMatrix. - SymmetricBlockMatrixBlockExpr(SymmetricBlockMatrixType& blockMatrix, Index iBlock, Index jBlock) : - xpr_(blockMatrix), myBlock_(blockMatrix.matrix_.block(0, 0, 0, 0)) - { - initIndices(iBlock, jBlock); - } - - /// Create a SymmetricBlockMatrixBlockExpr from the specified range of blocks of a - /// SymmetricBlockMatrix. - SymmetricBlockMatrixBlockExpr(SymmetricBlockMatrixType& blockMatrix, - Index firstRowBlock, Index firstColBlock, Index rowBlocks, Index colBlocks) : - xpr_(blockMatrix), myBlock_(blockMatrix.matrix_.block(0, 0, 0, 0)) - { - initIndices(firstRowBlock, firstColBlock, rowBlocks, colBlocks); - } - - /// Create a SymmetricBlockMatrixBlockExpr from the specified range of blocks of a - /// SymmetricBlockMatrix. - SymmetricBlockMatrixBlockExpr(SymmetricBlockMatrixType& blockMatrix, Index firstBlock, Index blocks, char /*dummy*/) : - xpr_(blockMatrix), myBlock_(blockMatrix.matrix_.block(0, 0, 0, 0)) - { - initIndices(firstBlock, firstBlock, blocks, blocks); - } - - inline Index rows() const { return blockType_ != Transposed ? denseRows_ : denseCols_; } - inline Index cols() const { return blockType_ != Transposed ? 
denseCols_ : denseRows_; } - - inline BlockType blockType() const { return blockType_; } - - inline ScalarRef operator()(Index row, Index col) const - { - return coeffInternal(row, col); - } - - inline OffDiagonal knownOffDiagonal() const - { - typedef Eigen::Stride DynamicStride; - - // We can return a Map if we are either on an off-diagonal block, or a block of size 0 or 1 - assert(blockType_ != SelfAdjoint || (denseRows_ <= 1 && denseCols_ <= 1)); - if(blockType_ == Transposed) - { - // Swap the inner and outer stride to produce a transposed Map - Eigen::Block block = const_cast(*this).xpr_.matrix_.block(densei_, densej_, denseRows_, denseCols_); - return Eigen::Map(block.data(), block.cols(), block.rows(), - DynamicStride(block.innerStride(), block.outerStride())); - } - else - { - Eigen::Block block = const_cast(*this).xpr_.matrix_.block(densei_, densej_, denseRows_, denseCols_); - return Eigen::Map(block.data(), block.rows(), block.cols(), - DynamicStride(block.outerStride(), block.innerStride())); - } - } - - inline SelfAdjointView selfadjointView() const - { - assert(blockType_ == SelfAdjoint); - return myBlock_; - } - - inline TriangularView triangularView() const - { - assert(blockType_ == SelfAdjoint); - return myBlock_; - } - - template inline void evalTo(Dest& dst) const - { - // Just try to assign to the object using either a selfadjoint view or a block view - if(blockType_ == SelfAdjoint) - dst = selfadjointView(); - else if(blockType_ == Plain) - dst = myBlock_; - else - dst = myBlock_.transpose(); - } - - //template inline void evalTo(const Eigen::SelfAdjointView& rhs) const - //{ - // if(blockType_ == SelfAdjoint) - // rhs.nestedExpression().triangularView() = triangularView(); - // else - // throw std::invalid_argument("Cannot assign an off-diagonal block to a self-adjoint matrix"); - //} - - //template inline void evalTo(const Eigen::TriangularView& rhs) const - //{ - // if(blockType_ == SelfAdjoint) - // rhs.nestedExpression().triangularView() = triangularView(); - // else - // throw std::invalid_argument("Cannot assign an off-diagonal block to a self-adjoint matrix"); - //} - - template - This& operator=(const Eigen::MatrixBase& rhs) - { - // Just try to assign to the object using either a selfadjoint view or a block view - if(blockType_ == SelfAdjoint) - triangularView() = rhs.derived().template triangularView(); - else if(blockType_ == Plain) - myBlock_ = rhs.derived(); - else - myBlock_.transpose() = rhs.derived(); - return *this; - } - - template - This& operator=(const Eigen::SelfAdjointView& rhs) - { - if(blockType_ == SelfAdjoint) - triangularView() = rhs.nestedExpression().template triangularView(); - else - throw std::invalid_argument("Cannot assign a self-adjoint matrix to an off-diagonal block"); - return *this; - } - - template - This& operator=(const SymmetricBlockMatrixBlockExpr& other) - { - _doAssign(other); - return *this; - } - - This& operator=(const This& other) - { - // This version is required so GCC doesn't synthesize a default operator=. 
- _doAssign(other); - return *this; - } - - template - This& operator+=(const SymmetricBlockMatrixBlockExpr& other) - { - if(blockType_ == SelfAdjoint) - { - assert((BlockType)other.blockType() == SelfAdjoint); - triangularView() += other.triangularView().nestedExpression(); - } - else if(blockType_ == Plain) - { - assert((BlockType)other.blockType() == Plain || (BlockType)other.blockType() == Transposed); - if((BlockType)other.blockType() == Transposed) - myBlock_ += other.myBlock_.transpose(); - else - myBlock_ += other.myBlock_; - } - else - { - assert((BlockType)other.blockType() == Plain || (BlockType)other.blockType() == Transposed); - if((BlockType)other.blockType() == Transposed) - myBlock_.transpose() += other.myBlock_.transpose(); - else - myBlock_.transpose() += other.myBlock_; - } - return *this; - } - - private: - void initIndices(Index iBlock, Index jBlock, Index blockRows = 1, Index blockCols = 1) - { - if(iBlock == jBlock && blockRows == blockCols) - { - densei_ = xpr_.offset(iBlock); - densej_ = densei_; - if(blockRows > 0) - xpr_.checkBlock(iBlock + blockRows - 1); - denseRows_ = xpr_.offsetUnchecked(iBlock + blockRows) - densei_; - if(blockCols > 0) - xpr_.checkBlock(jBlock + blockCols - 1); - denseCols_ = xpr_.offsetUnchecked(jBlock + blockCols) - densej_; - blockType_ = SelfAdjoint; - } - else - { - if(jBlock > iBlock || (iBlock == jBlock && blockCols > blockRows)) - { - densei_ = xpr_.offset(iBlock); - densej_ = xpr_.offset(jBlock); - if(blockRows > 0) - xpr_.checkBlock(iBlock + blockRows - 1); - denseRows_ = xpr_.offsetUnchecked(iBlock + blockRows) - densei_; - if(blockCols > 0) - xpr_.checkBlock(jBlock + blockCols - 1); - denseCols_ = xpr_.offsetUnchecked(jBlock + blockCols) - densej_; - blockType_ = Plain; - } - else - { - densei_ = xpr_.offset(jBlock); - densej_ = xpr_.offset(iBlock); - if(blockCols > 0) - xpr_.checkBlock(jBlock + blockCols - 1); - denseRows_ = xpr_.offsetUnchecked(jBlock + blockCols) - densei_; - if(blockRows > 0) - xpr_.checkBlock(iBlock + blockRows - 1); - denseCols_ = xpr_.offsetUnchecked(iBlock + blockRows) - densej_; - blockType_ = Transposed; - } - - // Validate that the block does not cross below the diagonal (the indices have already been - // flipped above the diagonal for ranges starting below the diagonal). 
- if(densei_ + denseRows_ > densej_ + 1) - throw std::invalid_argument("Off-diagonal block ranges may not cross the diagonal"); - } - - new (&myBlock_) Eigen::Block(xpr_.matrix_.block(densei_, densej_, denseRows_, denseCols_)); - } - - template - inline ScalarType coeffInternal(Index row, Index col) const - { - // We leave index checking up to the Block class - if(blockType_ == Plain) - { - return myBlock_(row, col); - } - else if(blockType_ == SelfAdjoint) - { - if(row <= col) - return myBlock_(row, col); - else - return myBlock_.transpose()(row, col); - } - else - { - return myBlock_.transpose()(row, col); - } - } - - template - void _doAssign(const SymmetricBlockMatrixBlockExpr& other) - { - if(blockType_ == SelfAdjoint) - { - assert((BlockType)other.blockType() == SelfAdjoint); - triangularView() = other.triangularView().nestedExpression(); - } - else if(blockType_ == Plain) - { - assert((BlockType)other.blockType() == Plain || (BlockType)other.blockType() == Transposed); - if((BlockType)other.blockType() == Transposed) - myBlock_ = other.myBlock_.transpose(); - else - myBlock_ = other.myBlock_; - } - else - { - assert((BlockType)other.blockType() == Plain || (BlockType)other.blockType() == Transposed); - if((BlockType)other.blockType() == Transposed) - myBlock_.transpose() = other.myBlock_.transpose(); - else - myBlock_.transpose() = other.myBlock_; - } - } - - - }; - -} diff --git a/gtsam/base/Value.h b/gtsam/base/Value.h index 9537baa31..2d5b9d879 100644 --- a/gtsam/base/Value.h +++ b/gtsam/base/Value.h @@ -18,11 +18,12 @@ #pragma once +#include // Configuration from CMake + #include #include #include - namespace gtsam { /** @@ -37,7 +38,7 @@ namespace gtsam { * Values can operate generically on Value objects, retracting or computing * local coordinates for many Value objects of different types. * - * Inheriting from the DerivedValue class templated provides a generic implementation of + * Inheriting from the DerivedValue class template provides a generic implementation of * the pure virtual functions retract_(), localCoordinates_(), and equals_(), eliminating * the need to implement these functions in your class. Note that you must inherit from * DerivedValue templated on the class you are defining. 
For example you cannot define
diff --git a/gtsam/base/VectorSpace.h b/gtsam/base/VectorSpace.h
index cc28ac893..5456ae7f5 100644
--- a/gtsam/base/VectorSpace.h
+++ b/gtsam/base/VectorSpace.h
@@ -311,7 +311,7 @@ struct traits<Eigen::Matrix<double, M, N, Options, MaxRows, MaxCols> > :
   typedef Eigen::Matrix<double, dimension, dimension> Jacobian;
   typedef OptionalJacobian<dimension, dimension> ChartJacobian;
 
-  static TangentVector Local(Fixed origin, Fixed other,
+  static TangentVector Local(const Fixed& origin, const Fixed& other,
       ChartJacobian H1 = boost::none, ChartJacobian H2 = boost::none) {
     if (H1) (*H1) = -Jacobian::Identity();
     if (H2) (*H2) = Jacobian::Identity();
@@ -320,7 +320,7 @@ struct traits<Eigen::Matrix<double, M, N, Options, MaxRows, MaxCols> > :
     return result;
   }
 
-  static Fixed Retract(Fixed origin, const TangentVector& v,
+  static Fixed Retract(const Fixed& origin, const TangentVector& v,
       ChartJacobian H1 = boost::none, ChartJacobian H2 = boost::none) {
     if (H1) (*H1) = Jacobian::Identity();
     if (H2) (*H2) = Jacobian::Identity();
diff --git a/gtsam/base/cholesky.cpp b/gtsam/base/cholesky.cpp
index f6e2848f6..7bc5949cc 100644
--- a/gtsam/base/cholesky.cpp
+++ b/gtsam/base/cholesky.cpp
@@ -13,10 +13,10 @@
  * @file cholesky.cpp
  * @brief Efficient incomplete Cholesky on rank-deficient matrices, todo: constrained Cholesky
  * @author Richard Roberts
+ * @author Frank Dellaert
  * @date Nov 5, 2010
  */
 
-#include <gtsam/base/debug.h>
 #include <gtsam/base/cholesky.h>
 #include <gtsam/base/timing.h>
 
@@ -27,64 +27,53 @@ using namespace std;
 
 namespace gtsam {
 
-  static const double negativePivotThreshold = -1e-1;
-  static const double zeroPivotThreshold = 1e-6;
-  static const double underconstrainedPrior = 1e-5;
-  static const int underconstrainedExponentDifference = 12;
+static const double negativePivotThreshold = -1e-1;
+static const double zeroPivotThreshold = 1e-6;
+static const double underconstrainedPrior = 1e-5;
+static const int underconstrainedExponentDifference = 12;
 
 /* ************************************************************************* */
 static inline int choleskyStep(Matrix& ATA, size_t k, size_t order) {
-
-  const bool debug = ISDEBUG("choleskyCareful");
-
   // Get pivot value
-  double alpha = ATA(k,k);
+  double alpha = ATA(k, k);
 
   // Correct negative pivots from round-off error
-  if(alpha < negativePivotThreshold) {
-    if(debug) {
-      cout << "pivot = " << alpha << endl;
-      print(ATA, "Partially-factorized matrix: ");
-    }
+  if (alpha < negativePivotThreshold) {
     return -1;
-  } else if(alpha < 0.0)
+  } else if (alpha < 0.0)
     alpha = 0.0;
 
   const double beta = sqrt(alpha);
 
-  if(beta > zeroPivotThreshold) {
+  if (beta > zeroPivotThreshold) {
     const double betainv = 1.0 / beta;
 
     // Update k,k
-    ATA(k,k) = beta;
+    ATA(k, k) = beta;
 
-    if(k < (order-1)) {
+    if (k < (order - 1)) {
       // Update A(k,k+1:end) <- A(k,k+1:end) / beta
       typedef Matrix::RowXpr::SegmentReturnType BlockRow;
-      BlockRow V = ATA.row(k).segment(k+1, order-(k+1));
+      BlockRow V = ATA.row(k).segment(k + 1, order - (k + 1));
       V *= betainv;
 
       // Update A(k+1:end, k+1:end) <- A(k+1:end, k+1:end) - v*v' / alpha
-      ATA.block(k+1, k+1, order-(k+1), order-(k+1)) -= V.transpose() * V;
-//      ATA.bottomRightCorner(order-(k+1), order-(k+1)).selfadjointView<Eigen::Upper>()
-//          .rankUpdate(V.adjoint(), -1);
+      ATA.block(k + 1, k + 1, order - (k + 1), order - (k + 1)) -= V.transpose() * V;
+      // ATA.bottomRightCorner(order-(k+1), order-(k+1)).selfadjointView<Eigen::Upper>()
+      //     .rankUpdate(V.adjoint(), -1);
     }
 
     return 1;
   } else {
     // For zero pivots, add the underconstrained variable prior
-    ATA(k,k) = underconstrainedPrior;
-    for(size_t j=k+1; j<order; ++j)
-      ATA(k,j) = 0.0;
+    ATA(k, k) = underconstrainedPrior;
+    for (size_t j = k + 1; j < order; ++j)
+      ATA(k, j) = 0.0;
     return 0;
   }
 }
 
 /* ************************************************************************* */
-pair<size_t,bool> choleskyCareful(Matrix& ATA, int order) {
-
-  const bool debug = ISDEBUG("choleskyCareful");
-
+pair<size_t, bool> choleskyCareful(Matrix& ATA, int order) {
  // Check that the matrix is square (we do not check for symmetry)
   assert(ATA.rows() == ATA.cols());
 
@@ -92,7 +81,7 @@ pair<size_t, bool> choleskyCareful(Matrix& ATA, int order) {
   const size_t n = ATA.rows();
 
   // Negative order means factor the entire matrix
-  if(order < 0)
+  if (order < 0)
     order = int(n);
 
   assert(size_t(order) <= n);
@@ -102,13 +91,11 @@ pair<size_t, bool> choleskyCareful(Matrix& ATA, int order) {
   bool success = true;
 
   // Factor row-by-row
-  for(size_t k = 0; k < size_t(order); ++k) {
+  for (size_t k = 0; k < size_t(order); ++k) {
     int stepResult = choleskyStep(ATA, k, size_t(order));
-    if(stepResult == 1) {
-      if(debug) cout << "choleskyCareful: Factored through " << k << endl;
-      if(debug) print(ATA, "ATA: ");
-      maxrank = k+1;
-    } else if(stepResult == -1) {
+    if (stepResult == 1) {
+      maxrank = k + 1;
+    } else if (stepResult == -1) {
       success = false;
       break;
     } /* else if(stepResult == 0) Found zero pivot */
@@ -118,72 +105,54 @@
-bool choleskyPartial(Matrix& ABC, size_t nFrontal) {
-
+bool choleskyPartial(Matrix& ABC, size_t nFrontal, size_t topleft) {
   gttic(choleskyPartial);
+  if (nFrontal == 0)
+    return true;
 
-  const bool debug = ISDEBUG("choleskyPartial");
+  assert(ABC.cols() == ABC.rows());
+  const Eigen::DenseIndex n = ABC.rows() - topleft;
+  assert(n >= 0 && nFrontal <= size_t(n));
 
-  assert(ABC.rows() == ABC.cols());
-  assert(ABC.rows() >= 0 && nFrontal <= size_t(ABC.rows()));
+  // Create views on blocks
+  auto A = ABC.block(topleft, topleft, nFrontal, nFrontal);
+  auto B = ABC.block(topleft, topleft + nFrontal, nFrontal, n - nFrontal);
+  auto C = ABC.block(topleft + nFrontal, topleft + nFrontal, n - nFrontal, n - nFrontal);
 
-  const size_t n = ABC.rows();
-
-  // Compute Cholesky factorization of A, overwrites A.
-  gttic(lld);
-  Eigen::ComputationInfo lltResult;
-  if(nFrontal > 0)
-  {
-    Eigen::LLT<Matrix, Eigen::Upper> llt = ABC.block(0, 0, nFrontal, nFrontal).selfadjointView<Eigen::Upper>().llt();
-    ABC.block(0, 0, nFrontal, nFrontal).triangularView<Eigen::Upper>() = llt.matrixU();
-    lltResult = llt.info();
-  }
-  else
-  {
-    lltResult = Eigen::Success;
-  }
-  gttoc(lld);
-
-  if(debug) cout << "R:\n" << Eigen::MatrixXd(ABC.topLeftCorner(nFrontal,nFrontal).triangularView<Eigen::Upper>()) << endl;
+  // Compute Cholesky factorization A = R'*R, overwrites A.
+  gttic(LLT);
+  Eigen::LLT<Matrix, Eigen::Upper> llt(A);
+  Eigen::ComputationInfo lltResult = llt.info();
+  if (lltResult != Eigen::Success)
+    return false;
+  auto R = A.triangularView<Eigen::Upper>();
+  R = llt.matrixU();
+  gttoc(LLT);
 
   // Compute S = inv(R') * B
   gttic(compute_S);
-  if(n - nFrontal > 0) {
-    ABC.topLeftCorner(nFrontal,nFrontal).triangularView<Eigen::Upper>().transpose().solveInPlace(
-        ABC.topRightCorner(nFrontal, n-nFrontal));
-  }
-  if(debug) cout << "S:\n" << ABC.topRightCorner(nFrontal, n-nFrontal) << endl;
+  if (nFrontal < n)
+    R.transpose().solveInPlace(B);
   gttoc(compute_S);
 
   // Compute L = C - S' * S
   gttic(compute_L);
-  if(debug) cout << "C:\n" << Eigen::MatrixXd(ABC.bottomRightCorner(n-nFrontal,n-nFrontal).selfadjointView<Eigen::Upper>()) << endl;
-  if(n - nFrontal > 0)
-    ABC.bottomRightCorner(n-nFrontal,n-nFrontal).selfadjointView<Eigen::Upper>().rankUpdate(
-        ABC.topRightCorner(nFrontal, n-nFrontal).transpose(), -1.0);
-  if(debug) cout << "L:\n" << Eigen::MatrixXd(ABC.bottomRightCorner(n-nFrontal,n-nFrontal).selfadjointView<Eigen::Upper>()) << endl;
+  if (nFrontal < n)
+    C.selfadjointView<Eigen::Upper>().rankUpdate(B.transpose(), -1.0);
   gttoc(compute_L);
 
   // Check last diagonal element - Eigen does not check it
-  bool ok;
-  if(lltResult == Eigen::Success) {
-    if(nFrontal >= 2) {
-      int exp2, exp1;
-      (void)frexp(ABC(nFrontal-2, nFrontal-2), &exp2);
-      (void)frexp(ABC(nFrontal-1, nFrontal-1), &exp1);
-      ok = (exp2 - exp1 < underconstrainedExponentDifference);
-    } else if(nFrontal == 1) {
-      int exp1;
-      (void)frexp(ABC(0,0), &exp1);
-      ok = (exp1 > -underconstrainedExponentDifference);
-    } else {
-      ok = true;
-    }
+  if (nFrontal >= 2) {
+    int exp2, exp1;
+    (void)frexp(R(topleft + nFrontal - 2, topleft + nFrontal - 2), &exp2);
+    (void)frexp(R(topleft + nFrontal - 1, topleft + nFrontal - 1), &exp1);
+    return (exp2 - exp1 < underconstrainedExponentDifference);
+  } else if (nFrontal == 1) {
+    int exp1;
+    (void)frexp(R(0, 0), &exp1);
+    return (exp1 > -underconstrainedExponentDifference);
   } else {
-    ok = false;
+    return true;
   }
-
-  return ok;
-}
-
 }
+}  // namespace gtsam
diff --git a/gtsam/base/cholesky.h b/gtsam/base/cholesky.h
index 3377ab251..5e3276ff0 100644
--- a/gtsam/base/cholesky.h
+++ b/gtsam/base/cholesky.h
@@ -55,10 +55,12 @@ GTSAM_EXPORT std::pair<size_t,bool> choleskyCareful(Matrix& ATA, int order = -1)
  * nFrontal determines the split between A, B, and C, with A being of size
  * nFrontal x nFrontal.
  *
+ * If topleft is non-zero, the factorization is applied in place to the
+ * bottom-right corner of ABC, starting at row/column topleft.
+ *
  * @return \c true if the decomposition is successful, \c false if \c A was
 * not positive-definite.
*/ -GTSAM_EXPORT bool choleskyPartial(Matrix& ABC, size_t nFrontal); +GTSAM_EXPORT bool choleskyPartial(Matrix& ABC, size_t nFrontal, size_t topleft=0); } diff --git a/gtsam/base/numericalDerivative.h b/gtsam/base/numericalDerivative.h index 6cd28b951..dcb0425b7 100644 --- a/gtsam/base/numericalDerivative.h +++ b/gtsam/base/numericalDerivative.h @@ -320,8 +320,8 @@ typename internal::FixedSizeMatrix::type numericalDerivative41( /** * Compute numerical derivative in argument 2 of 4-argument function * @param h ternary function yielding m-vector - * @param x1 n-dimensional first argument value - * @param x2 second argument value + * @param x1 first argument value + * @param x2 n-dimensional second argument value * @param x3 third argument value * @param x4 fourth argument value * @param delta increment for numerical derivative @@ -333,11 +333,53 @@ typename internal::FixedSizeMatrix::type numericalDerivative42( const X2& x2, const X3& x3, const X4& x4, double delta = 1e-5) { BOOST_STATIC_ASSERT_MSG( (boost::is_base_of::structure_category>::value), "Template argument Y must be a manifold type."); - BOOST_STATIC_ASSERT_MSG( (boost::is_base_of::structure_category>::value), - "Template argument X1 must be a manifold type."); + BOOST_STATIC_ASSERT_MSG( (boost::is_base_of::structure_category>::value), + "Template argument X2 must be a manifold type."); return numericalDerivative11(boost::bind(h, x1, _1, x3, x4), x2, delta); } +/** + * Compute numerical derivative in argument 3 of 4-argument function + * @param h ternary function yielding m-vector + * @param x1 first argument value + * @param x2 second argument value + * @param x3 n-dimensional third argument value + * @param x4 fourth argument value + * @param delta increment for numerical derivative + * @return m*n Jacobian computed via central differencing + */ +template +typename internal::FixedSizeMatrix::type numericalDerivative43( + boost::function h, const X1& x1, + const X2& x2, const X3& x3, const X4& x4, double delta = 1e-5) { + BOOST_STATIC_ASSERT_MSG( (boost::is_base_of::structure_category>::value), + "Template argument Y must be a manifold type."); + BOOST_STATIC_ASSERT_MSG( (boost::is_base_of::structure_category>::value), + "Template argument X3 must be a manifold type."); + return numericalDerivative11(boost::bind(h, x1, x2, _1, x4), x3, delta); +} + +/** + * Compute numerical derivative in argument 4 of 4-argument function + * @param h ternary function yielding m-vector + * @param x1 first argument value + * @param x2 second argument value + * @param x3 third argument value + * @param x4 n-dimensional fourth argument value + * @param delta increment for numerical derivative + * @return m*n Jacobian computed via central differencing + */ +template +typename internal::FixedSizeMatrix::type numericalDerivative44( + boost::function h, const X1& x1, + const X2& x2, const X3& x3, const X4& x4, double delta = 1e-5) { + BOOST_STATIC_ASSERT_MSG( (boost::is_base_of::structure_category>::value), + "Template argument Y must be a manifold type."); + BOOST_STATIC_ASSERT_MSG( (boost::is_base_of::structure_category>::value), + "Template argument X4 must be a manifold type."); + return numericalDerivative11(boost::bind(h, x1, x2, x3, _1), x4, delta); +} + /** * Compute numerical Hessian matrix. Requires a single-argument Lie->scalar * function. This is implemented simply as the derivative of the gradient. 
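A small sketch of the numericalDerivative43/44 overloads added above (illustrative only; the function `f` and the test values are invented):

#include <gtsam/base/numericalDerivative.h>

using namespace gtsam;

// Any smooth 4-argument function on manifold/vector-space types works.
Vector2 f(const Vector2& a, const Vector2& b, const Vector2& c, const Vector2& d) {
  return a + 2.0 * b + 3.0 * c.cwiseProduct(d);
}

void exampleDerivative43And44() {
  const Vector2 a(1, 2), b(3, 4), c(5, 6), d(7, 8);
  // Central differencing with the default delta = 1e-5:
  Matrix H3 = numericalDerivative43<Vector2, Vector2, Vector2, Vector2, Vector2>(f, a, b, c, d);
  Matrix H4 = numericalDerivative44<Vector2, Vector2, Vector2, Vector2, Vector2>(f, a, b, c, d);
  // For this f, H3 is approximately 3*diag(d) and H4 approximately 3*diag(c).
}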
diff --git a/gtsam/base/tests/testCholesky.cpp b/gtsam/base/tests/testCholesky.cpp index 5120e9ac6..9b80658cf 100644 --- a/gtsam/base/tests/testCholesky.cpp +++ b/gtsam/base/tests/testCholesky.cpp @@ -23,10 +23,23 @@ using namespace gtsam; using namespace std; /* ************************************************************************* */ -TEST(cholesky, choleskyPartial) { +TEST(cholesky, choleskyPartial0) { // choleskyPartial should only use the upper triangle, so this represents a // symmetric matrix. + Matrix ABC(3,3); + ABC << 4.0375, 3.4584, 3.5735, + 0., 4.7267, 3.8423, + 0., 0., 5.1600; + + // Test passing 0 frontals to partialCholesky + Matrix RSL(ABC); + choleskyPartial(RSL, 0); + EXPECT(assert_equal(ABC, RSL, 1e-9)); +} + +/* ************************************************************************* */ +TEST(cholesky, choleskyPartial) { Matrix ABC = (Matrix(7,7) << 4.0375, 3.4584, 3.5735, 2.4815, 2.1471, 2.7400, 2.2063, 0., 4.7267, 3.8423, 2.3624, 2.8091, 2.9579, 2.5914, diff --git a/gtsam/base/tests/testSymmetricBlockMatrix.cpp b/gtsam/base/tests/testSymmetricBlockMatrix.cpp index da193aec5..c24e12c25 100644 --- a/gtsam/base/tests/testSymmetricBlockMatrix.cpp +++ b/gtsam/base/tests/testSymmetricBlockMatrix.cpp @@ -40,58 +40,41 @@ TEST(SymmetricBlockMatrix, ReadBlocks) Matrix expected1 = (Matrix(2, 2) << 22, 23, 23, 29).finished(); - Matrix actual1 = testBlockMatrix(1, 1); - // Test only writing the upper triangle for efficiency - Matrix actual1t = Z_2x2; - actual1t.triangularView() = testBlockMatrix(1, 1).triangularView(); + Matrix actual1 = testBlockMatrix.diagonalBlock(1); EXPECT(assert_equal(expected1, actual1)); - EXPECT(assert_equal(Matrix(expected1.triangularView()), actual1t)); // Above the diagonal Matrix expected2 = (Matrix(3, 2) << 4, 5, 10, 11, 16, 17).finished(); - Matrix actual2 = testBlockMatrix(0, 1); + Matrix actual2 = testBlockMatrix.aboveDiagonalBlock(0, 1); EXPECT(assert_equal(expected2, actual2)); - - // Below the diagonal - Matrix expected3 = (Matrix(2, 3) << - 4, 10, 16, - 5, 11, 17).finished(); - Matrix actual3 = testBlockMatrix(1, 0); - EXPECT(assert_equal(expected3, actual3)); } /* ************************************************************************* */ TEST(SymmetricBlockMatrix, WriteBlocks) { // On the diagonal - Matrix expected1 = testBlockMatrix(1, 1); + Matrix expected1 = testBlockMatrix.diagonalBlock(1); SymmetricBlockMatrix bm1 = SymmetricBlockMatrix::LikeActiveViewOf(testBlockMatrix); - bm1(1, 1) = expected1.selfadjointView(); // Verified with debugger that this only writes the upper triangle - Matrix actual1 = bm1(1, 1); + + bm1.setDiagonalBlock(1, expected1); + Matrix actual1 = bm1.diagonalBlock(1); EXPECT(assert_equal(expected1, actual1)); - // On the diagonal - Matrix expected1p = testBlockMatrix(1, 1); - SymmetricBlockMatrix bm1p = SymmetricBlockMatrix::LikeActiveViewOf(testBlockMatrix); - bm1p(1, 1) = expected1p; // Verified with debugger that this only writes the upper triangle - Matrix actual1p = bm1p(1, 1); - EXPECT(assert_equal(expected1p, actual1p)); - // Above the diagonal - Matrix expected2 = testBlockMatrix(0, 1); + Matrix expected2 = testBlockMatrix.aboveDiagonalBlock(0, 1); SymmetricBlockMatrix bm2 = SymmetricBlockMatrix::LikeActiveViewOf(testBlockMatrix); - bm2(0, 1) = expected2; - Matrix actual2 = bm2(0, 1); + bm2.setOffDiagonalBlock(0, 1, expected2); + Matrix actual2 = bm2.aboveDiagonalBlock(0, 1); EXPECT(assert_equal(expected2, actual2)); // Below the diagonal - Matrix expected3 = testBlockMatrix(1, 0); + Matrix expected3 = 
testBlockMatrix.aboveDiagonalBlock(0, 1).transpose(); SymmetricBlockMatrix bm3 = SymmetricBlockMatrix::LikeActiveViewOf(testBlockMatrix); - bm3(1, 0) = expected3; - Matrix actual3 = bm3(1, 0); + bm3.setOffDiagonalBlock(1, 0, expected3); + Matrix actual3 = bm3.aboveDiagonalBlock(0, 1).transpose(); EXPECT(assert_equal(expected3, actual3)); } @@ -103,30 +86,16 @@ TEST(SymmetricBlockMatrix, Ranges) 22, 23, 24, 23, 29, 30, 24, 30, 36).finished(); - Matrix actual1 = testBlockMatrix.range(1, 3, 1, 3).selfadjointView(); - Matrix actual1a = testBlockMatrix.range(1, 3, 1, 3); + Matrix actual1 = testBlockMatrix.selfadjointView(1, 3); EXPECT(assert_equal(expected1, actual1)); - EXPECT(assert_equal(expected1, actual1a)); // Above the diagonal - Matrix expected2 = (Matrix(3, 1) << - 24, - 30, - 36).finished(); - Matrix actual2 = testBlockMatrix.range(1, 3, 2, 3).knownOffDiagonal(); - Matrix actual2a = testBlockMatrix.range(1, 3, 2, 3); + Matrix expected2 = (Matrix(3, 3) << + 4, 5, 6, + 10, 11, 12, + 16, 17, 18).finished(); + Matrix actual2 = testBlockMatrix.aboveDiagonalRange(0, 1, 1, 3); EXPECT(assert_equal(expected2, actual2)); - EXPECT(assert_equal(expected2, actual2a)); - - // Below the diagonal - Matrix expected3 = (Matrix(3, 3) << - 4, 10, 16, - 5, 11, 17, - 6, 12, 18).finished(); - Matrix actual3 = testBlockMatrix.range(1, 3, 0, 1).knownOffDiagonal(); - Matrix actual3a = testBlockMatrix.range(1, 3, 0, 1); - EXPECT(assert_equal(expected3, actual3)); - EXPECT(assert_equal(expected3, actual3a)); } /* ************************************************************************* */ @@ -152,34 +121,51 @@ TEST(SymmetricBlockMatrix, expressions) Matrix b = (Matrix(1, 2) << 5, 6).finished(); SymmetricBlockMatrix bm1(list_of(2)(3)(1)); - bm1.full().triangularView().setZero(); - bm1(1, 1).selfadjointView().rankUpdate(a.transpose()); - EXPECT(assert_equal(Matrix(expected1.full().selfadjointView()), bm1.full().selfadjointView())); + bm1.setZero(); + bm1.diagonalBlock(1).rankUpdate(a.transpose()); + EXPECT(assert_equal(Matrix(expected1.selfadjointView()), bm1.selfadjointView())); SymmetricBlockMatrix bm2(list_of(2)(3)(1)); - bm2.full().triangularView().setZero(); - bm2(0, 1).knownOffDiagonal() += b.transpose() * a; - EXPECT(assert_equal(Matrix(expected2.full().selfadjointView()), bm2.full().selfadjointView())); + bm2.setZero(); + bm2.updateOffDiagonalBlock(0, 1, b.transpose() * a); + EXPECT(assert_equal(Matrix(expected2.selfadjointView()), bm2.selfadjointView())); SymmetricBlockMatrix bm3(list_of(2)(3)(1)); - bm3.full().triangularView().setZero(); - bm3(1, 0).knownOffDiagonal() += a.transpose() * b; - EXPECT(assert_equal(Matrix(expected2.full().selfadjointView()), bm3.full().selfadjointView())); + bm3.setZero(); + bm3.updateOffDiagonalBlock(1, 0, a.transpose() * b); + EXPECT(assert_equal(Matrix(expected2.selfadjointView()), bm3.selfadjointView())); SymmetricBlockMatrix bm4(list_of(2)(3)(1)); - bm4.full().triangularView().setZero(); - bm4(1, 1) += expected1(1, 1); - EXPECT(assert_equal(Matrix(expected1.full().selfadjointView()), bm4.full().selfadjointView())); + bm4.setZero(); + bm4.updateDiagonalBlock(1, expected1.diagonalBlock(1)); + EXPECT(assert_equal(Matrix(expected1.selfadjointView()), bm4.selfadjointView())); SymmetricBlockMatrix bm5(list_of(2)(3)(1)); - bm5.full().triangularView().setZero(); - bm5(0, 1) += expected2(0, 1); - EXPECT(assert_equal(Matrix(expected2.full().selfadjointView()), bm5.full().selfadjointView())); + bm5.setZero(); + bm5.updateOffDiagonalBlock(0, 1, expected2.aboveDiagonalBlock(0, 1)); 
+ EXPECT(assert_equal(Matrix(expected2.selfadjointView()), bm5.selfadjointView())); SymmetricBlockMatrix bm6(list_of(2)(3)(1)); - bm6.full().triangularView().setZero(); - bm6(1, 0) += expected2(1, 0); - EXPECT(assert_equal(Matrix(expected2.full().selfadjointView()), bm6.full().selfadjointView())); + bm6.setZero(); + bm6.updateOffDiagonalBlock(1, 0, expected2.aboveDiagonalBlock(0, 1).transpose()); + EXPECT(assert_equal(Matrix(expected2.selfadjointView()), bm6.selfadjointView())); +} + +/* ************************************************************************* */ +TEST(SymmetricBlockMatrix, inverseInPlace) { + // generate an invertible matrix + const Vector3 a(1.0, 0.2, 2.0), b(0.3, 0.8, -1.0), c(0.1, 0.2, 0.7); + Matrix inputMatrix(3, 3); + inputMatrix.setZero(); + inputMatrix += a * a.transpose(); + inputMatrix += b * b.transpose(); + inputMatrix += c * c.transpose(); + const Matrix expectedInverse = inputMatrix.inverse(); + + SymmetricBlockMatrix symmMatrix(list_of(2)(1), inputMatrix); + // invert in place + symmMatrix.invertInPlace(); + EXPECT(assert_equal(expectedInverse, symmMatrix.selfadjointView())); } /* ************************************************************************* */ diff --git a/gtsam/base/treeTraversal-inst.h b/gtsam/base/treeTraversal-inst.h index 0d670ba2e..7a88f72eb 100644 --- a/gtsam/base/treeTraversal-inst.h +++ b/gtsam/base/treeTraversal-inst.h @@ -29,7 +29,6 @@ #include #include #include -#include namespace gtsam { @@ -158,7 +157,6 @@ void DepthFirstForestParallel(FOREST& forest, DATA& rootData, #ifdef GTSAM_USE_TBB // Typedefs typedef typename FOREST::Node Node; - typedef boost::shared_ptr sharedNode; tbb::task::spawn_root_and_wait( internal::CreateRootTask(forest.roots(), rootData, visitorPre, diff --git a/gtsam/base/treeTraversal/parallelTraversalTasks.h b/gtsam/base/treeTraversal/parallelTraversalTasks.h index ccdd269c3..9b2dae3d0 100644 --- a/gtsam/base/treeTraversal/parallelTraversalTasks.h +++ b/gtsam/base/treeTraversal/parallelTraversalTasks.h @@ -44,8 +44,9 @@ namespace gtsam { boost::shared_ptr myData; VISITOR_POST& visitorPost; - PostOrderTask(const boost::shared_ptr& treeNode, const boost::shared_ptr& myData, VISITOR_POST& visitorPost) : - treeNode(treeNode), myData(myData), visitorPost(visitorPost) {} + PostOrderTask(const boost::shared_ptr& treeNode, + const boost::shared_ptr& myData, VISITOR_POST& visitorPost) + : treeNode(treeNode), myData(myData), visitorPost(visitorPost) {} tbb::task* execute() { @@ -71,10 +72,15 @@ namespace gtsam { bool isPostOrderPhase; PreOrderTask(const boost::shared_ptr& treeNode, const boost::shared_ptr& myData, - VISITOR_PRE& visitorPre, VISITOR_POST& visitorPost, int problemSizeThreshold, - bool makeNewTasks = true) : - treeNode(treeNode), myData(myData), visitorPre(visitorPre), visitorPost(visitorPost), - problemSizeThreshold(problemSizeThreshold), makeNewTasks(makeNewTasks), isPostOrderPhase(false) {} + VISITOR_PRE& visitorPre, VISITOR_POST& visitorPost, int problemSizeThreshold, + bool makeNewTasks = true) + : treeNode(treeNode), + myData(myData), + visitorPre(visitorPre), + visitorPost(visitorPost), + problemSizeThreshold(problemSizeThreshold), + makeNewTasks(makeNewTasks), + isPostOrderPhase(false) {} tbb::task* execute() { @@ -93,8 +99,6 @@ namespace gtsam { // Allocate post-order task as a continuation isPostOrderPhase = true; recycle_as_continuation(); - //PostOrderTask& postOrderTask = - // *new(allocate_continuation()) PostOrderTask(treeNode, myData, visitorPost); bool overThreshold = 
(treeNode->problemSize() >= problemSizeThreshold); @@ -105,21 +109,18 @@ namespace gtsam { // Process child in a subtask. Important: Run visitorPre before calling // allocate_child so that if visitorPre throws an exception, we will not have // allocated an extra child, this causes a TBB error. - boost::shared_ptr childData = boost::allocate_shared(tbb::scalable_allocator(), visitorPre(child, *myData)); - //childTasks.push_back(*new(postOrderTask.allocate_child()) - // PreOrderTask(child, childData, visitorPre, visitorPost, - // problemSizeThreshold, overThreshold)); - tbb::task* childTask = new(allocate_child()) - PreOrderTask(child, childData, visitorPre, visitorPost, - problemSizeThreshold, overThreshold); - if(firstChild) + boost::shared_ptr childData = boost::allocate_shared( + tbb::scalable_allocator(), visitorPre(child, *myData)); + tbb::task* childTask = + new (allocate_child()) PreOrderTask(child, childData, visitorPre, visitorPost, + problemSizeThreshold, overThreshold); + if (firstChild) childTasks.push_back(*childTask); else firstChild = childTask; } // If we have child tasks, start subtasks and wait for them to complete - //postOrderTask.set_ref_count((int) treeNode->children.size()); set_ref_count((int)treeNode->children.size()); spawn(childTasks); return firstChild; diff --git a/gtsam/discrete/DiscreteJunctionTree.cpp b/gtsam/discrete/DiscreteJunctionTree.cpp index 8e6d0f4d8..0e6e2b73e 100644 --- a/gtsam/discrete/DiscreteJunctionTree.cpp +++ b/gtsam/discrete/DiscreteJunctionTree.cpp @@ -23,7 +23,7 @@ namespace gtsam { // Instantiate base classes - template class ClusterTree; + template class EliminatableClusterTree; template class JunctionTree; /* ************************************************************************* */ diff --git a/gtsam/discrete/DiscreteJunctionTree.h b/gtsam/discrete/DiscreteJunctionTree.h index 23924acfd..8c00065d9 100644 --- a/gtsam/discrete/DiscreteJunctionTree.h +++ b/gtsam/discrete/DiscreteJunctionTree.h @@ -26,8 +26,8 @@ namespace gtsam { class DiscreteEliminationTree; /** - * A ClusterTree, i.e., a set of variable clusters with factors, arranged in a tree, with - * the additional property that it represents the clique tree associated with a Bayes net. + * An EliminatableClusterTree, i.e., a set of variable clusters with factors, arranged in a tree, + * with the additional property that it represents the clique tree associated with a Bayes net. * * In GTSAM a junction tree is an intermediate data structure in multifrontal * variable elimination. Each node is a cluster of factors, along with a @@ -39,7 +39,7 @@ namespace gtsam { * BayesTree stores conditionals, that are the product of eliminating the factors in the * corresponding JunctionTree cliques. * - * The tree structure and elimination method are exactly analagous to the EliminationTree, + * The tree structure and elimination method are exactly analogous to the EliminationTree, * except that in the JunctionTree, at each node multiple variables are eliminated at a time. * * \addtogroup Multifrontal @@ -53,7 +53,7 @@ namespace gtsam { typedef boost::shared_ptr shared_ptr; ///< Shared pointer to this class /** - * Build the elimination tree of a factor graph using pre-computed column structure. + * Build the elimination tree of a factor graph using precomputed column structure. * @param factorGraph The factor graph for which to build the elimination tree * @param structure The set of factors involving each variable. 
If this is not * precomputed, you can call the Create(const FactorGraph&) diff --git a/gtsam/geometry/Cal3_S2.cpp b/gtsam/geometry/Cal3_S2.cpp index c131d46f7..54deedfdc 100644 --- a/gtsam/geometry/Cal3_S2.cpp +++ b/gtsam/geometry/Cal3_S2.cpp @@ -51,6 +51,13 @@ Cal3_S2::Cal3_S2(const std::string &path) : infile.close(); } +/* ************************************************************************* */ +ostream& operator<<(ostream& os, const Cal3_S2& cal) { + os << "{fx: " << cal.fx() << ", fy: " << cal.fy() << ", s:" << cal.skew() << ", px:" << cal.px() + << ", py:" << cal.py() << "}"; + return os; +} + /* ************************************************************************* */ void Cal3_S2::print(const std::string& s) const { gtsam::print((Matrix)matrix(), s); diff --git a/gtsam/geometry/Cal3_S2.h b/gtsam/geometry/Cal3_S2.h index ac4b68ccd..6ad7aeb86 100644 --- a/gtsam/geometry/Cal3_S2.h +++ b/gtsam/geometry/Cal3_S2.h @@ -75,6 +75,9 @@ public: /// @name Testable /// @{ + /// Output stream operator + GTSAM_EXPORT friend std::ostream &operator<<(std::ostream &os, const Cal3_S2& cal); + /// print with optional string void print(const std::string& s = "Cal3_S2") const; diff --git a/gtsam/geometry/CalibratedCamera.h b/gtsam/geometry/CalibratedCamera.h index 81f448a7c..f1fa509c1 100644 --- a/gtsam/geometry/CalibratedCamera.h +++ b/gtsam/geometry/CalibratedCamera.h @@ -261,6 +261,14 @@ public: /// @name Named Constructors /// @{ + // Create CalibratedCamera, with derivatives + static CalibratedCamera Create(const Pose3& pose, + OptionalJacobian H1 = boost::none) { + if (H1) + *H1 << I_6x6; + return CalibratedCamera(pose); + } + /** * Create a level camera at the given 2D pose and height * @param pose2 specifies the location and viewing direction diff --git a/gtsam/geometry/CameraSet.h b/gtsam/geometry/CameraSet.h index a5cb5d046..b322a40ab 100644 --- a/gtsam/geometry/CameraSet.h +++ b/gtsam/geometry/CameraSet.h @@ -157,28 +157,29 @@ public: for (size_t i = 0; i < m; i++) { // for each camera const MatrixZD& Fi = Fs[i]; + const auto FiT = Fi.transpose(); const Eigen::Matrix Ei_P = // E.block(ZDim * i, 0, ZDim, N) * P; // D = (Dx2) * ZDim - augmentedHessian(i, m) = Fi.transpose() * b.segment(ZDim * i) // F' * b - - Fi.transpose() * (Ei_P * (E.transpose() * b)); // D = (DxZDim) * (ZDimx3) * (N*ZDimm) * (ZDimm x 1) + augmentedHessian.setOffDiagonalBlock(i, m, FiT * b.segment(ZDim * i) // F' * b + - FiT * (Ei_P * (E.transpose() * b))); // D = (DxZDim) * (ZDimx3) * (N*ZDimm) * (ZDimm x 1) // (DxD) = (DxZDim) * ( (ZDimxD) - (ZDimx3) * (3xZDim) * (ZDimxD) ) - augmentedHessian(i, i) = Fi.transpose() - * (Fi - Ei_P * E.block(ZDim * i, 0, ZDim, N).transpose() * Fi); + augmentedHessian.setDiagonalBlock(i, FiT + * (Fi - Ei_P * E.block(ZDim * i, 0, ZDim, N).transpose() * Fi)); // upper triangular part of the hessian for (size_t j = i + 1; j < m; j++) { // for each camera const MatrixZD& Fj = Fs[j]; // (DxD) = (Dx2) * ( (2x2) * (2xD) ) - augmentedHessian(i, j) = -Fi.transpose() - * (Ei_P * E.block(ZDim * j, 0, ZDim, N).transpose() * Fj); + augmentedHessian.setOffDiagonalBlock(i, j, -FiT + * (Ei_P * E.block(ZDim * j, 0, ZDim, N).transpose() * Fj)); } } // end of for over cameras - augmentedHessian(m, m)(0, 0) += b.squaredNorm(); + augmentedHessian.diagonalBlock(m)(0, 0) += b.squaredNorm(); return augmentedHessian; } @@ -252,8 +253,6 @@ public: // G = F' * F - F' * E * P * E' * F // g = F' * (b - E * P * E' * b) - Eigen::Matrix matrixBlock; - // a single point is observed in m cameras size_t m = Fs.size(); 
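// NOTE: the calls below use the block-level SymmetricBlockMatrix API introduced in this change
// (setZero, setDiagonalBlock, updateDiagonalBlock, setOffDiagonalBlock, updateOffDiagonalBlock,
// diagonalBlock, selfadjointView), which replaces the old operator()(i, j) proxy objects.
// A minimal usage sketch with hypothetical values, mirroring the tests above:
//
//   SymmetricBlockMatrix sbm(list_of(2)(1));          // two blocks, of sizes 2 and 1
//   sbm.setZero();
//   sbm.setDiagonalBlock(0, Matrix::Identity(2, 2));  // writes block (0,0)
//   sbm.updateOffDiagonalBlock(0, 1, Vector2(1, 2));  // adds into block (0,1)
//   Matrix dense = sbm.selfadjointView();             // full symmetric copy
//
// Only the upper triangle is stored; selfadjointView() mirrors it into the lower half.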
// cameras observing current point size_t M = (augmentedHessian.rows() - 1) / D; // all cameras in the group @@ -263,6 +262,7 @@ public: for (size_t i = 0; i < m; i++) { // for each camera in the current factor const MatrixZD& Fi = Fs[i]; + const auto FiT = Fi.transpose(); const Eigen::Matrix Ei_P = E.template block( ZDim * i, 0) * P; @@ -275,17 +275,15 @@ public: // information vector - store previous vector // vectorBlock = augmentedHessian(aug_i, aug_m).knownOffDiagonal(); // add contribution of current factor - augmentedHessian(aug_i, M) = augmentedHessian(aug_i, M).knownOffDiagonal() - + Fi.transpose() * b.segment(ZDim * i) // F' * b - - Fi.transpose() * (Ei_P * (E.transpose() * b)); // D = (DxZDim) * (ZDimx3) * (N*ZDimm) * (ZDimm x 1) + augmentedHessian.updateOffDiagonalBlock(aug_i, M, + FiT * b.segment(ZDim * i) // F' * b + - FiT * (Ei_P * (E.transpose() * b))); // D = (DxZDim) * (ZDimx3) * (N*ZDimm) * (ZDimm x 1) - // (DxD) = (DxZDim) * ( (ZDimxD) - (ZDimx3) * (3xZDim) * (ZDimxD) ) - // main block diagonal - store previous block - matrixBlock = augmentedHessian(aug_i, aug_i); + // (DxD) += (DxZDim) * ( (ZDimxD) - (ZDimx3) * (3xZDim) * (ZDimxD) ) // add contribution of current factor - augmentedHessian(aug_i, aug_i) = matrixBlock - + (Fi.transpose() - * (Fi - Ei_P * E.template block(ZDim * i, 0).transpose() * Fi)); + // TODO(gareth): Eigen doesn't let us pass the expression. Call eval() for now... + augmentedHessian.updateDiagonalBlock(aug_i, + ((FiT * (Fi - Ei_P * E.template block(ZDim * i, 0).transpose() * Fi))).eval()); // upper triangular part of the hessian for (size_t j = i + 1; j < m; j++) { // for each camera @@ -297,14 +295,12 @@ public: // off diagonal block - store previous block // matrixBlock = augmentedHessian(aug_i, aug_j).knownOffDiagonal(); // add contribution of current factor - augmentedHessian(aug_i, aug_j) = - augmentedHessian(aug_i, aug_j).knownOffDiagonal() - - Fi.transpose() - * (Ei_P * E.template block(ZDim * j, 0).transpose() * Fj); + augmentedHessian.updateOffDiagonalBlock(aug_i, aug_j, + -FiT * (Ei_P * E.template block(ZDim * j, 0).transpose() * Fj)); } } // end of for over cameras - augmentedHessian(M, M)(0, 0) += b.squaredNorm(); + augmentedHessian.diagonalBlock(M)(0, 0) += b.squaredNorm(); } private: diff --git a/gtsam/geometry/PinholePose.h b/gtsam/geometry/PinholePose.h index ac889c9d7..43ba78ea2 100644 --- a/gtsam/geometry/PinholePose.h +++ b/gtsam/geometry/PinholePose.h @@ -312,6 +312,16 @@ public: return Base::equals(camera, tol) && K_->equals(e->calibration(), tol); } + /// stream operator + friend std::ostream& operator<<(std::ostream &os, const PinholePose& camera) { + os << "{R: " << camera.pose().rotation().rpy().transpose(); + os << ", t: " << camera.pose().translation().transpose(); + if (!camera.K_) os << ", K: none"; + else os << ", K: " << *camera.K_; + os << "}"; + return os; + } + /// print void print(const std::string& s = "PinholePose") const { Base::print(s); diff --git a/gtsam/geometry/Point3.cpp b/gtsam/geometry/Point3.cpp index 7dc6bd704..091906d5f 100644 --- a/gtsam/geometry/Point3.cpp +++ b/gtsam/geometry/Point3.cpp @@ -57,7 +57,7 @@ double Point3::dot(const Point3 &q, OptionalJacobian<1, 3> H1, /* ************************************************************************* */ ostream &operator<<(ostream &os, const Point3& p) { - os << '[' << p.x() << ", " << p.y() << ", " << p.z() << "]\';"; + os << '[' << p.x() << ", " << p.y() << ", " << p.z() << "]'"; return os; } diff --git a/gtsam/geometry/Point3.h b/gtsam/geometry/Point3.h 
index 7787b346e..99cb6c2e7 100644 --- a/gtsam/geometry/Point3.h +++ b/gtsam/geometry/Point3.h @@ -51,8 +51,8 @@ class GTSAM_EXPORT Point3 : public Vector3 { /// @name Standard Constructors /// @{ -#ifdef GTSAM_ALLOW_DEPRECATED_SINCE_V4 // Deprecated default constructor initializes to zero, in contrast to new behavior below +#ifdef GTSAM_ALLOW_DEPRECATED_SINCE_V4 Point3() { setZero(); } #endif @@ -180,10 +180,6 @@ double dot(const Point3& p, const Point3& q, OptionalJacobian<1, 3> H_p = boost::none, OptionalJacobian<1, 3> H_q = boost::none); -// Convenience typedef -typedef std::pair<Point3, Point3> Point3Pair; -std::ostream &operator<<(std::ostream &os, const gtsam::Point3Pair &p); - template struct Range; diff --git a/gtsam/geometry/Rot3.h b/gtsam/geometry/Rot3.h index a5b64ef04..bb0278953 100644 --- a/gtsam/geometry/Rot3.h +++ b/gtsam/geometry/Rot3.h @@ -118,6 +118,7 @@ namespace gtsam { * @param q The quaternion */ Rot3(const Quaternion& q); + /// Quaternion-component constructor; note Eigen's Quaternion takes (w, x, y, z) order. + Rot3(double w, double x, double y, double z) : Rot3(Quaternion(w, x, y, z)) {} /// Random, generates a random axis, then random angle \in [-pi,pi] static Rot3 Random(boost::mt19937 & rng); @@ -156,12 +157,17 @@ namespace gtsam { /** * Returns rotation nRb from body to nav frame. + * For vehicle coordinate frame X forward, Y right, Z down: * Positive yaw is to right (as in aircraft heading). * Positive pitch is up (increasing aircraft altitude). * Positive roll is to right (increasing yaw in aircraft). * Tait-Bryan system from Spatial Reference Model (SRM) (x,y,z) = (roll,pitch,yaw) * as described in http://www.sedris.org/wg8home/Documents/WG80462.pdf. - * Assumes vehicle coordinate frame X forward, Y right, Z down. + * + * For vehicle coordinate frame X forward, Y left, Z up: + * Positive yaw is to left (as in aircraft heading). + * Positive pitch is down (decreasing aircraft altitude). + * Positive roll is to right (decreasing yaw in aircraft). */ static Rot3 Ypr(double y, double p, double r) { return RzRyRx(r,p,y);} diff --git a/gtsam/geometry/Rot3Q.cpp b/gtsam/geometry/Rot3Q.cpp index f8a01141b..8af9a7144 100644 --- a/gtsam/geometry/Rot3Q.cpp +++ b/gtsam/geometry/Rot3Q.cpp @@ -1,6 +1,6 @@ /* ---------------------------------------------------------------------------- - * GTSAM Copyright 2010, Georgia Tech Research Corporation, + * GTSAM Copyright 2010, Georgia Tech Research Corporation, * Atlanta, Georgia 30332-0415 * All Rights Reserved * Authors: Frank Dellaert, et al.
(see THANKS for the full author list) @@ -92,7 +92,7 @@ namespace gtsam { const Matrix3 R = matrix(); if (H1) *H1 = R * skewSymmetric(-p.x(), -p.y(), -p.z()); if (H2) *H2 = R; - const Vector3 r = R * p.vector(); + const Vector3 r = R * p; return Point3(r.x(), r.y(), r.z()); } diff --git a/gtsam/geometry/tests/testCalibratedCamera.cpp b/gtsam/geometry/tests/testCalibratedCamera.cpp index c7e4e3ae7..e629eb3c6 100644 --- a/gtsam/geometry/tests/testCalibratedCamera.cpp +++ b/gtsam/geometry/tests/testCalibratedCamera.cpp @@ -30,10 +30,10 @@ using namespace gtsam; GTSAM_CONCEPT_MANIFOLD_INST(CalibratedCamera) // Camera situated at 0.5 meters high, looking down -static const Pose3 pose1(Rot3(Vector3(1, -1, -1).asDiagonal()), +static const Pose3 kDefaultPose(Rot3(Vector3(1, -1, -1).asDiagonal()), Point3(0, 0, 0.5)); -static const CalibratedCamera camera(pose1); +static const CalibratedCamera camera(kDefaultPose); static const Point3 point1(-0.08,-0.08, 0.0); static const Point3 point2(-0.08, 0.08, 0.0); @@ -43,7 +43,19 @@ static const Point3 point4( 0.08,-0.08, 0.0); /* ************************************************************************* */ TEST( CalibratedCamera, constructor) { - CHECK(assert_equal( camera.pose(), pose1)); + CHECK(assert_equal( camera.pose(), kDefaultPose)); +} + +//****************************************************************************** +TEST(CalibratedCamera, Create) { + Matrix actualH; + EXPECT(assert_equal(camera, CalibratedCamera::Create(kDefaultPose, actualH))); + + // Check derivative + boost::function f = // + boost::bind(CalibratedCamera::Create, _1, boost::none); + Matrix numericalH = numericalDerivative11(f, kDefaultPose); + EXPECT(assert_equal(numericalH, actualH, 1e-9)); } /* ************************************************************************* */ @@ -131,8 +143,8 @@ TEST( CalibratedCamera, Dproject_point_pose) // Add a test with more arbitrary rotation TEST( CalibratedCamera, Dproject_point_pose2) { - static const Pose3 pose1(Rot3::Ypr(0.1, -0.1, 0.4), Point3(0, 0, -10)); - static const CalibratedCamera camera(pose1); + static const Pose3 kDefaultPose(Rot3::Ypr(0.1, -0.1, 0.4), Point3(0, 0, -10)); + static const CalibratedCamera camera(kDefaultPose); Matrix Dpose, Dpoint; camera.project(point1, Dpose, Dpoint); Matrix numerical_pose = numericalDerivative21(project2, camera, point1); @@ -161,8 +173,8 @@ TEST( CalibratedCamera, Dproject_point_pose_infinity) // Add a test with more arbitrary rotation TEST( CalibratedCamera, Dproject_point_pose2_infinity) { - static const Pose3 pose1(Rot3::Ypr(0.1, -0.1, 0.4), Point3(0, 0, -10)); - static const CalibratedCamera camera(pose1); + static const Pose3 pose(Rot3::Ypr(0.1, -0.1, 0.4), Point3(0, 0, -10)); + static const CalibratedCamera camera(pose); Matrix Dpose, Dpoint; camera.project2(pointAtInfinity, Dpose, Dpoint); Matrix numerical_pose = numericalDerivative21(projectAtInfinity, camera, pointAtInfinity); diff --git a/gtsam/geometry/tests/testCameraSet.cpp b/gtsam/geometry/tests/testCameraSet.cpp index 478f5baf4..01f784ae0 100644 --- a/gtsam/geometry/tests/testCameraSet.cpp +++ b/gtsam/geometry/tests/testCameraSet.cpp @@ -90,7 +90,7 @@ TEST(CameraSet, Pinhole) { Vector v = Ft * (b - E * P * Et * b); schur << Ft * F - Ft * E * P * Et * F, v, v.transpose(), 30; SymmetricBlockMatrix actualReduced = Set::SchurComplement(Fs, E, P, b); - EXPECT(assert_equal(schur, actualReduced.matrix())); + EXPECT(assert_equal(schur, actualReduced.selfadjointView())); // Check Schur complement update, same order, should just double 
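// (UpdateSchurComplement accumulates into the augmentedHessian argument through the
// update*Block calls, so a second call with identical Fs, E, P, b and key layout
// simply doubles every entry -- which is exactly what the next assertion checks
// against 2.0 * schur.)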
FastVector allKeys, keys; @@ -99,7 +99,7 @@ TEST(CameraSet, Pinhole) { keys.push_back(1); keys.push_back(2); Set::UpdateSchurComplement(Fs, E, P, b, allKeys, keys, actualReduced); - EXPECT(assert_equal((Matrix )(2.0 * schur), actualReduced.matrix())); + EXPECT(assert_equal((Matrix )(2.0 * schur), actualReduced.selfadjointView())); // Check Schur complement update, keys reversed FastVector keys2; @@ -111,7 +111,7 @@ TEST(CameraSet, Pinhole) { Vector reverse_v = Ft * (reverse_b - E * P * Et * reverse_b); Matrix A(19, 19); A << Ft * F - Ft * E * P * Et * F, reverse_v, reverse_v.transpose(), 30; - EXPECT(assert_equal((Matrix )(2.0 * schur + A), actualReduced.matrix())); + EXPECT(assert_equal((Matrix )(2.0 * schur + A), actualReduced.selfadjointView())); // reprojectionErrorAtInfinity Unit3 pointAtInfinity(0, 0, 1000); diff --git a/gtsam/geometry/tests/testPoint3.cpp b/gtsam/geometry/tests/testPoint3.cpp index 8dde2b5aa..e2396f7e9 100644 --- a/gtsam/geometry/tests/testPoint3.cpp +++ b/gtsam/geometry/tests/testPoint3.cpp @@ -159,7 +159,7 @@ TEST( Point3, stream) { Point3 p(1, 2, -3); std::ostringstream os; os << p; - EXPECT(os.str() == "[1, 2, -3]';"); + EXPECT(os.str() == "[1, 2, -3]'"); } #endif diff --git a/gtsam/geometry/tests/testRot3.cpp b/gtsam/geometry/tests/testRot3.cpp index 5e72d4c5b..6f0af3828 100644 --- a/gtsam/geometry/tests/testRot3.cpp +++ b/gtsam/geometry/tests/testRot3.cpp @@ -231,9 +231,9 @@ TEST(Rot3, retract_localCoordinates) /* ************************************************************************* */ TEST(Rot3, expmap_logmap) { - Vector3 d12 = Vector3::Constant(0.1); + Vector d12 = Vector3::Constant(0.1); Rot3 R2 = R.expmap(d12); - EXPECT(assert_equal(d12, (Vector) R.logmap(R2))); + EXPECT(assert_equal(d12, R.logmap(R2))); } /* ************************************************************************* */ diff --git a/gtsam/inference/BayesTree.h b/gtsam/inference/BayesTree.h index 4d68acb5b..c22a5e257 100644 --- a/gtsam/inference/BayesTree.h +++ b/gtsam/inference/BayesTree.h @@ -30,7 +30,7 @@ namespace gtsam { // Forward declarations template class FactorGraph; - template class ClusterTree; + template class EliminatableClusterTree; /* ************************************************************************* */ /** clique statistics */ @@ -247,7 +247,7 @@ namespace gtsam { void fillNodesIndex(const sharedClique& subtree); // Friend JunctionTree because it directly fills roots and nodes index. 
- template<class BAYESTREE, class GRAPH> friend class ClusterTree; + template<class BAYESTREE, class GRAPH> friend class EliminatableClusterTree; private: /** Serialization function */ diff --git a/gtsam/inference/ClusterTree-inst.h b/gtsam/inference/ClusterTree-inst.h index bd886273a..b042c0c8e 100644 --- a/gtsam/inference/ClusterTree-inst.h +++ b/gtsam/inference/ClusterTree-inst.h @@ -1,5 +1,5 @@ /** - * @file ClusterTree-inst.h + * @file EliminatableClusterTree-inst.h * @date Oct 8, 2013 * @author Kai Ni * @author Richard Roberts @@ -7,16 +7,102 @@ * @brief Collects factorgraph fragments defined on variable clusters, arranged in a tree */ +#pragma once + #include #include #include #include #include -#include - namespace gtsam { +/* ************************************************************************* */ +template <class GRAPH> +void ClusterTree<GRAPH>::Cluster::print(const std::string& s, + const KeyFormatter& keyFormatter) const { std::cout << s << " (" << problemSize_ << ")"; PrintKeyVector(orderedFrontalKeys); } + +/* ************************************************************************* */ +template <class GRAPH> +std::vector<size_t> ClusterTree<GRAPH>::Cluster::nrFrontalsOfChildren() const { + std::vector<size_t> nrFrontals; + nrFrontals.reserve(nrChildren()); + for (const sharedNode& child : children) + nrFrontals.push_back(child->nrFrontals()); + return nrFrontals; +} + +/* ************************************************************************* */ +template <class GRAPH> +void ClusterTree<GRAPH>::Cluster::merge(const boost::shared_ptr<Cluster>& cluster) { + // Merge keys. For efficiency, we add keys in reverse order at the end, calling reverse after. + orderedFrontalKeys.insert(orderedFrontalKeys.end(), cluster->orderedFrontalKeys.rbegin(), + cluster->orderedFrontalKeys.rend()); + factors.push_back(cluster->factors); + children.insert(children.end(), cluster->children.begin(), cluster->children.end()); + // Increment problem size + problemSize_ = std::max(problemSize_, cluster->problemSize_); +} + +/* ************************************************************************* */ +template <class GRAPH> +void ClusterTree<GRAPH>::Cluster::mergeChildren( + const std::vector<bool>& merge) { + gttic(Cluster_mergeChildren); + assert(merge.size() == this->children.size()); + + // Count how many keys, factors and children we'll end up with + size_t nrKeys = orderedFrontalKeys.size(); + size_t nrFactors = factors.size(); + size_t nrNewChildren = 0; + // Loop over children + size_t i = 0; + for(const sharedNode& child: this->children) { + if (merge[i]) { + nrKeys += child->orderedFrontalKeys.size(); + nrFactors += child->factors.size(); + nrNewChildren += child->nrChildren(); + } else { + nrNewChildren += 1; // we keep the child + } + ++i; + } + + // now reserve space, and really merge + auto oldChildren = this->children; + this->children.clear(); + this->children.reserve(nrNewChildren); + orderedFrontalKeys.reserve(nrKeys); + factors.reserve(nrFactors); + i = 0; + for (const sharedNode& child : oldChildren) { + if (merge[i]) { + this->merge(child); + } else { + this->addChild(child); // we keep the child + } + ++i; + } + std::reverse(orderedFrontalKeys.begin(), orderedFrontalKeys.end()); +} + +/* ************************************************************************* */ +template <class GRAPH> +void ClusterTree<GRAPH>::print(const std::string& s, const KeyFormatter& keyFormatter) const { + treeTraversal::PrintForest(*this, s, keyFormatter); +} + +/* ************************************************************************* */ +template <class GRAPH> +ClusterTree<GRAPH>& ClusterTree<GRAPH>::operator=(const This& other) { + // Start by duplicating the tree.
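// (treeTraversal::CloneForest deep-copies the cluster nodes themselves; the factors
// referenced by each cluster are held by shared_ptr and are intentionally not cloned,
// matching the copy-constructor contract documented in ClusterTree.h.)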
+ roots_ = treeTraversal::CloneForest(other); + return *this; +} + /* ************************************************************************* */ // Elimination traversal data - stores a pointer to the parent data and collects // the factors resulting from elimination of the children. Also sets up BayesTree @@ -34,6 +120,7 @@ struct EliminationData { size_t myIndexInParent; FastVector childFactors; boost::shared_ptr bayesTreeNode; + EliminationData(EliminationData* _parentData, size_t nChildren) : parentData(_parentData), bayesTreeNode(boost::make_shared()) { if (parentData) { @@ -55,7 +142,7 @@ struct EliminationData { const typename CLUSTERTREE::sharedNode& node, EliminationData& parentData) { assert(node); - EliminationData myData(&parentData, node->children.size()); + EliminationData myData(&parentData, node->nrChildren()); myData.bayesTreeNode->problemSize_ = node->problemSize(); return myData; } @@ -75,120 +162,51 @@ struct EliminationData { } // Function that does the HEAVY lifting - void operator()(const typename CLUSTERTREE::sharedNode& node, - EliminationData& myData) { + void operator()(const typename CLUSTERTREE::sharedNode& node, EliminationData& myData) { assert(node); // Gather factors FactorGraphType gatheredFactors; - gatheredFactors.reserve(node->factors.size() + node->children.size()); + gatheredFactors.reserve(node->factors.size() + node->nrChildren()); gatheredFactors += node->factors; gatheredFactors += myData.childFactors; // Check for Bayes tree orphan subtrees, and add them to our children - for(const sharedFactor& f: node->factors) { - if (const BayesTreeOrphanWrapper* asSubtree = - dynamic_cast*>(f.get())) { + // TODO(frank): should this really happen here? + for (const sharedFactor& factor: node->factors) { + auto asSubtree = dynamic_cast*>(factor.get()); + if (asSubtree) { myData.bayesTreeNode->children.push_back(asSubtree->clique); asSubtree->clique->parent_ = myData.bayesTreeNode; } } // >>>>>>>>>>>>>> Do dense elimination step >>>>>>>>>>>>>>>>>>>>>>>>>>>>> - std::pair, - boost::shared_ptr > eliminationResult = - eliminationFunction_(gatheredFactors, node->orderedFrontalKeys); + auto eliminationResult = eliminationFunction_(gatheredFactors, node->orderedFrontalKeys); // <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< - // Store conditional in BayesTree clique, and in the case of ISAM2Clique also store the remaining factor + // Store conditional in BayesTree clique, and in the case of ISAM2Clique also store the + // remaining factor myData.bayesTreeNode->setEliminationResult(eliminationResult); // Fill nodes index - we do this here instead of calling insertRoot at the end to avoid // putting orphan subtrees in the index - they'll already be in the index of the ISAM2 // object they're added to. 
- for(const Key& j: myData.bayesTreeNode->conditional()->frontals()) + for (const Key& j: myData.bayesTreeNode->conditional()->frontals()) nodesIndex_.insert(std::make_pair(j, myData.bayesTreeNode)); // Store remaining factor in parent's gathered factors if (!eliminationResult.second->empty()) - myData.parentData->childFactors[myData.myIndexInParent] = - eliminationResult.second; + myData.parentData->childFactors[myData.myIndexInParent] = eliminationResult.second; } }; }; /* ************************************************************************* */ template -void ClusterTree::Cluster::print(const std::string& s, - const KeyFormatter& keyFormatter) const { - std::cout << s << " (" << problemSize_ << ")"; - PrintKeyVector(orderedFrontalKeys); -} - -/* ************************************************************************* */ -template -void ClusterTree::Cluster::mergeChildren( - const std::vector& merge) { - gttic(Cluster_mergeChildren); - - // Count how many keys, factors and children we'll end up with - size_t nrKeys = orderedFrontalKeys.size(); - size_t nrFactors = factors.size(); - size_t nrNewChildren = 0; - // Loop over children - size_t i = 0; - for(const sharedNode& child: children) { - if (merge[i]) { - nrKeys += child->orderedFrontalKeys.size(); - nrFactors += child->factors.size(); - nrNewChildren += child->children.size(); - } else { - nrNewChildren += 1; // we keep the child - } - ++i; - } - - // now reserve space, and really merge - orderedFrontalKeys.reserve(nrKeys); - factors.reserve(nrFactors); - typename Node::Children newChildren; - newChildren.reserve(nrNewChildren); - i = 0; - for(const sharedNode& child: children) { - if (merge[i]) { - // Merge keys. For efficiency, we add keys in reverse order at end, calling reverse after.. - orderedFrontalKeys.insert(orderedFrontalKeys.end(), - child->orderedFrontalKeys.rbegin(), child->orderedFrontalKeys.rend()); - // Merge keys, factors, and children. - factors.insert(factors.end(), child->factors.begin(), - child->factors.end()); - newChildren.insert(newChildren.end(), child->children.begin(), - child->children.end()); - // Increment problem size - problemSize_ = std::max(problemSize_, child->problemSize_); - // Increment number of frontal variables - } else { - newChildren.push_back(child); // we keep the child - } - ++i; - } - children = newChildren; - std::reverse(orderedFrontalKeys.begin(), orderedFrontalKeys.end()); -} - -/* ************************************************************************* */ -template -void ClusterTree::print(const std::string& s, - const KeyFormatter& keyFormatter) const { - treeTraversal::PrintForest(*this, s, keyFormatter); -} - -/* ************************************************************************* */ -template -ClusterTree& ClusterTree::operator=( +EliminatableClusterTree& EliminatableClusterTree::operator=( const This& other) { - // Start by duplicating the tree. - roots_ = treeTraversal::CloneForest(other); + ClusterTree::operator=(other); // Assign the remaining factors - these are pointers to factors in the original factor graph and // we do not clone them. 
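// NOTE: after this refactor the hierarchy is, schematically (a sketch with bodies
// elided, not the verbatim declarations):
//
//   template <class GRAPH>
//   class ClusterTree {                            // plain tree of factor clusters
//     FastVector<sharedCluster> roots_;
//   };
//
//   template <class BAYESTREE, class GRAPH>
//   class EliminatableClusterTree : public ClusterTree<GRAPH> {
//     FastVector<sharedFactor> remainingFactors_;  // adds eliminate() to a BayesTree
//   };
//
// and JunctionTree<BAYESTREE, GRAPH> now derives from EliminatableClusterTree.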
@@ -198,41 +216,40 @@ ClusterTree& ClusterTree::operator=( } /* ************************************************************************* */ -template -std::pair, boost::shared_ptr > ClusterTree< - BAYESTREE, GRAPH>::eliminate(const Eliminate& function) const { +template +std::pair, boost::shared_ptr > +EliminatableClusterTree::eliminate(const Eliminate& function) const { gttic(ClusterTree_eliminate); // Do elimination (depth-first traversal). The rootsContainer stores a 'dummy' BayesTree node // that contains all of the roots as its children. rootsContainer also stores the remaining - // uneliminated factors passed up from the roots. + // un-eliminated factors passed up from the roots. boost::shared_ptr result = boost::make_shared(); + typedef EliminationData Data; - Data rootsContainer(0, roots_.size()); - typename Data::EliminationPostOrderVisitor visitorPost(function, - result->nodes_); + Data rootsContainer(0, this->nrRoots()); + + typename Data::EliminationPostOrderVisitor visitorPost(function, result->nodes_); { - TbbOpenMPMixedScope threadLimiter; // Limits OpenMP threads since we're mixing TBB and OpenMP - treeTraversal::DepthFirstForestParallel(*this, rootsContainer, - Data::EliminationPreOrderVisitor, visitorPost, 10); + TbbOpenMPMixedScope threadLimiter; // Limits OpenMP threads since we're mixing TBB and OpenMP + treeTraversal::DepthFirstForestParallel(*this, rootsContainer, Data::EliminationPreOrderVisitor, + visitorPost, 10); } // Create BayesTree from roots stored in the dummy BayesTree node. - result->roots_.insert(result->roots_.end(), - rootsContainer.bayesTreeNode->children.begin(), - rootsContainer.bayesTreeNode->children.end()); + result->roots_.insert(result->roots_.end(), rootsContainer.bayesTreeNode->children.begin(), + rootsContainer.bayesTreeNode->children.end()); // Add remaining factors that were not involved with eliminated variables - boost::shared_ptr remaining = boost::make_shared< - FactorGraphType>(); - remaining->reserve( - remainingFactors_.size() + rootsContainer.childFactors.size()); + boost::shared_ptr remaining = boost::make_shared(); + remaining->reserve(remainingFactors_.size() + rootsContainer.childFactors.size()); remaining->push_back(remainingFactors_.begin(), remainingFactors_.end()); - for(const sharedFactor& factor: rootsContainer.childFactors) { + for (const sharedFactor& factor : rootsContainer.childFactors) { if (factor) remaining->push_back(factor); } + // Return result return std::make_pair(result, remaining); } -} +} // namespace gtsam diff --git a/gtsam/inference/ClusterTree.h b/gtsam/inference/ClusterTree.h index b00532d22..e225bac5f 100644 --- a/gtsam/inference/ClusterTree.h +++ b/gtsam/inference/ClusterTree.h @@ -1,5 +1,5 @@ /** - * @file ClusterTree.h + * @file EliminatableClusterTree.h * @date Oct 8, 2013 * @author Kai Ni * @author Richard Roberts @@ -21,57 +21,182 @@ namespace gtsam { * each factor \f$ f_i \f$ is associated with a single cluster and \f$ scope(f_i) \sub C_k \f$. 
* \nosubgrouping */ -template<class BAYESTREE, class GRAPH> +template <class GRAPH> class ClusterTree { -public: - typedef GRAPH FactorGraphType; ///< The factor graph type - typedef typename GRAPH::FactorType FactorType; ///< The type of factors - typedef ClusterTree<BAYESTREE, GRAPH> This; ///< This class - typedef boost::shared_ptr<This> shared_ptr; ///< Shared pointer to this class - typedef boost::shared_ptr<FactorType> sharedFactor; ///< Shared pointer to a factor - typedef BAYESTREE BayesTreeType; ///< The BayesTree type produced by elimination - typedef typename BayesTreeType::ConditionalType ConditionalType; ///< The type of conditionals - typedef boost::shared_ptr<ConditionalType> sharedConditional; ///< Shared pointer to a conditional - typedef typename FactorGraphType::Eliminate Eliminate; ///< Typedef for an eliminate subroutine + public: + typedef GRAPH FactorGraphType; ///< The factor graph type + typedef ClusterTree<GRAPH> This; ///< This class + typedef boost::shared_ptr<This> shared_ptr; ///< Shared pointer to this class + typedef typename GRAPH::FactorType FactorType; ///< The type of factors + typedef boost::shared_ptr<FactorType> sharedFactor; ///< Shared pointer to a factor + + /// A Cluster is just a collection of factors // TODO(frank): re-factor JunctionTree so we can make members private struct Cluster { - typedef Ordering Keys; - typedef FastVector<sharedFactor> Factors; typedef FastVector<boost::shared_ptr<Cluster> > Children; + Children children; ///< sub-trees - Cluster() { - } - Cluster(Key key, const Factors& factors) : - factors(factors) { - orderedFrontalKeys.push_back(key); - } + typedef Ordering Keys; + Keys orderedFrontalKeys; ///< Frontal keys of this node + + FactorGraphType factors; ///< Factors associated with this node - Keys orderedFrontalKeys; ///< Frontal keys of this node - Factors factors; ///< Factors associated with this node - Children children; ///< sub-trees int problemSize_; + Cluster() : problemSize_(0) {} + + virtual ~Cluster() {} + + const Cluster& operator[](size_t i) const { + return *(children[i]); + } + + /// Construct from factors associated with a single key + template <class CONTAINER> + Cluster(Key key, const CONTAINER& factorsToAdd) + : problemSize_(0) { + addFactors(key, factorsToAdd); + } + + /// Add factors associated with a single key + template <class CONTAINER> + void addFactors(Key key, const CONTAINER& factorsToAdd) { + orderedFrontalKeys.push_back(key); + factors.push_back(factorsToAdd); + problemSize_ += factors.size(); + } + + /// Add a child cluster + void addChild(const boost::shared_ptr<Cluster>& cluster) { + children.push_back(cluster); + problemSize_ = std::max(problemSize_, cluster->problemSize_); + } + + size_t nrChildren() const { + return children.size(); + } + + size_t nrFactors() const { + return factors.size(); + } + + size_t nrFrontals() const { + return orderedFrontalKeys.size(); + } + int problemSize() const { return problemSize_; } /// print this node - void print(const std::string& s = "", const KeyFormatter& keyFormatter = - DefaultKeyFormatter) const; + virtual void print(const std::string& s = "", + const KeyFormatter& keyFormatter = DefaultKeyFormatter) const; + + /// Return a vector with nrFrontal keys for each child + std::vector<size_t> nrFrontalsOfChildren() const; + + /// Merge in given cluster + void merge(const boost::shared_ptr<Cluster>& cluster); /// Merge all children for which bit is set into this node void mergeChildren(const std::vector<bool>& merge); }; - typedef boost::shared_ptr<Cluster> sharedCluster; ///< Shared pointer to Cluster - typedef Cluster Node; ///< Define Node=Cluster for compatibility with tree traversal functions - typedef sharedCluster sharedNode; ///< Define Node=Cluster for compatibility with
tree traversal functions + typedef boost::shared_ptr<Cluster> sharedCluster; ///< Shared pointer to Cluster + + // Define Node=Cluster for compatibility with tree traversal functions + typedef Cluster Node; + typedef sharedCluster sharedNode; /** concept check */ GTSAM_CONCEPT_TESTABLE_TYPE(FactorType); -protected: + protected: FastVector<sharedNode> roots_; + + /// @name Standard Constructors + /// @{ + + /** Copy constructor - makes a deep copy of the tree structure, but only pointers to factors are + * copied, factors are not cloned. */ + ClusterTree(const This& other) { + *this = other; + } + + /// @} + + public: + + /// Default constructor + ClusterTree() {} + + /// @name Testable + /// @{ + + /** Print the cluster tree */ + void print(const std::string& s = "", + const KeyFormatter& keyFormatter = DefaultKeyFormatter) const; + + /// @} + + /// @name Advanced Interface + /// @{ + + void addRoot(const boost::shared_ptr<Cluster>& cluster) { + roots_.push_back(cluster); + } + + void addChildrenAsRoots(const boost::shared_ptr<Cluster>& cluster) { + for (auto child : cluster->children) + this->addRoot(child); + } + + size_t nrRoots() const { + return roots_.size(); + } + + /** Return the set of roots (one for a tree, multiple for a forest) */ + const FastVector<sharedNode>& roots() const { + return roots_; + } + + const Cluster& operator[](size_t i) const { + return *(roots_[i]); + } + + /// @} + + protected: + /// @name Details + + /// Assignment operator - makes a deep copy of the tree structure, but only pointers to factors + /// are copied, factors are not cloned. + This& operator=(const This& other); + + /// @} +}; + +/** + * A cluster-tree that eliminates to a Bayes tree. + */ +template <class BAYESTREE, class GRAPH> +class EliminatableClusterTree : public ClusterTree<GRAPH> { + public: + typedef BAYESTREE BayesTreeType; ///< The BayesTree type produced by elimination + typedef GRAPH FactorGraphType; ///< The factor graph type + typedef EliminatableClusterTree<BAYESTREE, GRAPH> This; ///< This class + typedef boost::shared_ptr<This> shared_ptr; ///< Shared pointer to this class + + typedef typename BAYESTREE::ConditionalType ConditionalType; ///< The type of conditionals + typedef boost::shared_ptr<ConditionalType> + sharedConditional; ///< Shared pointer to a conditional + + typedef typename GRAPH::Eliminate Eliminate; ///< Typedef for an eliminate subroutine + typedef typename GRAPH::FactorType FactorType; ///< The type of factors + typedef boost::shared_ptr<FactorType> sharedFactor; ///< Shared pointer to a factor + + protected: FastVector<sharedFactor> remainingFactors_; /// @name Standard Constructors @{ /** Copy constructor - makes a deep copy of the tree structure, but only pointers to factors are * copied, factors are not cloned.
*/ - ClusterTree(const This& other) {*this = other;} - - /// @} - -public: - /// @name Testable - /// @{ - - /** Print the cluster tree */ - void print(const std::string& s = "", const KeyFormatter& keyFormatter = DefaultKeyFormatter) const; + EliminatableClusterTree(const This& other) : ClusterTree(other) { + *this = other; + } /// @} + public: /// @name Standard Interface /// @{ @@ -100,23 +219,22 @@ public: * in GaussianFactorGraph.h * @return The Bayes tree and factor graph resulting from elimination */ - std::pair, boost::shared_ptr > - eliminate(const Eliminate& function) const; + std::pair, boost::shared_ptr > eliminate( + const Eliminate& function) const; /// @} /// @name Advanced Interface /// @{ - /** Return the set of roots (one for a tree, multiple for a forest) */ - const FastVector& roots() const {return roots_;} - /** Return the remaining factors that are not pulled into elimination */ - const FastVector& remainingFactors() const {return remainingFactors_;} + const FastVector& remainingFactors() const { + return remainingFactors_; + } /// @} -protected: + protected: /// @name Details /// Assignment operator - makes a deep copy of the tree structure, but only pointers to factors @@ -124,11 +242,10 @@ protected: This& operator=(const This& other); /// Default constructor to be used in derived classes - ClusterTree() {} + EliminatableClusterTree() {} /// @} - }; - } +#include diff --git a/gtsam/inference/JunctionTree-inst.h b/gtsam/inference/JunctionTree-inst.h index 8fcdd1eac..04472f7e3 100644 --- a/gtsam/inference/JunctionTree-inst.h +++ b/gtsam/inference/JunctionTree-inst.h @@ -54,7 +54,7 @@ struct ConstructorTraversalData { // pointer in its parent. ConstructorTraversalData myData = ConstructorTraversalData(&parentData); myData.myJTNode = boost::make_shared(node->key, node->factors); - parentData.myJTNode->children.push_back(myData.myJTNode); + parentData.myJTNode->addChild(myData.myJTNode); return myData; } @@ -99,20 +99,20 @@ struct ConstructorTraversalData { // Merge our children if they are in our clique - if our conditional has // exactly one fewer parent than our child's conditional. 
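// (Concretely: with myNrFrontals frontal variables accumulated so far and myNrParents
// parents in our conditional, child i is absorbed exactly when
// myNrParents + myNrFrontals == childConditionals[i]->nrParents(), i.e. the child's
// separator is wholly contained in this clique; absorbing it then grows myNrFrontals
// by nrFrontals[i], which may allow later children to merge as well.)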
const size_t myNrParents = myConditional->nrParents(); - const size_t nrChildren = node->children.size(); + const size_t nrChildren = node->nrChildren(); assert(childConditionals.size() == nrChildren); // decide which children to merge, as index into children + std::vector<size_t> nrFrontals = node->nrFrontalsOfChildren(); std::vector<bool> merge(nrChildren, false); - size_t myNrFrontals = 1, i = 0; - for(const sharedNode& child: node->children) { + size_t myNrFrontals = 1; + for (size_t i = 0; i < nrChildren; i++) { if (myNrParents + myNrFrontals == childConditionals[i]->nrParents()) { // Increment number of frontal variables - myNrFrontals += child->orderedFrontalKeys.size(); + myNrFrontals += nrFrontals[i]; merge[i] = true; } - ++i; } // now really merge @@ -145,10 +145,7 @@ JunctionTree::JunctionTree( Data::ConstructorTraversalVisitorPostAlg2); // Assign roots from the dummy node - typedef typename JunctionTree::Node Node; - const typename Node::Children& children = rootData.myJTNode->children; - Base::roots_.reserve(children.size()); - Base::roots_.insert(Base::roots_.begin(), children.begin(), children.end()); + this->addChildrenAsRoots(rootData.myJTNode); // Transfer remaining factors from elimination tree Base::remainingFactors_ = eliminationTree.remainingFactors(); diff --git a/gtsam/inference/JunctionTree.h b/gtsam/inference/JunctionTree.h index cdf38b97c..e01f3721a 100644 --- a/gtsam/inference/JunctionTree.h +++ b/gtsam/inference/JunctionTree.h @@ -1,6 +1,6 @@ /* ---------------------------------------------------------------------------- - * GTSAM Copyright 2010, Georgia Tech Research Corporation, + * GTSAM Copyright 2010, Georgia Tech Research Corporation, * Atlanta, Georgia 30332-0415 * All Rights Reserved * Authors: Frank Dellaert, et al. (see THANKS for the full author list) @@ -28,15 +28,13 @@ namespace gtsam { template<class BAYESTREE, class GRAPH> class EliminationTree; /** - * A JunctionTree is a ClusterTree, i.e., a set of variable clusters with factors, arranged - * in a tree, with the additional property that it represents the clique tree associated - * with a Bayes net. + * A JunctionTree is a cluster tree, a set of variable clusters with factors, arranged in a tree, + * with the additional property that it represents the clique tree associated with a Bayes net. * - * In GTSAM a junction tree is an intermediate data structure in multifrontal - * variable elimination. Each node is a cluster of factors, along with a - * clique of variables that are eliminated all at once. In detail, every node k represents - * a clique (maximal fully connected subset) of an associated chordal graph, such as a - * chordal Bayes net resulting from elimination. + * In GTSAM a junction tree is an intermediate data structure in multifrontal variable + * elimination. Each node is a cluster of factors, along with a clique of variables that are + * eliminated all at once. In detail, every node k represents a clique (maximal fully connected + * subset) of an associated chordal graph, such as a chordal Bayes net resulting from elimination.
* * The difference with the BayesTree is that a JunctionTree stores factors, whereas a * BayesTree stores conditionals, that are the product of eliminating the factors in the @@ -49,13 +47,13 @@ namespace gtsam { * \nosubgrouping */ template - class JunctionTree : public ClusterTree { + class JunctionTree : public EliminatableClusterTree { public: typedef JunctionTree This; ///< This class typedef boost::shared_ptr shared_ptr; ///< Shared pointer to this class - typedef ClusterTree Base; ///< Our base class + typedef EliminatableClusterTree Base; ///< Our base class protected: @@ -65,7 +63,7 @@ namespace gtsam { /** Build the junction tree from an elimination tree. */ template static This FromEliminationTree(const ETREE& eliminationTree) { return This(eliminationTree); } - + /** Build the junction tree from an elimination tree. */ template JunctionTree(const EliminationTree& eliminationTree); diff --git a/gtsam/inference/Ordering.cpp b/gtsam/inference/Ordering.cpp index 931610639..e12c32df3 100644 --- a/gtsam/inference/Ordering.cpp +++ b/gtsam/inference/Ordering.cpp @@ -72,7 +72,7 @@ Ordering Ordering::ColamdConstrained(const VariableIndex& variableIndex, for(size_t factorIndex: column) { A[count++] = (int) factorIndex; // copy sparse column } - p[index + 1] = count; // column j (base 1) goes from A[j-1] to A[j]-1 + p[index + 1] = count; // column j (base 1) goes from A[j-1] to A[j]-1 // Store key in array and increment index keys[index] = key_factors.first; ++index; @@ -123,6 +123,7 @@ Ordering Ordering::ColamdConstrainedLast(const VariableIndex& variableIndex, std::vector cmember(n, 0); // Build a mapping to look up sorted Key indices by Key + // TODO(frank): think of a way to not build this FastMap keyIndices; size_t j = 0; for (auto key_factors: variableIndex) diff --git a/gtsam/inference/Ordering.h b/gtsam/inference/Ordering.h index 8af2649c5..ae7a10f44 100644 --- a/gtsam/inference/Ordering.h +++ b/gtsam/inference/Ordering.h @@ -1,6 +1,6 @@ /* ---------------------------------------------------------------------------- - * GTSAM Copyright 2010, Georgia Tech Research Corporation, + * GTSAM Copyright 2010, Georgia Tech Research Corporation, * Atlanta, Georgia 30332-0415 * All Rights Reserved * Authors: Frank Dellaert, et al. (see THANKS for the full author list) @@ -79,7 +79,10 @@ public: /// it is faster to use COLAMD(const VariableIndex&) template static Ordering Colamd(const FactorGraph& graph) { - return Colamd(VariableIndex(graph)); + if (graph.empty()) + return Ordering(); + else + return Colamd(VariableIndex(graph)); } /// Compute a fill-reducing ordering using COLAMD from a VariableIndex. @@ -96,8 +99,10 @@ public: template static Ordering ColamdConstrainedLast(const FactorGraph& graph, const std::vector& constrainLast, bool forceOrder = false) { - return ColamdConstrainedLast(VariableIndex(graph), constrainLast, - forceOrder); + if (graph.empty()) + return Ordering(); + else + return ColamdConstrainedLast(VariableIndex(graph), constrainLast, forceOrder); } /// Compute a fill-reducing ordering using constrained COLAMD from a VariableIndex. 
This @@ -121,8 +126,10 @@ public: template static Ordering ColamdConstrainedFirst(const FactorGraph& graph, const std::vector& constrainFirst, bool forceOrder = false) { - return ColamdConstrainedFirst(VariableIndex(graph), constrainFirst, - forceOrder); + if (graph.empty()) + return Ordering(); + else + return ColamdConstrainedFirst(VariableIndex(graph), constrainFirst, forceOrder); } /// Compute a fill-reducing ordering using constrained COLAMD from a VariableIndex. This @@ -148,7 +155,10 @@ public: template static Ordering ColamdConstrained(const FactorGraph& graph, const FastMap& groups) { - return ColamdConstrained(VariableIndex(graph), groups); + if (graph.empty()) + return Ordering(); + else + return ColamdConstrained(VariableIndex(graph), groups); } /// Compute a fill-reducing ordering using constrained COLAMD from a VariableIndex. In this @@ -190,6 +200,8 @@ public: template static Ordering Create(OrderingType orderingType, const FactorGraph& graph) { + if (graph.empty()) + return Ordering(); switch (orderingType) { case COLAMD: diff --git a/gtsam/inference/tests/testOrdering.cpp b/gtsam/inference/tests/testOrdering.cpp index 52528ad7f..0d23d9655 100644 --- a/gtsam/inference/tests/testOrdering.cpp +++ b/gtsam/inference/tests/testOrdering.cpp @@ -30,36 +30,64 @@ using namespace boost::assign; namespace example { SymbolicFactorGraph symbolicChain() { - SymbolicFactorGraph sfg; - sfg.push_factor(0, 1); - sfg.push_factor(1, 2); - sfg.push_factor(2, 3); - sfg.push_factor(3, 4); - sfg.push_factor(4, 5); - return sfg; + SymbolicFactorGraph symbolicGraph; + symbolicGraph.push_factor(0, 1); + symbolicGraph.push_factor(1, 2); + symbolicGraph.push_factor(2, 3); + symbolicGraph.push_factor(3, 4); + symbolicGraph.push_factor(4, 5); + return symbolicGraph; } } /* ************************************************************************* */ TEST(Ordering, constrained_ordering) { - // create graph with wanted variable set = 2, 4 - SymbolicFactorGraph sfg = example::symbolicChain(); + SymbolicFactorGraph symbolicGraph = example::symbolicChain(); // unconstrained version - Ordering actUnconstrained = Ordering::Colamd(sfg); - Ordering expUnconstrained = Ordering(list_of(0)(1)(2)(3)(4)(5)); - EXPECT(assert_equal(expUnconstrained, actUnconstrained)); + { + Ordering actual = Ordering::Colamd(symbolicGraph); + Ordering expected = Ordering(list_of(0)(1)(2)(3)(4)(5)); + EXPECT(assert_equal(expected, actual)); + } // constrained version - push one set to the end - Ordering actConstrained = Ordering::ColamdConstrainedLast(sfg, list_of(2)(4)); - Ordering expConstrained = Ordering(list_of(0)(1)(5)(3)(4)(2)); - EXPECT(assert_equal(expConstrained, actConstrained)); + { + Ordering actual = Ordering::ColamdConstrainedLast(symbolicGraph, list_of(2)(4)); + Ordering expected = Ordering(list_of(0)(1)(5)(3)(4)(2)); + EXPECT(assert_equal(expected, actual)); + } // constrained version - push one set to the start - Ordering actConstrained2 = Ordering::ColamdConstrainedFirst(sfg, - list_of(2)(4)); - Ordering expConstrained2 = Ordering(list_of(2)(4)(0)(1)(3)(5)); - EXPECT(assert_equal(expConstrained2, actConstrained2)); + { + Ordering actual = Ordering::ColamdConstrainedFirst(symbolicGraph, list_of(2)(4)); + Ordering expected = Ordering(list_of(2)(4)(0)(1)(3)(5)); + EXPECT(assert_equal(expected, actual)); + } + + // Make sure giving empty constraints does not break the code + { + Ordering actual = Ordering::ColamdConstrainedLast(symbolicGraph, {}); + Ordering expected = Ordering(list_of(0)(1)(2)(3)(4)(5)); + 
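// (With the empty-graph guards added to Ordering above, an empty factor graph
// short-circuits to an empty Ordering before any VariableIndex is built, e.g.
//   SymbolicFactorGraph emptyGraph;
//   Ordering o = Ordering::Colamd(emptyGraph);  // returns Ordering() without calling COLAMD
// The tests added further down exercise the constrained variants the same way.)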
EXPECT(assert_equal(expected, actual)); + } + { + Ordering actual = Ordering::ColamdConstrainedFirst(symbolicGraph, {}); + Ordering expected = Ordering(list_of(0)(1)(2)(3)(4)(5)); + EXPECT(assert_equal(expected, actual)); + } + + // Make sure giving empty graph does not break the code + SymbolicFactorGraph emptyGraph; + Ordering empty; + { + Ordering actual = Ordering::ColamdConstrainedLast(emptyGraph, list_of(2)(4)); + EXPECT(assert_equal(empty, actual)); + } + { + Ordering actual = Ordering::ColamdConstrainedFirst(emptyGraph, list_of(2)(4)); + EXPECT(assert_equal(empty, actual)); + } } /* ************************************************************************* */ @@ -68,7 +96,7 @@ TEST(Ordering, grouped_constrained_ordering) { // create graph with constrained groups: // 1: 2, 4 // 2: 5 - SymbolicFactorGraph sfg = example::symbolicChain(); + SymbolicFactorGraph symbolicGraph = example::symbolicChain(); // constrained version - push one set to the end FastMap constraints; @@ -76,40 +104,40 @@ TEST(Ordering, grouped_constrained_ordering) { constraints[4] = 1; constraints[5] = 2; - Ordering actConstrained = Ordering::ColamdConstrained(sfg, constraints); - Ordering expConstrained = list_of(0)(1)(3)(2)(4)(5); - EXPECT(assert_equal(expConstrained, actConstrained)); + Ordering actual = Ordering::ColamdConstrained(symbolicGraph, constraints); + Ordering expected = list_of(0)(1)(3)(2)(4)(5); + EXPECT(assert_equal(expected, actual)); } /* ************************************************************************* */ TEST(Ordering, csr_format) { // Example in METIS manual - SymbolicFactorGraph sfg; - sfg.push_factor(0, 1); - sfg.push_factor(1, 2); - sfg.push_factor(2, 3); - sfg.push_factor(3, 4); - sfg.push_factor(5, 6); - sfg.push_factor(6, 7); - sfg.push_factor(7, 8); - sfg.push_factor(8, 9); - sfg.push_factor(10, 11); - sfg.push_factor(11, 12); - sfg.push_factor(12, 13); - sfg.push_factor(13, 14); + SymbolicFactorGraph symbolicGraph; + symbolicGraph.push_factor(0, 1); + symbolicGraph.push_factor(1, 2); + symbolicGraph.push_factor(2, 3); + symbolicGraph.push_factor(3, 4); + symbolicGraph.push_factor(5, 6); + symbolicGraph.push_factor(6, 7); + symbolicGraph.push_factor(7, 8); + symbolicGraph.push_factor(8, 9); + symbolicGraph.push_factor(10, 11); + symbolicGraph.push_factor(11, 12); + symbolicGraph.push_factor(12, 13); + symbolicGraph.push_factor(13, 14); - sfg.push_factor(0, 5); - sfg.push_factor(5, 10); - sfg.push_factor(1, 6); - sfg.push_factor(6, 11); - sfg.push_factor(2, 7); - sfg.push_factor(7, 12); - sfg.push_factor(3, 8); - sfg.push_factor(8, 13); - sfg.push_factor(4, 9); - sfg.push_factor(9, 14); + symbolicGraph.push_factor(0, 5); + symbolicGraph.push_factor(5, 10); + symbolicGraph.push_factor(1, 6); + symbolicGraph.push_factor(6, 11); + symbolicGraph.push_factor(2, 7); + symbolicGraph.push_factor(7, 12); + symbolicGraph.push_factor(3, 8); + symbolicGraph.push_factor(8, 13); + symbolicGraph.push_factor(4, 9); + symbolicGraph.push_factor(9, 14); - MetisIndex mi(sfg); + MetisIndex mi(symbolicGraph); vector xadjExpected, adjExpected; xadjExpected += 0, 2, 5, 8, 11, 13, 16, 20, 24, 28, 31, 33, 36, 39, 42, 44; @@ -122,16 +150,16 @@ TEST(Ordering, csr_format) { /* ************************************************************************* */ TEST(Ordering, csr_format_2) { - SymbolicFactorGraph sfg; + SymbolicFactorGraph symbolicGraph; - sfg.push_factor(0); - sfg.push_factor(0, 1); - sfg.push_factor(1, 2); - sfg.push_factor(2, 3); - sfg.push_factor(3, 4); - sfg.push_factor(4, 1); + 
symbolicGraph.push_factor(0); + symbolicGraph.push_factor(0, 1); + symbolicGraph.push_factor(1, 2); + symbolicGraph.push_factor(2, 3); + symbolicGraph.push_factor(3, 4); + symbolicGraph.push_factor(4, 1); - MetisIndex mi(sfg); + MetisIndex mi(symbolicGraph); vector xadjExpected, adjExpected; xadjExpected += 0, 1, 4, 6, 8, 10; @@ -144,16 +172,16 @@ TEST(Ordering, csr_format_2) { /* ************************************************************************* */ TEST(Ordering, csr_format_3) { - SymbolicFactorGraph sfg; + SymbolicFactorGraph symbolicGraph; - sfg.push_factor(100); - sfg.push_factor(100, 101); - sfg.push_factor(101, 102); - sfg.push_factor(102, 103); - sfg.push_factor(103, 104); - sfg.push_factor(104, 101); + symbolicGraph.push_factor(100); + symbolicGraph.push_factor(100, 101); + symbolicGraph.push_factor(101, 102); + symbolicGraph.push_factor(102, 103); + symbolicGraph.push_factor(103, 104); + symbolicGraph.push_factor(104, 101); - MetisIndex mi(sfg); + MetisIndex mi(symbolicGraph); vector xadjExpected, adjExpected; xadjExpected += 0, 1, 4, 6, 8, 10; @@ -174,16 +202,16 @@ TEST(Ordering, csr_format_3) { /* ************************************************************************* */ #ifdef GTSAM_SUPPORT_NESTED_DISSECTION TEST(Ordering, csr_format_4) { - SymbolicFactorGraph sfg; + SymbolicFactorGraph symbolicGraph; - sfg.push_factor(Symbol('x', 1)); - sfg.push_factor(Symbol('x', 1), Symbol('x', 2)); - sfg.push_factor(Symbol('x', 2), Symbol('x', 3)); - sfg.push_factor(Symbol('x', 3), Symbol('x', 4)); - sfg.push_factor(Symbol('x', 4), Symbol('x', 5)); - sfg.push_factor(Symbol('x', 5), Symbol('x', 6)); + symbolicGraph.push_factor(Symbol('x', 1)); + symbolicGraph.push_factor(Symbol('x', 1), Symbol('x', 2)); + symbolicGraph.push_factor(Symbol('x', 2), Symbol('x', 3)); + symbolicGraph.push_factor(Symbol('x', 3), Symbol('x', 4)); + symbolicGraph.push_factor(Symbol('x', 4), Symbol('x', 5)); + symbolicGraph.push_factor(Symbol('x', 5), Symbol('x', 6)); - MetisIndex mi(sfg); + MetisIndex mi(symbolicGraph); vector xadjExpected, adjExpected; xadjExpected += 0, 1, 3, 5, 7, 9, 10; @@ -196,29 +224,29 @@ TEST(Ordering, csr_format_4) { EXPECT(adjExpected.size() == mi.adj().size()); EXPECT(adjExpected == adjAcutal); - Ordering metOrder = Ordering::Metis(sfg); + Ordering metOrder = Ordering::Metis(symbolicGraph); // Test different symbol types - sfg.push_factor(Symbol('l', 1)); - sfg.push_factor(Symbol('x', 1), Symbol('l', 1)); - sfg.push_factor(Symbol('x', 2), Symbol('l', 1)); - sfg.push_factor(Symbol('x', 3), Symbol('l', 1)); - sfg.push_factor(Symbol('x', 4), Symbol('l', 1)); + symbolicGraph.push_factor(Symbol('l', 1)); + symbolicGraph.push_factor(Symbol('x', 1), Symbol('l', 1)); + symbolicGraph.push_factor(Symbol('x', 2), Symbol('l', 1)); + symbolicGraph.push_factor(Symbol('x', 3), Symbol('l', 1)); + symbolicGraph.push_factor(Symbol('x', 4), Symbol('l', 1)); - Ordering metOrder2 = Ordering::Metis(sfg); + Ordering metOrder2 = Ordering::Metis(symbolicGraph); } #endif /* ************************************************************************* */ #ifdef GTSAM_SUPPORT_NESTED_DISSECTION TEST(Ordering, metis) { - SymbolicFactorGraph sfg; + SymbolicFactorGraph symbolicGraph; - sfg.push_factor(0); - sfg.push_factor(0, 1); - sfg.push_factor(1, 2); + symbolicGraph.push_factor(0); + symbolicGraph.push_factor(0, 1); + symbolicGraph.push_factor(1, 2); - MetisIndex mi(sfg); + MetisIndex mi(symbolicGraph); vector xadjExpected, adjExpected; xadjExpected += 0, 1, 3, 4; @@ -228,7 +256,7 @@ TEST(Ordering, metis) { 
EXPECT(adjExpected.size() == mi.adj().size()); EXPECT(adjExpected == mi.adj()); - Ordering metis = Ordering::Metis(sfg); + Ordering metis = Ordering::Metis(symbolicGraph); } #endif /* ************************************************************************* */ @@ -236,15 +264,15 @@ TEST(Ordering, metis) { TEST(Ordering, MetisLoop) { // create linear graph - SymbolicFactorGraph sfg = example::symbolicChain(); + SymbolicFactorGraph symbolicGraph = example::symbolicChain(); // add loop closure - sfg.push_factor(0, 5); + symbolicGraph.push_factor(0, 5); // METIS #if !defined(__APPLE__) { - Ordering actual = Ordering::Create(Ordering::METIS, sfg); + Ordering actual = Ordering::Create(Ordering::METIS, symbolicGraph); // - P( 0 4 1) // | - P( 2 | 4 1) // | | - P( 3 | 4 2) @@ -254,7 +282,7 @@ TEST(Ordering, MetisLoop) { } #else { - Ordering actual = Ordering::Create(Ordering::METIS, sfg); + Ordering actual = Ordering::Create(Ordering::METIS, symbolicGraph); // - P( 1 0 3) // | - P( 4 | 0 3) // | | - P( 5 | 0 4) @@ -269,7 +297,7 @@ TEST(Ordering, MetisLoop) { TEST(Ordering, Create) { // create chain graph - SymbolicFactorGraph sfg = example::symbolicChain(); + SymbolicFactorGraph symbolicGraph = example::symbolicChain(); // COLAMD { @@ -278,7 +306,7 @@ TEST(Ordering, Create) { //| | - P( 2 | 3) //| | | - P( 1 | 2) //| | | | - P( 0 | 1) - Ordering actual = Ordering::Create(Ordering::COLAMD, sfg); + Ordering actual = Ordering::Create(Ordering::COLAMD, symbolicGraph); Ordering expected = Ordering(list_of(0)(1)(2)(3)(4)(5)); EXPECT(assert_equal(expected, actual)); } @@ -286,7 +314,7 @@ TEST(Ordering, Create) { #ifdef GTSAM_SUPPORT_NESTED_DISSECTION // METIS { - Ordering actual = Ordering::Create(Ordering::METIS, sfg); + Ordering actual = Ordering::Create(Ordering::METIS, symbolicGraph); //- P( 1 0 2) //| - P( 3 4 | 2) //| | - P( 5 | 4) @@ -296,7 +324,7 @@ TEST(Ordering, Create) { #endif // CUSTOM - CHECK_EXCEPTION(Ordering::Create(Ordering::CUSTOM, sfg), runtime_error); + CHECK_EXCEPTION(Ordering::Create(Ordering::CUSTOM, symbolicGraph), runtime_error); } /* ************************************************************************* */ diff --git a/gtsam/linear/BinaryJacobianFactor.h b/gtsam/linear/BinaryJacobianFactor.h index 23d11964c..462258762 100644 --- a/gtsam/linear/BinaryJacobianFactor.h +++ b/gtsam/linear/BinaryJacobianFactor.h @@ -73,12 +73,12 @@ struct BinaryJacobianFactor: JacobianFactor { Eigen::Block b(Ab, 0, N1 + N2); // We perform I += A'*A to the upper triangle - (*info)(slot1, slot1).selfadjointView().rankUpdate(A1.transpose()); - (*info)(slot1, slot2).knownOffDiagonal() += A1.transpose() * A2; - (*info)(slot1, slotB).knownOffDiagonal() += A1.transpose() * b; - (*info)(slot2, slot2).selfadjointView().rankUpdate(A2.transpose()); - (*info)(slot2, slotB).knownOffDiagonal() += A2.transpose() * b; - (*info)(slotB, slotB)(0, 0) += b.transpose() * b; + info->diagonalBlock(slot1).rankUpdate(A1.transpose()); + info->updateOffDiagonalBlock(slot1, slot2, A1.transpose() * A2); + info->updateOffDiagonalBlock(slot1, slotB, A1.transpose() * b); + info->diagonalBlock(slot2).rankUpdate(A2.transpose()); + info->updateOffDiagonalBlock(slot2, slotB, A2.transpose() * b); + info->updateDiagonalBlock(slotB, b.transpose() * b); } } }; diff --git a/gtsam/linear/GaussianFactorGraph.cpp b/gtsam/linear/GaussianFactorGraph.cpp index ff67b9cb1..a4df04cf9 100644 --- a/gtsam/linear/GaussianFactorGraph.cpp +++ b/gtsam/linear/GaussianFactorGraph.cpp @@ -48,7 +48,7 @@ namespace gtsam { /* 
************************************************************************* */ GaussianFactorGraph::Keys GaussianFactorGraph::keys() const { KeySet keys; - for(const sharedFactor& factor: *this) + for (const sharedFactor& factor: *this) if (factor) keys.insert(factor->begin(), factor->end()); return keys; } /* ************************************************************************* */ std::map<Key, size_t> GaussianFactorGraph::getKeyDimMap() const { map<Key, size_t> spec; - for ( const GaussianFactor::shared_ptr &gf: *this ) { - for ( GaussianFactor::const_iterator it = gf->begin() ; it != gf->end() ; it++ ) { + for (const GaussianFactor::shared_ptr& gf : *this) { + for (GaussianFactor::const_iterator it = gf->begin(); it != gf->end(); it++) { map<Key, size_t>::iterator it2 = spec.find(*it); if ( it2 == spec.end() ) { spec.insert(make_pair(*it, gf->getDim(it))); @@ -78,7 +78,7 @@ namespace gtsam { /* ************************************************************************* */ GaussianFactorGraph GaussianFactorGraph::clone() const { GaussianFactorGraph result; - for(const sharedFactor& f: *this) { + for (const sharedFactor& f : *this) { if (f) result.push_back(f->clone()); else @@ -90,9 +90,9 @@ namespace gtsam { /* ************************************************************************* */ GaussianFactorGraph GaussianFactorGraph::negate() const { GaussianFactorGraph result; - for(const sharedFactor& f: *this) { - if (f) - result.push_back(f->negate()); + for (const sharedFactor& factor: *this) { + if (factor) + result.push_back(factor->negate()); else result.push_back(sharedFactor()); // Passes on null factors so indices remain valid } @@ -104,8 +104,9 @@ namespace gtsam { // First find dimensions of each variable typedef std::map<Key, size_t> KeySizeMap; KeySizeMap dims; - for(const sharedFactor& factor: *this) { - if (!static_cast<bool>(factor)) continue; + for (const sharedFactor& factor : *this) { + if (!static_cast<bool>(factor)) + continue; for (GaussianFactor::const_iterator key = factor->begin(); key != factor->end(); ++key) { @@ -116,7 +117,7 @@ namespace gtsam { // Compute first scalar column of each variable size_t currentColIndex = 0; KeySizeMap columnIndices = dims; - for(const KeySizeMap::value_type& col: dims) { + for (const KeySizeMap::value_type& col : dims) { columnIndices[col.first] = currentColIndex; currentColIndex += dims[col.first]; } @@ -125,7 +126,7 @@ namespace gtsam { typedef boost::tuple<size_t, size_t, double> triplet; vector<triplet> entries; size_t row = 0; - for(const sharedFactor& factor: *this) { + for (const sharedFactor& factor : *this) { if (!static_cast<bool>(factor)) continue; // Convert to JacobianFactor if necessary @@ -209,25 +210,21 @@ namespace gtsam { // combine all factors and get upper-triangular part of Hessian Scatter scatter(*this, optionalOrdering); HessianFactor combined(*this, scatter); - Matrix result = combined.info(); - // Fill in lower-triangular part of Hessian - result.triangularView() = result.transpose(); - return result; + return combined.info().selfadjointView(); } /* ************************************************************************* */ pair<Matrix, Vector> GaussianFactorGraph::hessian( boost::optional<const Ordering&> optionalOrdering) const { Matrix augmented = augmentedHessian(optionalOrdering); - return make_pair( - augmented.topLeftCorner(augmented.rows() - 1, augmented.rows() - 1), - augmented.col(augmented.rows() - 1).head(augmented.rows() - 1)); + size_t n = augmented.rows() - 1; + return make_pair(augmented.topLeftCorner(n, n), augmented.topRightCorner(n, 1)); } /*
************************************************************************* */ VectorValues GaussianFactorGraph::hessianDiagonal() const { VectorValues d; - for(const sharedFactor& factor: *this) { + for (const sharedFactor& factor : *this) { if(factor){ VectorValues di = factor->hessianDiagonal(); d.addInPlace_(di); @@ -239,11 +236,11 @@ namespace gtsam { /* ************************************************************************* */ map GaussianFactorGraph::hessianBlockDiagonal() const { map blocks; - for(const sharedFactor& factor: *this) { + for (const sharedFactor& factor : *this) { if (!factor) continue; map BD = factor->hessianBlockDiagonal(); map::const_iterator it = BD.begin(); - for(;it!=BD.end();it++) { + for (;it!=BD.end();++it) { Key j = it->first; // variable key for this block const Matrix& Bj = it->second; if (blocks.count(j)) @@ -262,6 +259,30 @@ namespace gtsam { return BaseEliminateable::eliminateMultifrontal(ordering, function)->optimize(); } + /* ************************************************************************* */ + // TODO(frank): can we cache memory across invocations + VectorValues GaussianFactorGraph::optimizeDensely() const { + gttic(GaussianFactorGraph_optimizeDensely); + + // Combine all factors in a single HessianFactor (as done in augmentedHessian) + Scatter scatter(*this); + HessianFactor combined(*this, scatter); + + // TODO(frank): cast to large dynamic matrix :-( + // NOTE(frank): info only valid (I think) in upper triangle. No problem for LLT... + Matrix augmented = combined.info().selfadjointView(); + + // Do Cholesky Factorization + size_t n = augmented.rows() - 1; + auto AtA = augmented.topLeftCorner(n, n); + auto eta = augmented.topRightCorner(n, 1); + Eigen::LLT llt(AtA); + + // Solve and convert, re-using scatter data structure + Vector solution = llt.solve(eta); + return VectorValues(solution, scatter); + } + /* ************************************************************************* */ namespace { JacobianFactor::shared_ptr convertToJacobianFactorPtr(const GaussianFactor::shared_ptr &gf) { @@ -277,8 +298,8 @@ namespace gtsam { VectorValues GaussianFactorGraph::gradient(const VectorValues& x0) const { VectorValues g = VectorValues::Zero(x0); - for(const sharedFactor& Ai_G: *this) { - JacobianFactor::shared_ptr Ai = convertToJacobianFactorPtr(Ai_G); + for (const sharedFactor& factor: *this) { + JacobianFactor::shared_ptr Ai = convertToJacobianFactorPtr(factor); Vector e = Ai->error_vector(x0); Ai->transposeMultiplyAdd(1.0, e, g); } @@ -289,7 +310,7 @@ namespace gtsam { VectorValues GaussianFactorGraph::gradientAtZero() const { // Zero-out the gradient VectorValues g; - for(const sharedFactor& factor: *this) { + for (const sharedFactor& factor: *this) { if (!factor) continue; VectorValues gi = factor->gradientAtZero(); g.addInPlace_(gi); @@ -329,8 +350,8 @@ namespace gtsam { /* ************************************************************************* */ Errors GaussianFactorGraph::operator*(const VectorValues& x) const { Errors e; - for(const GaussianFactor::shared_ptr& Ai_G: *this) { - JacobianFactor::shared_ptr Ai = convertToJacobianFactorPtr(Ai_G); + for (const GaussianFactor::shared_ptr& factor: *this) { + JacobianFactor::shared_ptr Ai = convertToJacobianFactorPtr(factor); e.push_back((*Ai) * x); } return e; @@ -339,7 +360,7 @@ namespace gtsam { /* ************************************************************************* */ void GaussianFactorGraph::multiplyHessianAdd(double alpha, const VectorValues& x, VectorValues& y) const { - 
for(const GaussianFactor::shared_ptr& f: *this) + for (const GaussianFactor::shared_ptr& f: *this) f->multiplyHessianAdd(alpha, x, y); } @@ -351,8 +372,8 @@ namespace gtsam { /* ************************************************************************* */ void GaussianFactorGraph::multiplyInPlace(const VectorValues& x, const Errors::iterator& e) const { Errors::iterator ei = e; - for(const GaussianFactor::shared_ptr& Ai_G: *this) { - JacobianFactor::shared_ptr Ai = convertToJacobianFactorPtr(Ai_G); + for (const GaussianFactor::shared_ptr& factor: *this) { + JacobianFactor::shared_ptr Ai = convertToJacobianFactorPtr(factor); *ei = (*Ai)*x; ei++; } @@ -361,7 +382,7 @@ namespace gtsam { /* ************************************************************************* */ bool hasConstraints(const GaussianFactorGraph& factors) { typedef JacobianFactor J; - for(const GaussianFactor::shared_ptr& factor: factors) { + for (const GaussianFactor::shared_ptr& factor: factors) { J::shared_ptr jacobian(boost::dynamic_pointer_cast(factor)); if (jacobian && jacobian->get_model() && jacobian->get_model()->isConstrained()) { return true; @@ -376,8 +397,8 @@ namespace gtsam { VectorValues& x) const { // For each factor add the gradient contribution Errors::const_iterator ei = e.begin(); - for(const sharedFactor& Ai_G: *this) { - JacobianFactor::shared_ptr Ai = convertToJacobianFactorPtr(Ai_G); + for (const sharedFactor& factor: *this) { + JacobianFactor::shared_ptr Ai = convertToJacobianFactorPtr(factor); Ai->transposeMultiplyAdd(alpha, *(ei++), x); } } @@ -385,8 +406,8 @@ namespace gtsam { ///* ************************************************************************* */ //void residual(const GaussianFactorGraph& fg, const VectorValues &x, VectorValues &r) { // Key i = 0 ; - // for(const GaussianFactor::shared_ptr& Ai_G: fg) { - // JacobianFactor::shared_ptr Ai = convertToJacobianFactorPtr(Ai_G); + // for (const GaussianFactor::shared_ptr& factor: fg) { + // JacobianFactor::shared_ptr Ai = convertToJacobianFactorPtr(factor); // r[i] = Ai->getb(); // i++; // } @@ -399,10 +420,10 @@ namespace gtsam { //void multiply(const GaussianFactorGraph& fg, const VectorValues &x, VectorValues &r) { // r.setZero(); // Key i = 0; - // for(const GaussianFactor::shared_ptr& Ai_G: fg) { - // JacobianFactor::shared_ptr Ai = convertToJacobianFactorPtr(Ai_G); + // for (const GaussianFactor::shared_ptr& factor: fg) { + // JacobianFactor::shared_ptr Ai = convertToJacobianFactorPtr(factor); // Vector &y = r[i]; - // for(JacobianFactor::const_iterator j = Ai->begin(); j != Ai->end(); ++j) { + // for (JacobianFactor::const_iterator j = Ai->begin(); j != Ai->end(); ++j) { // y += Ai->getA(j) * x[*j]; // } // ++i; @@ -414,9 +435,9 @@ namespace gtsam { { VectorValues x; Errors::const_iterator ei = e.begin(); - for(const sharedFactor& Ai_G: *this) { - JacobianFactor::shared_ptr Ai = convertToJacobianFactorPtr(Ai_G); - for(JacobianFactor::const_iterator j = Ai->begin(); j != Ai->end(); ++j) { + for (const sharedFactor& factor: *this) { + JacobianFactor::shared_ptr Ai = convertToJacobianFactorPtr(factor); + for (JacobianFactor::const_iterator j = Ai->begin(); j != Ai->end(); ++j) { // Create the value as a zero vector if it does not exist.
pair xi = x.tryInsert(*j, Vector()); if(xi.second) @@ -432,8 +453,8 @@ namespace gtsam { Errors GaussianFactorGraph::gaussianErrors(const VectorValues& x) const { Errors e; - for(const sharedFactor& Ai_G: *this) { - JacobianFactor::shared_ptr Ai = convertToJacobianFactorPtr(Ai_G); + for (const sharedFactor& factor: *this) { + JacobianFactor::shared_ptr Ai = convertToJacobianFactorPtr(factor); e.push_back(Ai->error_vector(x)); } return e; diff --git a/gtsam/linear/GaussianFactorGraph.h b/gtsam/linear/GaussianFactorGraph.h index d969f77ad..49cba482d 100644 --- a/gtsam/linear/GaussianFactorGraph.h +++ b/gtsam/linear/GaussianFactorGraph.h @@ -246,6 +246,11 @@ namespace gtsam { VectorValues optimize(OptionalOrdering ordering = boost::none, const Eliminate& function = EliminationTraitsType::DefaultEliminate) const; + /** + * Optimize using Eigen's dense Cholesky factorization + */ + VectorValues optimizeDensely() const; + /** * Compute the gradient of the energy function, * \f$ \nabla_{x=x_0} \left\Vert \Sigma^{-1} A x - b \right\Vert^2 \f$, diff --git a/gtsam/linear/GaussianJunctionTree.cpp b/gtsam/linear/GaussianJunctionTree.cpp index 982c17bd3..b038e5452 100644 --- a/gtsam/linear/GaussianJunctionTree.cpp +++ b/gtsam/linear/GaussianJunctionTree.cpp @@ -23,7 +23,7 @@ namespace gtsam { // Instantiate base classes - template class ClusterTree; + template class EliminatableClusterTree; template class JunctionTree; /* ************************************************************************* */ diff --git a/gtsam/linear/GaussianJunctionTree.h b/gtsam/linear/GaussianJunctionTree.h index f944d417b..67e5a374b 100644 --- a/gtsam/linear/GaussianJunctionTree.h +++ b/gtsam/linear/GaussianJunctionTree.h @@ -1,6 +1,6 @@ /* ---------------------------------------------------------------------------- - * GTSAM Copyright 2010, Georgia Tech Research Corporation, + * GTSAM Copyright 2010, Georgia Tech Research Corporation, * Atlanta, Georgia 30332-0415 * All Rights Reserved * Authors: Frank Dellaert, et al. (see THANKS for the full author list) @@ -26,21 +26,9 @@ namespace gtsam { class GaussianEliminationTree; /** - * A ClusterTree, i.e., a set of variable clusters with factors, arranged in a tree, with - * the additional property that it represents the clique tree associated with a Bayes net. - * - * In GTSAM a junction tree is an intermediate data structure in multifrontal - * variable elimination. Each node is a cluster of factors, along with a - * clique of variables that are eliminated all at once. In detail, every node k represents - * a clique (maximal fully connected subset) of an associated chordal graph, such as a - * chordal Bayes net resulting from elimination. - * - * The difference with the BayesTree is that a JunctionTree stores factors, whereas a - * BayesTree stores conditionals, that are the product of eliminating the factors in the - * corresponding JunctionTree cliques. - * - * The tree structure and elimination method are exactly analagous to the EliminationTree, - * except that in the JunctionTree, at each node multiple variables are eliminated at a time. + * A junction tree specialized to Gaussian factors, i.e., it is a cluster tree with Gaussian + * factors stored in each cluster. It can be eliminated into a Gaussian Bayes tree with the same + * structure, which is essentially doing multifrontal sparse matrix factorization. 
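+ *
+ * A minimal usage sketch (assuming an existing GaussianFactorGraph gfg and an
+ * Ordering ordering; the junction tree is what runs underneath this call):
+ *
+ *   GaussianBayesTree::shared_ptr bayesTree = gfg.eliminateMultifrontal(ordering);
+ *   VectorValues solution = bayesTree->optimize();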
* * \addtogroup Multifrontal * \nosubgrouping @@ -51,7 +39,7 @@ namespace gtsam { typedef JunctionTree Base; ///< Base class typedef GaussianJunctionTree This; ///< This class typedef boost::shared_ptr shared_ptr; ///< Shared pointer to this class - + /** * Build the elimination tree of a factor graph using pre-computed column structure. * @param factorGraph The factor graph for which to build the elimination tree diff --git a/gtsam/linear/HessianFactor-inl.h b/gtsam/linear/HessianFactor-inl.h index 4a21ab0ff..957ae4519 100644 --- a/gtsam/linear/HessianFactor-inl.h +++ b/gtsam/linear/HessianFactor-inl.h @@ -29,10 +29,10 @@ namespace gtsam { if((DenseIndex)Base::keys_.size() != augmentedInformation.nBlocks() - 1) throw std::invalid_argument( "Error in HessianFactor constructor input. Number of provided keys plus\n" - "one for the information vector must equal the number of provided matrix blocks."); + "one for the information vector must equal the number of provided matrix blocks. "); // Check RHS dimension - if(augmentedInformation(0, augmentedInformation.nBlocks() - 1).cols() != 1) + if(augmentedInformation.getDim(augmentedInformation.nBlocks() - 1) != 1) throw std::invalid_argument( "Error in HessianFactor constructor input. The last provided matrix block\n" "must be the information vector, but the last provided block had more than one column."); diff --git a/gtsam/linear/HessianFactor.cpp b/gtsam/linear/HessianFactor.cpp index 2d635d3c6..29b2b8591 100644 --- a/gtsam/linear/HessianFactor.cpp +++ b/gtsam/linear/HessianFactor.cpp @@ -57,10 +57,33 @@ using namespace boost::adaptors; namespace gtsam { +/* ************************************************************************* */ +void HessianFactor::Allocate(const Scatter& scatter) { + gttic(HessianFactor_Allocate); + + // Allocate with dimensions for each variable plus 1 at the end for the information vector + const size_t n = scatter.size(); + keys_.resize(n); + FastVector dims(n + 1); + DenseIndex slot = 0; + for(const SlotEntry& slotentry: scatter) { + keys_[slot] = slotentry.key; + dims[slot] = slotentry.dimension; + ++slot; + } + dims.back() = 1; + info_ = SymmetricBlockMatrix(dims); +} + +/* ************************************************************************* */ +HessianFactor::HessianFactor(const Scatter& scatter) { + Allocate(scatter); +} + /* ************************************************************************* */ HessianFactor::HessianFactor() : info_(cref_list_of<1>(1)) { - linearTerm().setZero(); + assert(info_.rows() == 1); constantTerm() = 0.0; } @@ -70,9 +93,9 @@ HessianFactor::HessianFactor(Key j, const Matrix& G, const Vector& g, double f) if (G.rows() != G.cols() || G.rows() != g.size()) throw invalid_argument( "Attempting to construct HessianFactor with inconsistent matrix and/or vector dimensions"); - info_(0, 0) = G; - info_(0, 1) = g; - info_(1, 1)(0, 0) = f; + info_.setDiagonalBlock(0, G); + linearTerm() = g; + constantTerm() = f; } /* ************************************************************************* */ @@ -83,9 +106,9 @@ HessianFactor::HessianFactor(Key j, const Vector& mu, const Matrix& Sigma) : if (Sigma.rows() != Sigma.cols() || Sigma.rows() != mu.size()) throw invalid_argument( "Attempting to construct HessianFactor with inconsistent matrix and/or vector dimensions"); - info_(0, 0) = Sigma.inverse(); // G - info_(0, 1) = info_(0, 0).selfadjointView() * mu; // g - info_(1, 1)(0, 0) = mu.dot(info_(0, 1).knownOffDiagonal().col(0)); // f + info_.setDiagonalBlock(0, Sigma.inverse()); // G + 
linearTerm() = info_.diagonalBlock(0) * mu; // g + constantTerm() = mu.dot(linearTerm().col(0)); // f } /* ************************************************************************* */ @@ -94,12 +117,11 @@ HessianFactor::HessianFactor(Key j1, Key j2, const Matrix& G11, double f) : GaussianFactor(cref_list_of<2>(j1)(j2)), info_( cref_list_of<3>(G11.cols())(G22.cols())(1)) { - info_(0, 0) = G11; - info_(0, 1) = G12; - info_(0, 2) = g1; - info_(1, 1) = G22; - info_(1, 2) = g2; - info_(2, 2)(0, 0) = f; + info_.setDiagonalBlock(0, G11); + info_.setOffDiagonalBlock(0, 1, G12); + info_.setDiagonalBlock(1, G22); + linearTerm() << g1, g2; + constantTerm() = f; } /* ************************************************************************* */ @@ -115,16 +137,14 @@ HessianFactor::HessianFactor(Key j1, Key j2, Key j3, const Matrix& G11, || G22.cols() != g2.size() || G33.cols() != g3.size()) throw invalid_argument( "Inconsistent matrix and/or vector dimensions in HessianFactor constructor"); - info_(0, 0) = G11; - info_(0, 1) = G12; - info_(0, 2) = G13; - info_(0, 3) = g1; - info_(1, 1) = G22; - info_(1, 2) = G23; - info_(1, 3) = g2; - info_(2, 2) = G33; - info_(2, 3) = g3; - info_(3, 3)(0, 0) = f; + info_.setDiagonalBlock(0, G11); + info_.setOffDiagonalBlock(0, 1, G12); + info_.setOffDiagonalBlock(0, 2, G13); + info_.setDiagonalBlock(1, G22); + info_.setOffDiagonalBlock(1, 2, G23); + info_.setDiagonalBlock(2, G33); + linearTerm() << g1, g2, g3; + constantTerm() = f; } /* ************************************************************************* */ @@ -174,11 +194,16 @@ HessianFactor::HessianFactor(const std::vector& js, size_t index = 0; for (size_t i = 0; i < variable_count; ++i) { for (size_t j = i; j < variable_count; ++j) { - info_(i, j) = Gs[index++]; + if (i == j) { + info_.setDiagonalBlock(i, Gs[index]); + } else { + info_.setOffDiagonalBlock(i, j, Gs[index]); + } + index++; } - info_(i, variable_count) = gs[i]; + info_.setOffDiagonalBlock(i, variable_count, gs[i]); } - info_(variable_count, variable_count)(0, 0) = f; + constantTerm() = f; } /* ************************************************************************* */ @@ -186,17 +211,17 @@ namespace { void _FromJacobianHelper(const JacobianFactor& jf, SymmetricBlockMatrix& info) { gttic(HessianFactor_fromJacobian); const SharedDiagonal& jfModel = jf.get_model(); + auto A = jf.matrixObject().full(); if (jfModel) { if (jf.get_model()->isConstrained()) throw invalid_argument( "Cannot construct HessianFactor from JacobianFactor with constrained noise model"); - info.full().triangularView() = - jf.matrixObject().full().transpose() - * (jfModel->invsigmas().array() * jfModel->invsigmas().array()).matrix().asDiagonal() - * jf.matrixObject().full(); + + auto D = (jfModel->invsigmas().array() * jfModel->invsigmas().array()).matrix().asDiagonal(); + + info.setFullMatrix(A.transpose() * D * A); } else { - info.full().triangularView() = jf.matrixObject().full().transpose() - * jf.matrixObject().full(); + info.setFullMatrix(A.transpose() * A); } } } @@ -227,32 +252,13 @@ HessianFactor::HessianFactor(const GaussianFactor& gf) : HessianFactor::HessianFactor(const GaussianFactorGraph& factors, boost::optional scatter) { gttic(HessianFactor_MergeConstructor); - boost::optional computedScatter; - if (!scatter) { - computedScatter = Scatter(factors); - scatter = computedScatter; - } - // Allocate and copy keys - gttic(allocate); - // Allocate with dimensions for each variable plus 1 at the end for the information vector - const size_t n = scatter->size(); - 
keys_.resize(n); - FastVector dims(n + 1); - DenseIndex slot = 0; - for(const SlotEntry& slotentry: *scatter) { - keys_[slot] = slotentry.key; - dims[slot] = slotentry.dimension; - ++slot; - } - dims.back() = 1; - info_ = SymmetricBlockMatrix(dims); - info_.full().triangularView().setZero(); - gttoc(allocate); + Allocate(scatter ? *scatter : Scatter(factors)); // Form A' * A gttic(update); - for(const GaussianFactor::shared_ptr& factor: factors) + info_.setZero(); + for(const auto& factor: factors) if (factor) factor->updateHessian(keys_, &info_); gttoc(update); @@ -266,7 +272,7 @@ void HessianFactor::print(const std::string& s, for (const_iterator key = begin(); key != end(); ++key) cout << formatter(*key) << "(" << getDim(key) << ") "; cout << "\n"; - gtsam::print(Matrix(info_.full().selfadjointView()), + gtsam::print(Matrix(info_.selfadjointView()), "Augmented information matrix: "); } @@ -281,22 +287,25 @@ bool HessianFactor::equals(const GaussianFactor& lf, double tol) const { /* ************************************************************************* */ Matrix HessianFactor::augmentedInformation() const { - return info_.full().selfadjointView(); + return info_.selfadjointView(); +} + +/* ************************************************************************* */ +Eigen::SelfAdjointView +HessianFactor::informationView() const { + return info_.selfadjointView(0, size()); } /* ************************************************************************* */ Matrix HessianFactor::information() const { - return info_.range(0, size(), 0, size()).selfadjointView(); + return informationView(); } /* ************************************************************************* */ VectorValues HessianFactor::hessianDiagonal() const { VectorValues d; - // Loop over all variables - for (DenseIndex j = 0; j < (DenseIndex) size(); ++j) { - // Get the diagonal block, and insert its diagonal - Matrix B = info_(j, j).selfadjointView(); - d.insert(keys_[j], B.diagonal()); + for (DenseIndex j = 0; j < (DenseIndex)size(); ++j) { + d.insert(keys_[j], info_.diagonal(j)); } return d; } @@ -314,8 +323,7 @@ map HessianFactor::hessianBlockDiagonal() const { // Loop over all variables for (DenseIndex j = 0; j < (DenseIndex) size(); ++j) { // Get the diagonal block, and insert it - Matrix B = info_(j, j).selfadjointView(); - blocks.insert(make_pair(keys_[j], B)); + blocks.emplace(keys_[j], info_.diagonalBlock(j)); } return blocks; } @@ -334,28 +342,44 @@ std::pair HessianFactor::jacobian() const { double HessianFactor::error(const VectorValues& c) const { // error 0.5*(f - 2*x'*g + x'*G*x) const double f = constantTerm(); + if (empty()) { + return 0.5 * f; + } double xtg = 0, xGx = 0; // extract the relevant subset of the VectorValues // NOTE may not be as efficient const Vector x = c.vector(keys()); - xtg = x.dot(linearTerm()); - xGx = x.transpose() * info_.range(0, size(), 0, size()).selfadjointView() * x; + xtg = x.dot(linearTerm().col(0)); + auto AtA = informationView(); + xGx = x.transpose() * AtA * x; return 0.5 * (f - 2.0 * xtg + xGx); } /* ************************************************************************* */ void HessianFactor::updateHessian(const FastVector& infoKeys, - SymmetricBlockMatrix* info) const { + SymmetricBlockMatrix* info) const { gttic(updateHessian_HessianFactor); + assert(info); // Apply updates to the upper triangle - DenseIndex n = size(), N = info->nBlocks() - 1; - vector slots(n + 1); - for (DenseIndex j = 0; j <= n; ++j) { - const DenseIndex J = (j == n) ? 
N : Slot(infoKeys, keys_[j]); + DenseIndex nrVariablesInThisFactor = size(), nrBlocksInInfo = info->nBlocks() - 1; + vector slots(nrVariablesInThisFactor + 1); + // Loop over this factor's blocks with indices (i,j) + // For every block (i,j), we determine the block (I,J) in info. + for (DenseIndex j = 0; j <= nrVariablesInThisFactor; ++j) { + const bool rhs = (j == nrVariablesInThisFactor); + const DenseIndex J = rhs ? nrBlocksInInfo : Slot(infoKeys, keys_[j]); slots[j] = J; for (DenseIndex i = 0; i <= j; ++i) { - const DenseIndex I = slots[i]; // because i<=j, slots[i] is valid. - (*info)(I, J) += info_(i, j); + const DenseIndex I = slots[i]; // because i<=j, slots[i] is valid. + + if (i == j) { + assert(I == J); + info->updateDiagonalBlock(I, info_.diagonalBlock(i)); + } else { + assert(i < j); + assert(I != J); + info->updateOffDiagonalBlock(I, J, info_.aboveDiagonalBlock(i, j)); + } } } } @@ -363,8 +387,8 @@ void HessianFactor::updateHessian(const FastVector& infoKeys, /* ************************************************************************* */ GaussianFactor::shared_ptr HessianFactor::negate() const { shared_ptr result = boost::make_shared(*this); - result->info_.full().triangularView() = - -result->info_.full().triangularView().nestedExpression(); // Negate the information matrix of the result + // Negate the information matrix of the result + result->info_.negate(); return result; } @@ -386,12 +410,13 @@ void HessianFactor::multiplyHessianAdd(double alpha, const VectorValues& x, Vector xj = x.at(keys_[j]); DenseIndex i = 0; for (; i < j; ++i) - y[i] += info_(i, j).knownOffDiagonal() * xj; + y[i] += info_.aboveDiagonalBlock(i, j) * xj; + // blocks on the diagonal are only half - y[i] += info_(j, j).selfadjointView() * xj; + y[i] += info_.diagonalBlock(j) * xj; // for below diagonal, we take transpose block from upper triangular part for (i = j + 1; i < (DenseIndex) size(); ++i) - y[i] += info_(i, j).knownOffDiagonal() * xj; + y[i] += info_.aboveDiagonalBlock(j, i).transpose() * xj; } // copy to yvalues @@ -411,7 +436,7 @@ VectorValues HessianFactor::gradientAtZero() const { VectorValues g; size_t n = size(); for (size_t j = 0; j < n; ++j) - g.insert(keys_[j], -info_(j, n).knownOffDiagonal()); + g.insert(keys_[j], -info_.aboveDiagonalBlock(j, n)); return g; } @@ -424,31 +449,80 @@ void HessianFactor::gradientAtZero(double* d) const { /* ************************************************************************* */ Vector HessianFactor::gradient(Key key, const VectorValues& x) const { - Factor::const_iterator i = find(key); + const Factor::const_iterator it_i = find(key); + const DenseIndex I = std::distance(begin(), it_i); + // Sum over G_ij*xj for all xj connecting to xi Vector b = Vector::Zero(x.at(key).size()); - for (Factor::const_iterator j = begin(); j != end(); ++j) { + for (Factor::const_iterator it_j = begin(); it_j != end(); ++it_j) { + const DenseIndex J = std::distance(begin(), it_j); + // Obtain Gij from the Hessian factor // Hessian factor only stores an upper triangular matrix, so be careful when i>j - Matrix Gij; - if (i > j) { - Matrix Gji = info(j, i); - Gij = Gji.transpose(); - } else { - Gij = info(i, j); - } + const Matrix Gij = info_.block(I, J); // Accumulate Gij*xj to gradf - b += Gij * x.at(*j); + b += Gij * x.at(*it_j); } // Subtract the linear term gi - b += -linearTerm(i); + b += -linearTerm(it_i); return b; } /* ************************************************************************* */ -std::pair, - boost::shared_ptr > EliminateCholesky( - const 
GaussianFactorGraph& factors, const Ordering& keys) { +boost::shared_ptr HessianFactor::eliminateCholesky(const Ordering& keys) { + gttic(HessianFactor_eliminateCholesky); + + GaussianConditional::shared_ptr conditional; + + try { + // Do dense elimination + size_t nFrontals = keys.size(); + assert(nFrontals <= size()); + info_.choleskyPartial(nFrontals); + + // TODO(frank): pre-allocate GaussianConditional and write into it + const VerticalBlockMatrix Ab = info_.split(nFrontals); + conditional = boost::make_shared(keys_, nFrontals, Ab); + + // Erase the eliminated keys in this factor + keys_.erase(begin(), begin() + nFrontals); + } catch (const CholeskyFailed& e) { + throw IndeterminantLinearSystemException(keys.front()); + } + + // Return result + return conditional; +} + +/* ************************************************************************* */ +VectorValues HessianFactor::solve() { + gttic(HessianFactor_solve); + + // Do Cholesky Factorization + const size_t n = size(); + assert(info_.nBlocks() == n + 1); + info_.choleskyPartial(n); + auto R = info_.triangularView(0, n); + auto eta = linearTerm(); + + // Solve + const Vector solution = R.solve(eta); + + // Parse into VectorValues + VectorValues delta; + std::size_t offset = 0; + for (DenseIndex j = 0; j < (DenseIndex)n; ++j) { + const DenseIndex dim = info_.getDim(j); + delta.insert(keys_[j], solution.segment(offset, dim)); + offset += dim; + } + + return delta; +} + +/* ************************************************************************* */ +std::pair, boost::shared_ptr > +EliminateCholesky(const GaussianFactorGraph& factors, const Ordering& keys) { gttic(EliminateCholesky); // Build joint factor @@ -459,23 +533,11 @@ std::pair, } catch (std::invalid_argument&) { throw InvalidDenseElimination( "EliminateCholesky was called with a request to eliminate variables that are not\n" - "involved in the provided factors."); + "involved in the provided factors."); } // Do dense elimination - GaussianConditional::shared_ptr conditional; - try { - size_t numberOfKeysToEliminate = keys.size(); - VerticalBlockMatrix Ab = jointFactor->info_.choleskyPartial( - numberOfKeysToEliminate); - conditional = boost::make_shared(jointFactor->keys(), - numberOfKeysToEliminate, Ab); - // Erase the eliminated keys in the remaining factor - jointFactor->keys_.erase(jointFactor->begin(), - jointFactor->begin() + numberOfKeysToEliminate); - } catch (const CholeskyFailed& e) { - throw IndeterminantLinearSystemException(keys.front()); - } + auto conditional = jointFactor->eliminateCholesky(keys); // Return result return make_pair(conditional, jointFactor); diff --git a/gtsam/linear/HessianFactor.h b/gtsam/linear/HessianFactor.h index b74d557ea..e28bcdd81 100644 --- a/gtsam/linear/HessianFactor.h +++ b/gtsam/linear/HessianFactor.h @@ -35,12 +35,6 @@ namespace gtsam { class GaussianBayesNet; class GaussianFactorGraph; - GTSAM_EXPORT std::pair, boost::shared_ptr > - EliminatePreferCholesky(const GaussianFactorGraph& factors, const Ordering& keys); - - GTSAM_EXPORT std::pair, boost::shared_ptr > - EliminateCholesky(const GaussianFactorGraph& factors, const Ordering& keys); - /** * @brief A Gaussian factor using the canonical parameters (information form) * @@ -206,7 +200,9 @@ namespace gtsam { * @param variable An iterator pointing to the slot in this factor. You can * use, for example, begin() + 2 to get the 3rd variable in this factor. 
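 *
 * A minimal sketch (assuming a HessianFactor hf over at least three variables):
 *   DenseIndex d3 = hf.getDim(hf.begin() + 2); // dimension of the 3rd variable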
*/ - virtual DenseIndex getDim(const_iterator variable) const { return info_(variable-this->begin(), 0).rows(); } + virtual DenseIndex getDim(const_iterator variable) const { + return info_.getDim(std::distance(begin(), variable)); + } /** Return the number of columns and rows of the Hessian matrix, including the information vector. */ size_t rows() const { return info_.rows(); } @@ -217,80 +213,54 @@ namespace gtsam { * @return a HessianFactor with negated Hessian matrices */ virtual GaussianFactor::shared_ptr negate() const; - + /** Check if the factor is empty. TODO: How should this be defined? */ virtual bool empty() const { return size() == 0 /*|| rows() == 0*/; } - /** Return a view of the block at (j1,j2) of the upper-triangular part of the - * information matrix \f$ H \f$, no data is copied. See HessianFactor class documentation - * above to explain that only the upper-triangular part of the information matrix is stored - * and returned by this function. - * @param j1 Which block row to get, as an iterator pointing to the slot in this factor. You can - * use, for example, begin() + 2 to get the 3rd variable in this factor. - * @param j2 Which block column to get, as an iterator pointing to the slot in this factor. You can - * use, for example, begin() + 2 to get the 3rd variable in this factor. - * @return A view of the requested block, not a copy. + /** Return the constant term \f$ f \f$ as described above + * @return The constant term \f$ f \f$ */ - constBlock info(const_iterator j1, const_iterator j2) const { return info_(j1-begin(), j2-begin()); } - - /** Return a view of the block at (j1,j2) of the upper-triangular part of the - * information matrix \f$ H \f$, no data is copied. See HessianFactor class documentation - * above to explain that only the upper-triangular part of the information matrix is stored - * and returned by this function. - * @param j1 Which block row to get, as an iterator pointing to the slot in this factor. You can - * use, for example, begin() + 2 to get the 3rd variable in this factor. - * @param j2 Which block column to get, as an iterator pointing to the slot in this factor. You can - * use, for example, begin() + 2 to get the 3rd variable in this factor. - * @return A view of the requested block, not a copy. - */ - Block info(iterator j1, iterator j2) { return info_(j1-begin(), j2-begin()); } - - /** Return the upper-triangular part of the full *augmented* information matrix, - * as described above. See HessianFactor class documentation above to explain that only the - * upper-triangular part of the information matrix is stored and returned by this function. - */ - SymmetricBlockMatrix::constBlock info() const { return info_.full(); } - - /** Return the upper-triangular part of the full *augmented* information matrix, - * as described above. See HessianFactor class documentation above to explain that only the - * upper-triangular part of the information matrix is stored and returned by this function. 
- */ - SymmetricBlockMatrix::Block info() { return info_.full(); } + double constantTerm() const { + const auto view = info_.diagonalBlock(size()); + return view(0, 0); + } /** Return the constant term \f$ f \f$ as described above * @return The constant term \f$ f \f$ */ - double constantTerm() const { return info_(this->size(), this->size())(0,0); } - - /** Return the constant term \f$ f \f$ as described above - * @return The constant term \f$ f \f$ - */ - double& constantTerm() { return info_(this->size(), this->size())(0,0); } + double& constantTerm() { return info_.diagonalBlock(size())(0, 0); } /** Return the part of linear term \f$ g \f$ as described above corresponding to the requested variable. * @param j Which block row to get, as an iterator pointing to the slot in this factor. You can * use, for example, begin() + 2 to get the 3rd variable in this factor. * @return The linear term \f$ g \f$ */ - constBlock::OffDiagonal::ColXpr linearTerm(const_iterator j) const { - return info_(j-begin(), size()).knownOffDiagonal().col(0); } - - /** Return the part of linear term \f$ g \f$ as described above corresponding to the requested variable. - * @param j Which block row to get, as an iterator pointing to the slot in this factor. You can - * use, for example, begin() + 2 to get the 3rd variable in this factor. - * @return The linear term \f$ g \f$ */ - Block::OffDiagonal::ColXpr linearTerm(iterator j) { - return info_(j-begin(), size()).knownOffDiagonal().col(0); } + SymmetricBlockMatrix::constBlock linearTerm(const_iterator j) const { + assert(!empty()); + return info_.aboveDiagonalBlock(j - begin(), size()); + } /** Return the complete linear term \f$ g \f$ as described above. * @return The linear term \f$ g \f$ */ - constBlock::OffDiagonal::ColXpr linearTerm() const { - return info_.range(0, this->size(), this->size(), this->size() + 1).knownOffDiagonal().col(0); } + SymmetricBlockMatrix::constBlock linearTerm() const { + assert(!empty()); + // get the last column (except the bottom right block) + return info_.aboveDiagonalRange(0, size(), size(), size() + 1); + } /** Return the complete linear term \f$ g \f$ as described above. * @return The linear term \f$ g \f$ */ - Block::OffDiagonal::ColXpr linearTerm() { - return info_.range(0, this->size(), this->size(), this->size() + 1).knownOffDiagonal().col(0); } - + SymmetricBlockMatrix::Block linearTerm() { + assert(!empty()); + return info_.aboveDiagonalRange(0, size(), size(), size() + 1); + } + + /// Return underlying information matrix. + const SymmetricBlockMatrix& info() const { return info_; } + + /// Return non-const information matrix. + /// TODO(gareth): Review the sanity of having non-const access to this. + SymmetricBlockMatrix& info() { return info_; } + /** Return the augmented information matrix represented by this GaussianFactor. * The augmented information matrix contains the information matrix with an * additional column holding the information vector, and an additional row @@ -308,6 +278,9 @@ namespace gtsam { */ virtual Matrix augmentedInformation() const; + /// Return self-adjoint view onto the information matrix (NOT augmented). + Eigen::SelfAdjointView informationView() const; + /** Return the non-augmented information matrix represented by this * GaussianFactor. 
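 *
 * A minimal sketch contrasting the two accessors (assuming a HessianFactor hf whose
 * variables have total dimension n): hf.information() returns the n x n matrix G,
 * while hf.augmentedInformation() returns the (n+1) x (n+1) matrix [G g; g' f].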
*/ @@ -322,11 +295,7 @@ namespace gtsam { /// Return the block diagonal of the Hessian for this factor virtual std::map hessianBlockDiagonal() const; - /** - * Return (dense) matrix associated with factor - * @param ordering of variables needed for matrix column order - * @param set weight to true to bake in the weights - */ + /// Return (dense) matrix associated with factor virtual std::pair jacobian() const; /** @@ -336,16 +305,21 @@ namespace gtsam { */ virtual Matrix augmentedJacobian() const; - /** Return the full augmented Hessian matrix of this factor as a SymmetricBlockMatrix object. */ - const SymmetricBlockMatrix& matrixObject() const { return info_; } - /** Update an information matrix by adding the information corresponding to this factor * (used internally during elimination). - * @param scatter A mapping from variable index to slot index in this HessianFactor + * @param keys The ordered vector of keys for the information matrix to be updated * @param info The information matrix to be updated */ void updateHessian(const FastVector& keys, SymmetricBlockMatrix* info) const; + /** Update another Hessian factor + * @param other the HessianFactor to be updated + */ + void updateHessian(HessianFactor* other) const { + assert(other); + updateHessian(other->keys_, &other->info_); + } + /** y += alpha * A'*A*x */ void multiplyHessianAdd(double alpha, const VectorValues& x, VectorValues& y) const; @@ -362,44 +336,33 @@ namespace gtsam { Vector gradient(Key key, const VectorValues& x) const; /** - * Densely partially eliminate with Cholesky factorization. JacobianFactors are - * left-multiplied with their transpose to form the Hessian using the conversion constructor - * HessianFactor(const JacobianFactor&). - * - * If any factors contain constrained noise models, this function will fail because our current - * implementation cannot handle constrained noise models in Cholesky factorization. The - * function EliminatePreferCholesky() automatically does QR instead when this is the case. - * - * Variables are eliminated in the order specified in \c keys. - * - * @param factors Factors to combine and eliminate - * @param keys The variables to eliminate and their elimination ordering - * @return The conditional and remaining factor - * - * \addtogroup LinearSolving */ - friend GTSAM_EXPORT std::pair, boost::shared_ptr > - EliminateCholesky(const GaussianFactorGraph& factors, const Ordering& keys); + * In-place elimination that returns a conditional on (ordered) keys specified, and leaves + * this factor to be on the remaining keys (separator) only. Does dense partial Cholesky. + */ + boost::shared_ptr eliminateCholesky(const Ordering& keys); - /** - * Densely partially eliminate with Cholesky factorization. JacobianFactors are - * left-multiplied with their transpose to form the Hessian using the conversion constructor - * HessianFactor(const JacobianFactor&). - * - * This function will fall back on QR factorization for any cliques containing JacobianFactor's - * with constrained noise models. - * - * Variables are eliminated in the order specified in \c keys.
- * - * @param factors Factors to combine and eliminate - * @param keys The variables to eliminate and their elimination ordering - * @return The conditional and remaining factor - * - * \addtogroup LinearSolving */ - friend GTSAM_EXPORT std::pair, boost::shared_ptr > - EliminatePreferCholesky(const GaussianFactorGraph& factors, const Ordering& keys); + /// Solve the system A'*A delta = A'*b in-place, return delta as VectorValues + VectorValues solve(); + + +#ifdef GTSAM_ALLOW_DEPRECATED_SINCE_V4 + /// @name Deprecated + /// @{ + const SymmetricBlockMatrix& matrixObject() const { return info_; } + /// @} +#endif private: + /// Allocate for given scatter pattern + void Allocate(const Scatter& scatter); + + /// Constructor with given scatter pattern, allocating but not initializing storage. + HessianFactor(const Scatter& scatter); + + friend class NonlinearFactorGraph; + friend class NonlinearClusterTree; + /** Serialization function */ friend class boost::serialization::access; template @@ -409,6 +372,43 @@ namespace gtsam { } }; +/** +* Densely partially eliminate with Cholesky factorization. JacobianFactors are +* left-multiplied with their transpose to form the Hessian using the conversion constructor +* HessianFactor(const JacobianFactor&). +* +* If any factors contain constrained noise models, this function will fail because our current +* implementation cannot handle constrained noise models in Cholesky factorization. The +* function EliminatePreferCholesky() automatically does QR instead when this is the case. +* +* Variables are eliminated in the order specified in \c keys. +* +* @param factors Factors to combine and eliminate +* @param keys The variables to eliminate and their elimination ordering +* @return The conditional and remaining factor +* +* \addtogroup LinearSolving */ +GTSAM_EXPORT std::pair, boost::shared_ptr > + EliminateCholesky(const GaussianFactorGraph& factors, const Ordering& keys); + +/** +* Densely partially eliminate with Cholesky factorization. JacobianFactors are +* left-multiplied with their transpose to form the Hessian using the conversion constructor +* HessianFactor(const JacobianFactor&). +* +* This function will fall back on QR factorization for any cliques containing JacobianFactor's +* with constrained noise models. +* +* Variables are eliminated in the order specified in \c keys. 
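+*
+* A minimal usage sketch (assuming an existing GaussianFactorGraph factors and
+* an Ordering keys):
+*
+*   auto result = EliminatePreferCholesky(factors, keys);
+*   // result.first is the conditional, result.second the remaining factor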
+* +* @param factors Factors to combine and eliminate +* @param keys The variables to eliminate and their elimination ordering +* @return The conditional and remaining factor +* +* \addtogroup LinearSolving */ +GTSAM_EXPORT std::pair, boost::shared_ptr > + EliminatePreferCholesky(const GaussianFactorGraph& factors, const Ordering& keys); + +/// traits template<> struct traits : public Testable {}; diff --git a/gtsam/linear/JacobianFactor.cpp b/gtsam/linear/JacobianFactor.cpp index 999d7a325..06c8f3f64 100644 --- a/gtsam/linear/JacobianFactor.cpp +++ b/gtsam/linear/JacobianFactor.cpp @@ -104,10 +104,10 @@ JacobianFactor::JacobianFactor(const Key i1, const Matrix& A1, Key i2, /* ************************************************************************* */ JacobianFactor::JacobianFactor(const HessianFactor& factor) : Base(factor), Ab_( - VerticalBlockMatrix::LikeActiveViewOf(factor.matrixObject(), + VerticalBlockMatrix::LikeActiveViewOf(factor.info(), factor.rows())) { // Copy Hessian into our matrix and then do in-place Cholesky - Ab_.full() = factor.matrixObject().full(); + Ab_.full() = factor.info().selfadjointView(); // Do Cholesky to get a Jacobian size_t maxrank; @@ -532,10 +532,10 @@ void JacobianFactor::updateHessian(const FastVector& infoKeys, // Fill off-diagonal blocks with Ai'*Aj for (DenseIndex i = 0; i < j; ++i) { const DenseIndex I = slots[i]; // because i<j, slots[i] is valid. - (*info)(I, J).knownOffDiagonal() += Ab_(i).transpose() * Ab_j; + info->updateOffDiagonalBlock(I, J, Ab_(i).transpose() * Ab_j); } // Fill diagonal block with Aj'*Aj - (*info)(J, J).selfadjointView().rankUpdate(Ab_j.transpose()); + info->diagonalBlock(J).rankUpdate(Ab_j.transpose()); } } } diff --git a/gtsam/linear/JacobianFactor.h b/gtsam/linear/JacobianFactor.h index 3795f096e..14ff46241 100644 --- a/gtsam/linear/JacobianFactor.h +++ b/gtsam/linear/JacobianFactor.h @@ -37,6 +37,11 @@ namespace gtsam { class Ordering; class JacobianFactor; + /** + * Multiply all factors and eliminate the given keys from the resulting factor using a QR + * variant that handles constraints (zero sigmas). Computation happens in noiseModel::Gaussian::QR + * Returns a conditional on those keys, and a new factor on the separator. + */ GTSAM_EXPORT std::pair, boost::shared_ptr > EliminateQR(const GaussianFactorGraph& factors, const Ordering& keys); @@ -178,12 +183,12 @@ namespace gtsam { * which in fact stores an augmented information matrix. */ virtual Matrix augmentedInformation() const; - + /** Return the non-augmented information matrix represented by this * GaussianFactor. */ virtual Matrix information() const; - + /// Return the diagonal of the Hessian for this factor virtual VectorValues hessianDiagonal() const; @@ -197,7 +202,7 @@ namespace gtsam { * @brief Returns (dense) A,b pair associated with factor, bakes in the weights */ virtual std::pair jacobian() const; - + /** * @brief Returns (dense) A,b pair associated with factor, does not bake in weights */ @@ -319,7 +324,7 @@ namespace gtsam { /** set noiseModel correctly */ void setModel(bool anyConstrained, const Vector& sigmas); - + /** * Densely partially eliminate with QR factorization, this is usually provided as an argument to * one of the factor graph elimination functions (see EliminateableFactorGraph).
HessianFactors @@ -348,7 +353,7 @@ namespace gtsam { /// Internal function to fill blocks and set dimensions template void fillTerms(const TERMS& terms, const Vector& b, const SharedDiagonal& noiseModel); - + private: /** Unsafe Constructor that creates an uninitialized Jacobian of right size diff --git a/gtsam/linear/NoiseModel.cpp b/gtsam/linear/NoiseModel.cpp index 02dd37ea9..4d9c7883b 100644 --- a/gtsam/linear/NoiseModel.cpp +++ b/gtsam/linear/NoiseModel.cpp @@ -886,6 +886,30 @@ DCS::shared_ptr DCS::Create(double c, const ReweightScheme reweight) { return shared_ptr(new DCS(c, reweight)); } +/* ************************************************************************* */ +// L2WithDeadZone +/* ************************************************************************* */ + +L2WithDeadZone::L2WithDeadZone(double k, const ReweightScheme reweight) : Base(reweight), k_(k) { + if (k_ <= 0) { + throw runtime_error("mEstimator L2WithDeadZone takes only positive double in constructor."); + } +} + +void L2WithDeadZone::print(const std::string &s) const { + std::cout << s << ": L2WithDeadZone (" << k_ << ")" << std::endl; +} + +bool L2WithDeadZone::equals(const Base &expected, double tol) const { + const L2WithDeadZone* p = dynamic_cast(&expected); + if (p == NULL) return false; + return fabs(k_ - p->k_) < tol; +} + +L2WithDeadZone::shared_ptr L2WithDeadZone::Create(double k, const ReweightScheme reweight) { + return shared_ptr(new L2WithDeadZone(k, reweight)); +} + } // namespace mEstimator /* ************************************************************************* */ diff --git a/gtsam/linear/NoiseModel.h b/gtsam/linear/NoiseModel.h index 3a8a6744c..e495921c2 100644 --- a/gtsam/linear/NoiseModel.h +++ b/gtsam/linear/NoiseModel.h @@ -1,6 +1,6 @@ /* ---------------------------------------------------------------------------- - * GTSAM Copyright 2010, Georgia Tech Research Corporation, + * GTSAM Copyright 2010, Georgia Tech Research Corporation, * Atlanta, Georgia 30332-0415 * All Rights Reserved * Authors: Frank Dellaert, et al. (see THANKS for the full author list) @@ -662,7 +662,29 @@ namespace gtsam { Base(const ReweightScheme reweight = Block):reweight_(reweight) {} virtual ~Base() {} - /// robust error function to implement + /* + * This method is responsible for returning the total penalty for a given amount of error. + * For example, this method is responsible for implementing the quadratic function for an + * L2 penalty, the absolute value function for an L1 penalty, etc. + * + * TODO(mike): There is currently a bug in GTSAM, where none of the mEstimator classes + * implement a residual function, and GTSAM calls the weight function to evaluate the + * total penalty, rather than calling the residual function. The weight function should be + * used during iteratively reweighted least squares optimization, but should not be used to + * evaluate the total penalty. The long-term solution is for all mEstimators to implement + * both a weight and a residual function, and for GTSAM to call the residual function when + * evaluating the total penalty. But for now, I'm leaving this residual method as non-pure + * virtual, so the existing mEstimators can inherit this default fallback behavior. + */ + virtual double residual(double error) const { return 0; } + + /* + * This method is responsible for returning the weight function for a given amount of error. + * The weight function is related to the analytic derivative of the residual function.
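+ * (As a minimal concrete check: for a plain quadratic penalty residual(e) = 0.5*e^2,
+ * the derivative is e, so the corresponding weight is residual'(e)/e = 1.)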
See + * http://research.microsoft.com/en-us/um/people/zhang/INRIA/Publis/Tutorial-Estim/node24.html + * for details. This method is required when optimizing cost functions with robust penalties + * using iteratively re-weighted least squares. + */ virtual double weight(double error) const = 0; virtual void print(const std::string &s) const = 0; @@ -916,6 +938,45 @@ namespace gtsam { } }; + /// L2WithDeadZone implements a standard L2 penalty, but with a dead zone of width 2*k, + /// centered at the origin. The resulting penalty within the dead zone is always zero, and + /// grows quadratically outside the dead zone. In this sense, the L2WithDeadZone penalty is + /// "robust to inliers", rather than being robust to outliers. This penalty can be used to + /// create barrier functions in a general way. + class GTSAM_EXPORT L2WithDeadZone : public Base { + protected: + double k_; + + public: + typedef boost::shared_ptr shared_ptr; + + L2WithDeadZone(double k, const ReweightScheme reweight = Block); + double residual(double error) const { + const double abs_error = fabs(error); + return (abs_error < k_) ? 0.0 : 0.5*(k_-abs_error)*(k_-abs_error); + } + double weight(double error) const { + // note that this code is slightly uglier than above, because there are three distinct + // cases to handle (left of deadzone, deadzone, right of deadzone) instead of the two + // cases (deadzone, non-deadzone) above. + if (fabs(error) <= k_) return 0.0; + else if (error > k_) return (-k_+error)/error; + else return (k_+error)/error; + } + void print(const std::string &s) const; + bool equals(const Base& expected, double tol=1e-8) const; + static shared_ptr Create(double k, const ReweightScheme reweight = Block); + + private: + /** Serialization function */ + friend class boost::serialization::access; + template + void serialize(ARCHIVE & ar, const unsigned int /*version*/) { + ar & BOOST_SERIALIZATION_BASE_OBJECT_NVP(Base); + ar & BOOST_SERIALIZATION_NVP(k_); + } + }; + } ///\namespace mEstimator /** @@ -976,7 +1037,9 @@ namespace gtsam { { throw std::invalid_argument("unwhiten is not currently supported for robust noise models."); } inline virtual double distance(const Vector& v) const { return this->whiten(v).squaredNorm(); } - + // TODO(mike): fold the use of the m-estimator residual(...) function into distance(...) 
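+ // Note: with the L2WithDeadZone residual defined above, the method below evaluates
+ // to 0 for ||v|| <= k and to 0.5*(||v|| - k)^2 otherwise, i.e. the robust penalty
+ // of the unwhitened norm.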
+ inline virtual double distance_non_whitened(const Vector& v) const + { return robust_->residual(v.norm()); } // TODO: these are really robust iterated re-weighting support functions virtual void WhitenSystem(Vector& b) const; virtual void WhitenSystem(std::vector& A, Vector& b) const; @@ -997,7 +1060,7 @@ namespace gtsam { ar & boost::serialization::make_nvp("noise_", const_cast(noise_)); } }; - + // Helper function GTSAM_EXPORT boost::optional checkIfDiagonal(const Matrix M); diff --git a/gtsam/linear/RegularHessianFactor.h b/gtsam/linear/RegularHessianFactor.h index cf714e71d..f4df9d96b 100644 --- a/gtsam/linear/RegularHessianFactor.h +++ b/gtsam/linear/RegularHessianFactor.h @@ -125,12 +125,12 @@ public: const double* xj = x + key * D; DenseIndex i = 0; for (; i < j; ++i) - y_[i] += info_(i, j).knownOffDiagonal() * ConstDMap(xj); + y_[i] += info_.aboveDiagonalBlock(i, j) * ConstDMap(xj); // blocks on the diagonal are only half - y_[i] += info_(j, j).selfadjointView() * ConstDMap(xj); + y_[i] += info_.diagonalBlock(j) * ConstDMap(xj); // for below diagonal, we take transpose block from upper triangular part for (i = j + 1; i < (DenseIndex) size(); ++i) - y_[i] += info_(i, j).knownOffDiagonal() * ConstDMap(xj); + y_[i] += info_.aboveDiagonalBlock(j, i).transpose() * ConstDMap(xj); } // copy to yvalues @@ -155,16 +155,16 @@ public: for (DenseIndex j = 0; j < (DenseIndex) size(); ++j) { DenseIndex i = 0; for (; i < j; ++i) - y_[i] += info_(i, j).knownOffDiagonal() + y_[i] += info_.aboveDiagonalBlock(i, j) * ConstDMap(x + offsets[keys_[j]], offsets[keys_[j] + 1] - offsets[keys_[j]]); // blocks on the diagonal are only half - y_[i] += info_(j, j).selfadjointView() + y_[i] += info_.diagonalBlock(j) * ConstDMap(x + offsets[keys_[j]], offsets[keys_[j] + 1] - offsets[keys_[j]]); // for below diagonal, we take transpose block from upper triangular part for (i = j + 1; i < (DenseIndex) size(); ++i) - y_[i] += info_(i, j).knownOffDiagonal() + y_[i] += info_.aboveDiagonalBlock(j, i).transpose() * ConstDMap(x + offsets[keys_[j]], offsets[keys_[j] + 1] - offsets[keys_[j]]); } @@ -182,8 +182,7 @@ public: for (DenseIndex pos = 0; pos < (DenseIndex) size(); ++pos) { Key j = keys_[pos]; // Get the diagonal block, and insert its diagonal - const Matrix& B = info_(pos, pos).selfadjointView(); - DMap(d + D * j) += B.diagonal(); + DMap(d + D * j) += info_.diagonal(pos); } } @@ -194,8 +193,7 @@ public: for (DenseIndex pos = 0; pos < (DenseIndex) size(); ++pos) { Key j = keys_[pos]; // Get the linear term block, and subtract it - VectorD dj = -info_(pos, size()).knownOffDiagonal(); - DMap(d + D * j) += dj; + DMap(d + D * j) -= info_.aboveDiagonalBlock(pos, size()); } } diff --git a/gtsam/linear/Scatter.cpp b/gtsam/linear/Scatter.cpp index 719b3d16c..db5c25442 100644 --- a/gtsam/linear/Scatter.cpp +++ b/gtsam/linear/Scatter.cpp @@ -40,14 +40,15 @@ Scatter::Scatter(const GaussianFactorGraph& gfg, // If we have an ordering, pre-fill the ordered variables first if (ordering) { - for (Key key: *ordering) { - push_back(SlotEntry(key, 0)); + for (Key key : *ordering) { + add(key, 0); } } // Now, find dimensions of variables and/or extend - for (const GaussianFactor::shared_ptr& factor: gfg) { - if (!factor) continue; + for (const auto& factor : gfg) { + if (!factor) + continue; // TODO: Fix this hack to cope with zero-row Jacobians that come from BayesTreeOrphanWrappers const JacobianFactor* asJacobian = dynamic_cast(factor.get()); @@ -61,7 +62,7 @@ Scatter::Scatter(const GaussianFactorGraph& gfg, if
(it!=end()) it->dimension = factor->getDim(variable); else - push_back(SlotEntry(key, factor->getDim(variable))); + add(key, factor->getDim(variable)); } } @@ -74,6 +75,11 @@ Scatter::Scatter(const GaussianFactorGraph& gfg, erase(std::remove_if(begin(), end(), SlotEntry::Zero), end()); } +/* ************************************************************************* */ +void Scatter::add(Key key, size_t dim) { + emplace_back(key, dim); +} + /* ************************************************************************* */ FastVector::iterator Scatter::find(Key key) { iterator it = begin(); diff --git a/gtsam/linear/Scatter.h b/gtsam/linear/Scatter.h index 39bfa6b8d..793961c59 100644 --- a/gtsam/linear/Scatter.h +++ b/gtsam/linear/Scatter.h @@ -50,10 +50,16 @@ struct GTSAM_EXPORT SlotEntry { */ class Scatter : public FastVector { public: - /// Constructor + /// Default Constructor + Scatter() {} + + /// Construct from Gaussian factor graph, with optional (partial or complete) ordering Scatter(const GaussianFactorGraph& gfg, boost::optional ordering = boost::none); + /// Add a key/dim pair + void add(Key key, size_t dim); + private: /// Find the SlotEntry with the right key (linear time worst case) diff --git a/gtsam/linear/SubgraphSolver.cpp b/gtsam/linear/SubgraphSolver.cpp index 70212b5b1..22d39a7f2 100644 --- a/gtsam/linear/SubgraphSolver.cpp +++ b/gtsam/linear/SubgraphSolver.cpp @@ -144,12 +144,12 @@ SubgraphSolver::splitGraph(const GaussianFactorGraph &jfg) { if (gf->keys().size() == 1) augment = true; else { - const Key u = gf->keys()[0], v = gf->keys()[1], u_root = D.findSet(u), - v_root = D.findSet(v); + const Key u = gf->keys()[0], v = gf->keys()[1], u_root = D.find(u), + v_root = D.find(v); if (u_root != v_root) { t++; augment = true; - D.makeUnionInPlace(u_root, v_root); + D.merge(u_root, v_root); } } if (augment) diff --git a/gtsam/linear/VectorValues.cpp b/gtsam/linear/VectorValues.cpp index bce339dca..35afddb3a 100644 --- a/gtsam/linear/VectorValues.cpp +++ b/gtsam/linear/VectorValues.cpp @@ -47,15 +47,24 @@ namespace gtsam { VectorValues::VectorValues(const Vector& x, const Dims& dims) { typedef pair Pair; size_t j = 0; - for(const Pair& v: dims) { + for (const Pair& v : dims) { Key key; size_t n; boost::tie(key, n) = v; - values_.insert(make_pair(key, x.segment(j,n))); + values_.insert(make_pair(key, x.segment(j, n))); j += n; } } + /* ************************************************************************* */ + VectorValues::VectorValues(const Vector& x, const Scatter& scatter) { + size_t j = 0; + for (const SlotEntry& v : scatter) { + values_.insert(make_pair(v.key, x.segment(j, v.dimension))); + j += v.dimension; + } + } + /* ************************************************************************* */ VectorValues VectorValues::Zero(const VectorValues& other) { diff --git a/gtsam/linear/VectorValues.h b/gtsam/linear/VectorValues.h index 0c752728a..cb1e08f2d 100644 --- a/gtsam/linear/VectorValues.h +++ b/gtsam/linear/VectorValues.h @@ -17,11 +17,12 @@ #pragma once +#include +#include #include #include #include #include -#include #include @@ -124,9 +125,12 @@ namespace gtsam { template VectorValues(ITERATOR first, ITERATOR last) : values_(first, last) {} - /** Constructor from Vector. */ + /// Constructor from Vector, with Dims VectorValues(const Vector& c, const Dims& dims); + /// Constructor from Vector, with Scatter + VectorValues(const Vector& c, const Scatter& scatter); + /** Create a VectorValues with the same structure as \c other, but filled with zeros.
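 * A minimal sketch (assuming an existing VectorValues x):
 *   VectorValues z = VectorValues::Zero(x); // same keys and dimensions, all zero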
*/ static VectorValues Zero(const VectorValues& other); diff --git a/gtsam/linear/linearAlgorithms-inst.h b/gtsam/linear/linearAlgorithms-inst.h index 0583d1803..ac8c2d7c7 100644 --- a/gtsam/linear/linearAlgorithms-inst.h +++ b/gtsam/linear/linearAlgorithms-inst.h @@ -57,8 +57,8 @@ namespace gtsam // Take any ancestor results we'll need for(Key parent: clique->conditional_->parents()) myData.cliqueResults.insert(std::make_pair(parent, myData.parentData->cliqueResults.at(parent))); + // Solve and store in our results - //collectedResult.insert(clique->conditional()->solve(collectedResult/*myData.ancestorResults*/)); { GaussianConditional& c = *clique->conditional(); // Solve matrix @@ -82,17 +82,24 @@ namespace gtsam vectorPos += parentVector.size(); } } - xS = c.getb() - c.get_S() * xS; - Vector soln = c.get_R().triangularView().solve(xS); + + // NOTE(gareth): We can no longer write: xS = b - S * xS + // This is because Eigen (as of 3.3) no longer evaluates S * xS into + // a temporary, and the operation trashes values in xS. + // See: http://eigen.tuxfamily.org/index.php?title=3.3 + const Vector rhs = c.getb() - c.get_S() * xS; + + // TODO(gareth): Inline instantiation of Eigen::Solve and check flag + const Vector solution = c.get_R().triangularView().solve(rhs); // Check for indeterminate solution - if(soln.hasNaN()) throw IndeterminantLinearSystemException(c.keys().front()); + if(solution.hasNaN()) throw IndeterminantLinearSystemException(c.keys().front()); // Insert solution into a VectorValues DenseIndex vectorPosition = 0; for(GaussianConditional::const_iterator frontal = c.beginFrontals(); frontal != c.endFrontals(); ++frontal) { VectorValues::const_iterator r = - collectedResult.insert(*frontal, soln.segment(vectorPosition, c.getDim(frontal))); + collectedResult.insert(*frontal, solution.segment(vectorPosition, c.getDim(frontal))); myData.cliqueResults.insert(make_pair(r->first, r)); vectorPosition += c.getDim(frontal); } diff --git a/gtsam/linear/tests/testGaussianBayesNet.cpp b/gtsam/linear/tests/testGaussianBayesNet.cpp index 2a1dc088f..89b44f1f8 100644 --- a/gtsam/linear/tests/testGaussianBayesNet.cpp +++ b/gtsam/linear/tests/testGaussianBayesNet.cpp @@ -36,17 +36,23 @@ using namespace gtsam; static const Key _x_=0, _y_=1; -static GaussianBayesNet smallBayesNet = list_of - (GaussianConditional(_x_, (Vector(1) << 9.0).finished(), (Matrix(1, 1) << 1.0).finished(), _y_, (Matrix(1, 1) << 1.0).finished())) - (GaussianConditional(_y_, (Vector(1) << 5.0).finished(), (Matrix(1, 1) << 1.0).finished())); +static GaussianBayesNet smallBayesNet = + list_of(GaussianConditional(_x_, Vector1::Constant(9), I_1x1, _y_, I_1x1))( + GaussianConditional(_y_, Vector1::Constant(5), I_1x1)); + +static GaussianBayesNet noisyBayesNet = + list_of(GaussianConditional(_x_, Vector1::Constant(9), I_1x1, _y_, I_1x1, + noiseModel::Diagonal::Sigmas(Vector1::Constant(2))))( + GaussianConditional(_y_, Vector1::Constant(5), I_1x1, + noiseModel::Diagonal::Sigmas(Vector1::Constant(3)))); /* ************************************************************************* */ -TEST( GaussianBayesNet, matrix ) +TEST( GaussianBayesNet, Matrix ) { Matrix R; Vector d; boost::tie(R,d) = smallBayesNet.matrix(); // find matrix and RHS - Matrix R1 = (Matrix(2, 2) << + Matrix R1 = (Matrix2() << 1.0, 1.0, 0.0, 1.0 ).finished(); @@ -57,31 +63,55 @@ } /* ************************************************************************* */ -TEST( GaussianBayesNet, optimize ) +TEST( GaussianBayesNet, NoisyMatrix ) {
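+ // The expected (R, d) below are the whitened values: each conditional row is
+ // divided by its sigma, so [1 1; 0 1] with rhs (9, 5) becomes
+ // [1/2 1/2; 0 1/3] with rhs (9/2, 5/3) for sigmas (2, 3).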
+ Matrix R; Vector d; + boost::tie(R,d) = noisyBayesNet.matrix(); // find matrix and RHS + + Matrix R1 = (Matrix2() << + 0.5, 0.5, + 0.0, 1./3. + ).finished(); + Vector d1 = Vector2(9./2., 5./3.); + + EXPECT(assert_equal(R,R1)); + EXPECT(assert_equal(d,d1)); +} + +/* ************************************************************************* */ +TEST(GaussianBayesNet, Optimize) { + VectorValues expected = + map_list_of(_x_, Vector1::Constant(4))(_y_, Vector1::Constant(5)); VectorValues actual = smallBayesNet.optimize(); + EXPECT(assert_equal(expected, actual)); +} - VectorValues expected = map_list_of - (_x_, (Vector(1) << 4.0).finished()) - (_y_, (Vector(1) << 5.0).finished()); +/* ************************************************************************* */ +TEST(GaussianBayesNet, NoisyOptimize) { + Matrix R; + Vector d; + boost::tie(R, d) = noisyBayesNet.matrix(); // find matrix and RHS + const Vector x = R.inverse() * d; + VectorValues expected = map_list_of(_x_, x.head(1))(_y_, x.tail(1)); - EXPECT(assert_equal(expected,actual)); + VectorValues actual = noisyBayesNet.optimize(); + EXPECT(assert_equal(expected, actual)); } /* ************************************************************************* */ TEST( GaussianBayesNet, optimizeIncomplete ) { static GaussianBayesNet incompleteBayesNet = list_of - (GaussianConditional(_x_, (Vector(1) << 9.0).finished(), (Matrix(1, 1) << 1.0).finished(), _y_, (Matrix(1, 1) << 1.0).finished())); + (GaussianConditional(_x_, Vector1::Constant(9), I_1x1, _y_, I_1x1)); VectorValues solutionForMissing = map_list_of - (_y_, (Vector(1) << 5.0).finished()); + (_y_, Vector1::Constant(5)); VectorValues actual = incompleteBayesNet.optimize(solutionForMissing); VectorValues expected = map_list_of - (_x_, (Vector(1) << 4.0).finished()) - (_y_, (Vector(1) << 5.0).finished()); + (_x_, Vector1::Constant(4)) + (_y_, Vector1::Constant(5)); EXPECT(assert_equal(expected,actual)); } @@ -95,13 +125,13 @@ TEST( GaussianBayesNet, optimize3 ) // NOTE: we are supplying a new RHS here VectorValues expected = map_list_of - (_x_, (Vector(1) << -1.0).finished()) - (_y_, (Vector(1) << 5.0).finished()); + (_x_, Vector1::Constant(-1)) + (_y_, Vector1::Constant(5)); // Test different RHS version VectorValues gx = map_list_of - (_x_, (Vector(1) << 4.0).finished()) - (_y_, (Vector(1) << 5.0).finished()); + (_x_, Vector1::Constant(4)) + (_y_, Vector1::Constant(5)); VectorValues actual = smallBayesNet.backSubstitute(gx); EXPECT(assert_equal(expected, actual)); } @@ -114,11 +144,11 @@ TEST( GaussianBayesNet, backSubstituteTranspose ) // 5 1 1 3 VectorValues x = map_list_of - (_x_, (Vector(1) << 2.0).finished()) - (_y_, (Vector(1) << 5.0).finished()), + (_x_, Vector1::Constant(2)) + (_y_, Vector1::Constant(5)), expected = map_list_of - (_x_, (Vector(1) << 2.0).finished()) - (_y_, (Vector(1) << 3.0).finished()); + (_x_, Vector1::Constant(2)) + (_y_, Vector1::Constant(3)); VectorValues actual = smallBayesNet.backSubstituteTranspose(x); EXPECT(assert_equal(expected, actual)); @@ -130,15 +160,15 @@ TEST( GaussianBayesNet, DeterminantTest ) { GaussianBayesNet cbn; cbn += GaussianConditional( - 0, Vector2(3.0, 4.0 ), (Matrix(2, 2) << 1.0, 3.0, 0.0, 4.0 ).finished(), - 1, (Matrix(2, 2) << 2.0, 1.0, 2.0, 3.0).finished(), noiseModel::Isotropic::Sigma(2, 2.0)); + 0, Vector2(3.0, 4.0), (Matrix2() << 1.0, 3.0, 0.0, 4.0).finished(), + 1, (Matrix2() << 2.0, 1.0, 2.0, 3.0).finished(), noiseModel::Isotropic::Sigma(2, 2.0)); cbn += GaussianConditional( - 1, Vector2(5.0, 6.0 ), (Matrix(2, 2) << 1.0, 1.0, 0.0, 
3.0 ).finished(), - 2, (Matrix(2, 2) << 1.0, 0.0, 5.0, 2.0).finished(), noiseModel::Isotropic::Sigma(2, 2.0)); + 1, Vector2(5.0, 6.0), (Matrix2() << 1.0, 1.0, 0.0, 3.0).finished(), + 2, (Matrix2() << 1.0, 0.0, 5.0, 2.0).finished(), noiseModel::Isotropic::Sigma(2, 2.0)); cbn += GaussianConditional( - 3, Vector2(7.0, 8.0 ), (Matrix(2, 2) << 1.0, 1.0, 0.0, 5.0 ).finished(), noiseModel::Isotropic::Sigma(2, 2.0)); + 3, Vector2(7.0, 8.0), (Matrix2() << 1.0, 1.0, 0.0, 5.0).finished(), noiseModel::Isotropic::Sigma(2, 2.0)); double expectedDeterminant = 60.0 / 64.0; double actualDeterminant = cbn.determinant(); @@ -161,21 +191,21 @@ TEST(GaussianBayesNet, ComputeSteepestDescentPoint) { // Create an arbitrary Bayes Net GaussianBayesNet gbn; gbn += GaussianConditional::shared_ptr(new GaussianConditional( - 0, Vector2(1.0,2.0), (Matrix(2, 2) << 3.0,4.0,0.0,6.0).finished(), - 3, (Matrix(2, 2) << 7.0,8.0,9.0,10.0).finished(), - 4, (Matrix(2, 2) << 11.0,12.0,13.0,14.0).finished())); + 0, Vector2(1.0,2.0), (Matrix2() << 3.0,4.0,0.0,6.0).finished(), + 3, (Matrix2() << 7.0,8.0,9.0,10.0).finished(), + 4, (Matrix2() << 11.0,12.0,13.0,14.0).finished())); gbn += GaussianConditional::shared_ptr(new GaussianConditional( - 1, Vector2(15.0,16.0), (Matrix(2, 2) << 17.0,18.0,0.0,20.0).finished(), - 2, (Matrix(2, 2) << 21.0,22.0,23.0,24.0).finished(), - 4, (Matrix(2, 2) << 25.0,26.0,27.0,28.0).finished())); + 1, Vector2(15.0,16.0), (Matrix2() << 17.0,18.0,0.0,20.0).finished(), + 2, (Matrix2() << 21.0,22.0,23.0,24.0).finished(), + 4, (Matrix2() << 25.0,26.0,27.0,28.0).finished())); gbn += GaussianConditional::shared_ptr(new GaussianConditional( - 2, Vector2(29.0,30.0), (Matrix(2, 2) << 31.0,32.0,0.0,34.0).finished(), - 3, (Matrix(2, 2) << 35.0,36.0,37.0,38.0).finished())); + 2, Vector2(29.0,30.0), (Matrix2() << 31.0,32.0,0.0,34.0).finished(), + 3, (Matrix2() << 35.0,36.0,37.0,38.0).finished())); gbn += GaussianConditional::shared_ptr(new GaussianConditional( - 3, Vector2(39.0,40.0), (Matrix(2, 2) << 41.0,42.0,0.0,44.0).finished(), - 4, (Matrix(2, 2) << 45.0,46.0,47.0,48.0).finished())); + 3, Vector2(39.0,40.0), (Matrix2() << 41.0,42.0,0.0,44.0).finished(), + 4, (Matrix2() << 45.0,46.0,47.0,48.0).finished())); gbn += GaussianConditional::shared_ptr(new GaussianConditional( - 4, Vector2(49.0,50.0), (Matrix(2, 2) << 51.0,52.0,0.0,54.0).finished())); + 4, Vector2(49.0,50.0), (Matrix2() << 51.0,52.0,0.0,54.0).finished())); // Compute the Hessian numerically Matrix hessian = numericalHessian( diff --git a/gtsam/linear/tests/testGaussianBayesTree.cpp b/gtsam/linear/tests/testGaussianBayesTree.cpp index 738a6d59f..e5634c357 100644 --- a/gtsam/linear/tests/testGaussianBayesTree.cpp +++ b/gtsam/linear/tests/testGaussianBayesTree.cpp @@ -130,7 +130,7 @@ TEST(GaussianBayesTree, complicatedMarginal) { // Create the conditionals to go in the BayesTree GaussianBayesTree bt; bt.insertRoot( - MakeClique(GaussianConditional(pair_list_of (11, (Matrix(3,1) << 0.0971, 0, 0).finished()) + MakeClique(GaussianConditional(pair_list_of (11, (Matrix(3,1) << 0.0971, 0, 0).finished()) (12, (Matrix(3,2) << 0.3171, 0.4387, 0.9502, 0.3816, 0, 0.7655).finished()), 2, Vector3(0.2638, 0.1455, 0.1361)), list_of (MakeClique(GaussianConditional(pair_list_of (9, (Matrix(3,1) << 0.7952, 0, 0).finished()) diff --git a/gtsam/linear/tests/testGaussianFactorGraph.cpp b/gtsam/linear/tests/testGaussianFactorGraph.cpp index 8f936e4b2..45f652d05 100644 --- a/gtsam/linear/tests/testGaussianFactorGraph.cpp +++ b/gtsam/linear/tests/testGaussianFactorGraph.cpp @@ -1,6 
+1,6 @@ /* ---------------------------------------------------------------------------- - * GTSAM Copyright 2010, Georgia Tech Research Corporation, + * GTSAM Copyright 2010, Georgia Tech Research Corporation, * Atlanta, Georgia 30332-0415 * All Rights Reserved * Authors: Frank Dellaert, et al. (see THANKS for the full author list) @@ -27,7 +27,7 @@ #include #include -#include // for operator += +#include // for operator += using namespace boost::assign; #include @@ -36,7 +36,7 @@ using namespace boost::assign; using namespace std; using namespace gtsam; -//static SharedDiagonal +// static SharedDiagonal // sigma0_1 = noiseModel::Isotropic::Sigma(2,0.1), sigma_02 = noiseModel::Isotropic::Sigma(2,0.2), // constraintModel = noiseModel::Constrained::All(2); @@ -56,11 +56,11 @@ TEST(GaussianFactorGraph, initialization) { // Test sparse, which takes a vector and returns a matrix, used in MATLAB // Note that this the augmented vector and the RHS is in column 7 - Matrix expectedIJS = (Matrix(3, 22) << - 1., 2., 1., 2., 3., 4., 3., 4., 3., 4., 5., 6., 5., 6., 5., 6., 7., 8., 7., 8., 7., 8., - 1., 2., 7., 7., 1., 2., 3., 4., 7., 7., 1., 2., 5., 6., 7., 7., 3., 4., 5., 6., 7., 7., - 10., 10., -1., -1., -10., -10., 10., 10., 2., -1., -5., -5., 5., 5., 0., 1., -5., -5., 5., 5., -1., 1.5 - ).finished(); + Matrix expectedIJS = + (Matrix(3, 22) << 1., 2., 1., 2., 3., 4., 3., 4., 3., 4., 5., 6., 5., 6., 5., 6., 7., 8., 7., + 8., 7., 8., 1., 2., 7., 7., 1., 2., 3., 4., 7., 7., 1., 2., 5., 6., 7., 7., 3., 4., 5., 6., + 7., 7., 10., 10., -1., -1., -10., -10., 10., 10., 2., -1., -5., -5., 5., 5., 0., 1., -5., + -5., 5., 5., -1., 1.5).finished(); Matrix actualIJS = fg.sparseJacobian_(); EQUALITY(expectedIJS, actualIJS); } @@ -98,7 +98,8 @@ TEST(GaussianFactorGraph, sparseJacobian) { GaussianFactorGraph gfg; SharedDiagonal model = noiseModel::Isotropic::Sigma(2, 0.5); gfg.add(0, (Matrix(2, 3) << 1., 2., 3., 5., 6., 7.).finished(), Vector2(4., 8.), model); - gfg.add(0, (Matrix(2, 3) << 9.,10., 0., 0., 0., 0.).finished(), 1, (Matrix(2, 2) << 11., 12., 14., 15.).finished(), Vector2(13.,16.), model); + gfg.add(0, (Matrix(2, 3) << 9., 10., 0., 0., 0., 0.).finished(), 1, + (Matrix(2, 2) << 11., 12., 14., 15.).finished(), Vector2(13., 16.), model); Matrix actual = gfg.sparseJacobian_(); @@ -121,73 +122,73 @@ TEST(GaussianFactorGraph, matrices) { GaussianFactorGraph gfg; SharedDiagonal model = noiseModel::Unit::Create(2); gfg.add(0, A00, Vector2(4., 8.), model); - gfg.add(0, A10, 1, A11, Vector2(13.,16.), model); + gfg.add(0, A10, 1, A11, Vector2(13., 16.), model); - Matrix Ab(4,6); - Ab << - 1, 2, 3, 0, 0, 4, - 5, 6, 7, 0, 0, 8, - 9,10, 0,11,12,13, - 0, 0, 0,14,15,16; + Matrix Ab(4, 6); + Ab << 1, 2, 3, 0, 0, 4, 5, 6, 7, 0, 0, 8, 9, 10, 0, 11, 12, 13, 0, 0, 0, 14, 15, 16; // augmented versions EXPECT(assert_equal(Ab, gfg.augmentedJacobian())); EXPECT(assert_equal(Ab.transpose() * Ab, gfg.augmentedHessian())); // jacobian - Matrix A = Ab.leftCols(Ab.cols()-1); - Vector b = Ab.col(Ab.cols()-1); - Matrix actualA; Vector actualb; boost::tie(actualA,actualb) = gfg.jacobian(); + Matrix A = Ab.leftCols(Ab.cols() - 1); + Vector b = Ab.col(Ab.cols() - 1); + Matrix actualA; + Vector actualb; + boost::tie(actualA, actualb) = gfg.jacobian(); EXPECT(assert_equal(A, actualA)); EXPECT(assert_equal(b, actualb)); // hessian Matrix L = A.transpose() * A; Vector eta = A.transpose() * b; - Matrix actualL; Vector actualeta; boost::tie(actualL,actualeta) = gfg.hessian(); + Matrix actualL; + Vector actualeta; + boost::tie(actualL, 
actualeta) = gfg.hessian();
   EXPECT(assert_equal(L, actualL));
   EXPECT(assert_equal(eta, actualeta));

   // hessianBlockDiagonal
-  VectorValues expectLdiagonal; // Make explicit that diagonal is sum-squares of columns
-  expectLdiagonal.insert(0, Vector3(1+25+81, 4+36+100, 9+49));
-  expectLdiagonal.insert(1, Vector2(121+196, 144+225));
+  VectorValues expectLdiagonal;  // Make explicit that diagonal is sum-squares of columns
+  expectLdiagonal.insert(0, Vector3(1 + 25 + 81, 4 + 36 + 100, 9 + 49));
+  expectLdiagonal.insert(1, Vector2(121 + 196, 144 + 225));
   EXPECT(assert_equal(expectLdiagonal, gfg.hessianDiagonal()));

   // hessianBlockDiagonal
-  map<Key,Matrix> actualBD = gfg.hessianBlockDiagonal();
-  LONGS_EQUAL(2,actualBD.size());
-  EXPECT(assert_equal(A00.transpose()*A00 + A10.transpose()*A10,actualBD[0]));
-  EXPECT(assert_equal(A11.transpose()*A11,actualBD[1]));
+  map<Key, Matrix> actualBD = gfg.hessianBlockDiagonal();
+  LONGS_EQUAL(2, actualBD.size());
+  EXPECT(assert_equal(A00.transpose() * A00 + A10.transpose() * A10, actualBD[0]));
+  EXPECT(assert_equal(A11.transpose() * A11, actualBD[1]));
 }

 /* ************************************************************************* */
+/// Factor graph with 4 2D factors (1 unary prior, 3 binary) on 3 2D variables
 static GaussianFactorGraph createSimpleGaussianFactorGraph() {
   GaussianFactorGraph fg;
+  Key x1 = 2, x2 = 0, l1 = 1;
   SharedDiagonal unit2 = noiseModel::Unit::Create(2);
   // linearized prior on x1: c[_x1_]+x1=0 i.e. x1=-c[_x1_]
-  fg += JacobianFactor(2, 10*I_2x2, -1.0*Vector::Ones(2), unit2);
+  fg += JacobianFactor(x1, 10 * I_2x2, -1.0 * Vector::Ones(2), unit2);
   // odometry between x1 and x2: x2-x1=[0.2;-0.1]
-  fg += JacobianFactor(0, 10*I_2x2, 2, -10*I_2x2, Vector2(2.0, -1.0), unit2);
+  fg += JacobianFactor(x2, 10 * I_2x2, x1, -10 * I_2x2, Vector2(2.0, -1.0), unit2);
   // measurement between x1 and l1: l1-x1=[0.0;0.2]
-  fg += JacobianFactor(1, 5*I_2x2, 2, -5*I_2x2, Vector2(0.0, 1.0), unit2);
+  fg += JacobianFactor(l1, 5 * I_2x2, x1, -5 * I_2x2, Vector2(0.0, 1.0), unit2);
   // measurement between x2 and l1: l1-x2=[-0.2;0.3]
-  fg += JacobianFactor(0, -5*I_2x2, 1, 5*I_2x2, Vector2(-1.0, 1.5), unit2);
+  fg += JacobianFactor(x2, -5 * I_2x2, l1, 5 * I_2x2, Vector2(-1.0, 1.5), unit2);
   return fg;
 }

 /* ************************************************************************* */
-TEST( GaussianFactorGraph, gradient )
-{
+TEST(GaussianFactorGraph, gradient) {
   GaussianFactorGraph fg = createSimpleGaussianFactorGraph();

   // Construct expected gradient
-  // 2*f(x) = 100*(x1+c[X(1)])^2 + 100*(x2-x1-[0.2;-0.1])^2 + 25*(l1-x1-[0.0;0.2])^2 + 25*(l1-x2-[-0.2;0.3])^2
+  // 2*f(x) = 100*(x1+c[X(1)])^2 + 100*(x2-x1-[0.2;-0.1])^2 + 25*(l1-x1-[0.0;0.2])^2 +
+  // 25*(l1-x2-[-0.2;0.3])^2
   // worked out: df/dx1 = 100*[0.1;0.1] + 100*[0.2;-0.1]) + 25*[0.0;0.2] = [10+20;10-10+5] = [30;5]
-  VectorValues expected = map_list_of
-    (1, Vector2(5.0, -12.5))
-    (2, Vector2(30.0, 5.0))
-    (0, Vector2(-25.0, 17.5));
+  VectorValues expected = map_list_of(1, Vector2(5.0, -12.5))(2, Vector2(30.0, 5.0))(
+      0, Vector2(-25.0, 17.5));

   // Check the gradient at delta=0
   VectorValues zero = VectorValues::Zero(expected);
@@ -202,18 +203,14 @@ TEST( GaussianFactorGraph, gradient )
 }

 /* ************************************************************************* */
-TEST( GaussianFactorGraph, transposeMultiplication )
-{
+TEST(GaussianFactorGraph, transposeMultiplication) {
   GaussianFactorGraph A = createSimpleGaussianFactorGraph();

-  Errors e; e +=
Vector2(0.0, 0.0), Vector2(15.0, 0.0), Vector2(0.0, -5.0), Vector2(-7.5, -5.0); VectorValues expected; - expected.insert(1, Vector2(-37.5,-50.0)); + expected.insert(1, Vector2(-37.5, -50.0)); expected.insert(2, Vector2(-150.0, 25.0)); expected.insert(0, Vector2(187.5, 25.0)); @@ -222,8 +219,7 @@ TEST( GaussianFactorGraph, transposeMultiplication ) } /* ************************************************************************* */ -TEST(GaussianFactorGraph, eliminate_empty ) -{ +TEST(GaussianFactorGraph, eliminate_empty) { // eliminate an empty factor GaussianFactorGraph gfg; gfg.add(JacobianFactor()); @@ -243,25 +239,31 @@ TEST(GaussianFactorGraph, eliminate_empty ) } /* ************************************************************************* */ -TEST( GaussianFactorGraph, matrices2 ) -{ +TEST(GaussianFactorGraph, matrices2) { GaussianFactorGraph gfg = createSimpleGaussianFactorGraph(); - Matrix A; Vector b; boost::tie(A,b) = gfg.jacobian(); - Matrix AtA; Vector eta; boost::tie(AtA,eta) = gfg.hessian(); - EXPECT(assert_equal(A.transpose()*A, AtA)); - EXPECT(assert_equal(A.transpose()*b, eta)); + Matrix A; + Vector b; + boost::tie(A, b) = gfg.jacobian(); + Matrix AtA; + Vector eta; + boost::tie(AtA, eta) = gfg.hessian(); + EXPECT(assert_equal(A.transpose() * A, AtA)); + EXPECT(assert_equal(A.transpose() * b, eta)); + Matrix expectedAtA(6, 6); + expectedAtA << 125, 0, -25, 0, -100, 0, // + 0, 125, 0, -25, 0, -100, // + -25, 0, 50, 0, -25, 0, // + 0, -25, 0, 50, 0, -25, // + -100, 0, -25, 0, 225, 0, // + 0, -100, 0, -25, 0, 225; + EXPECT(assert_equal(expectedAtA, AtA)); } - /* ************************************************************************* */ -TEST( GaussianFactorGraph, multiplyHessianAdd ) -{ +TEST(GaussianFactorGraph, multiplyHessianAdd) { GaussianFactorGraph gfg = createSimpleGaussianFactorGraph(); - VectorValues x = map_list_of - (0, Vector2(1,2)) - (1, Vector2(3,4)) - (2, Vector2(5,6)); + VectorValues x = map_list_of(0, Vector2(1, 2))(1, Vector2(3, 4))(2, Vector2(5, 6)); VectorValues expected; expected.insert(0, Vector2(-450, -450)); @@ -274,7 +276,7 @@ TEST( GaussianFactorGraph, multiplyHessianAdd ) // now, do it with non-zero y gfg.multiplyHessianAdd(1.0, x, actual); - EXPECT(assert_equal(2*expected, actual)); + EXPECT(assert_equal(2 * expected, actual)); } /* ************************************************************************* */ @@ -286,20 +288,20 @@ static GaussianFactorGraph createGaussianFactorGraphWithHessianFactor() { } /* ************************************************************************* */ -TEST( GaussianFactorGraph, multiplyHessianAdd2 ) -{ +TEST(GaussianFactorGraph, multiplyHessianAdd2) { GaussianFactorGraph gfg = createGaussianFactorGraphWithHessianFactor(); // brute force - Matrix AtA; Vector eta; boost::tie(AtA,eta) = gfg.hessian(); - Vector X(6); X<<1,2,3,4,5,6; - Vector Y(6); Y<<-450, -450, 300, 400, 2950, 3450; - EXPECT(assert_equal(Y,AtA*X)); + Matrix AtA; + Vector eta; + boost::tie(AtA, eta) = gfg.hessian(); + Vector X(6); + X << 1, 2, 3, 4, 5, 6; + Vector Y(6); + Y << -450, -450, 300, 400, 2950, 3450; + EXPECT(assert_equal(Y, AtA * X)); - VectorValues x = map_list_of - (0, Vector2(1,2)) - (1, Vector2(3,4)) - (2, Vector2(5,6)); + VectorValues x = map_list_of(0, Vector2(1, 2))(1, Vector2(3, 4))(2, Vector2(5, 6)); VectorValues expected; expected.insert(0, Vector2(-450, -450)); @@ -312,24 +314,26 @@ TEST( GaussianFactorGraph, multiplyHessianAdd2 ) // now, do it with non-zero y gfg.multiplyHessianAdd(1.0, x, actual); - EXPECT(assert_equal(2*expected, 
actual));
+  EXPECT(assert_equal(2 * expected, actual));
 }

 /* ************************************************************************* */
-TEST( GaussianFactorGraph, matricesMixed )
-{
+TEST(GaussianFactorGraph, matricesMixed) {
   GaussianFactorGraph gfg = createGaussianFactorGraphWithHessianFactor();

-  Matrix A; Vector b; boost::tie(A,b) = gfg.jacobian(); // incorrect !
-  Matrix AtA; Vector eta; boost::tie(AtA,eta) = gfg.hessian(); // correct
-  EXPECT(assert_equal(A.transpose()*A, AtA));
-  Vector expected = -
-      (Vector(6) << -25, 17.5, 5, -13.5, 29, 4).finished();
+  Matrix A;
+  Vector b;
+  boost::tie(A, b) = gfg.jacobian();  // incorrect !
+  Matrix AtA;
+  Vector eta;
+  boost::tie(AtA, eta) = gfg.hessian();  // correct
+  EXPECT(assert_equal(A.transpose() * A, AtA));
+  Vector expected = -(Vector(6) << -25, 17.5, 5, -13.5, 29, 4).finished();
   EXPECT(assert_equal(expected, eta));
-  EXPECT(assert_equal(A.transpose()*b, eta));
+  EXPECT(assert_equal(A.transpose() * b, eta));
 }

 /* ************************************************************************* */
-TEST( GaussianFactorGraph, gradientAtZero )
-{
+TEST(GaussianFactorGraph, gradientAtZero) {
   GaussianFactorGraph gfg = createGaussianFactorGraphWithHessianFactor();
   VectorValues expected;
   VectorValues actual = gfg.gradientAtZero();
@@ -340,29 +344,28 @@
 }

 /* ************************************************************************* */
-TEST( GaussianFactorGraph, clone ) {
+TEST(GaussianFactorGraph, clone) {
   // 2 variables, frontal has dim=4
   VerticalBlockMatrix blockMatrix(list_of(4)(2)(1), 4);
-  blockMatrix.matrix() <<
-      1.0, 0.0, 2.0, 0.0, 3.0, 0.0, 0.1,
-      0.0, 1.0, 0.0, 2.0, 0.0, 3.0, 0.2,
-      0.0, 0.0, 3.0, 0.0, 4.0, 0.0, 0.3,
-      0.0, 0.0, 0.0, 3.0, 0.0, 4.0, 0.4;
+  blockMatrix.matrix() << 1.0, 0.0, 2.0, 0.0, 3.0, 0.0, 0.1, 0.0, 1.0, 0.0, 2.0, 0.0, 3.0, 0.2, 0.0,
+      0.0, 3.0, 0.0, 4.0, 0.0, 0.3, 0.0, 0.0, 0.0, 3.0, 0.0, 4.0, 0.4;
   GaussianConditional cg(list_of(1)(2), 1, blockMatrix);

   GaussianFactorGraph init_graph = createGaussianFactorGraphWithHessianFactor();
-  init_graph.push_back(GaussianFactor::shared_ptr()); /// Add null factor
+  init_graph.push_back(GaussianFactor::shared_ptr());  /// Add null factor
   init_graph.push_back(GaussianConditional(cg));

-  GaussianFactorGraph exp_graph = createGaussianFactorGraphWithHessianFactor(); // Created separately
-  exp_graph.push_back(GaussianFactor::shared_ptr()); /// Add null factor
+  GaussianFactorGraph exp_graph =
+      createGaussianFactorGraphWithHessianFactor();  // Created separately
+  exp_graph.push_back(GaussianFactor::shared_ptr());  /// Add null factor
   exp_graph.push_back(GaussianConditional(cg));

   GaussianFactorGraph actCloned = init_graph.clone();
-  EXPECT(assert_equal(init_graph, actCloned)); // Same as the original version
+  EXPECT(assert_equal(init_graph, actCloned));  // Same as the original version

   // Apply an in-place change to init_graph and compare
-  JacobianFactor::shared_ptr jacFactor0 = boost::dynamic_pointer_cast<JacobianFactor>(init_graph.at(0));
+  JacobianFactor::shared_ptr jacFactor0 =
+      boost::dynamic_pointer_cast<JacobianFactor>(init_graph.at(0));
   CHECK(jacFactor0);
   jacFactor0->getA(jacFactor0->begin()) *= 7.;
   EXPECT(assert_inequal(init_graph, exp_graph));
@@ -370,9 +373,9 @@
 }

 /* ************************************************************************* */
-TEST( GaussianFactorGraph, negate ) {
+TEST(GaussianFactorGraph, negate) {
   GaussianFactorGraph init_graph = createGaussianFactorGraphWithHessianFactor();
-
init_graph.push_back(GaussianFactor::shared_ptr()); /// Add null factor + init_graph.push_back(GaussianFactor::shared_ptr()); /// Add null factor GaussianFactorGraph actNegation = init_graph.negate(); GaussianFactorGraph expNegation; expNegation.push_back(init_graph.at(0)->negate()); @@ -385,8 +388,7 @@ TEST( GaussianFactorGraph, negate ) { } /* ************************************************************************* */ -TEST( GaussianFactorGraph, hessianDiagonal ) -{ +TEST(GaussianFactorGraph, hessianDiagonal) { GaussianFactorGraph gfg = createGaussianFactorGraphWithHessianFactor(); VectorValues expected; Matrix infoMatrix = gfg.hessian().first; @@ -399,6 +401,16 @@ TEST( GaussianFactorGraph, hessianDiagonal ) EXPECT(assert_equal(expected, actual)); } +TEST(GaussianFactorGraph, DenseSolve) { + GaussianFactorGraph fg = createSimpleGaussianFactorGraph(); + VectorValues expected = fg.optimize(); + VectorValues actual = fg.optimizeDensely(); + EXPECT(assert_equal(expected, actual)); +} + /* ************************************************************************* */ -int main() { TestResult tr; return TestRegistry::runAllTests(tr);} +int main() { + TestResult tr; + return TestRegistry::runAllTests(tr); +} /* ************************************************************************* */ diff --git a/gtsam/linear/tests/testHessianFactor.cpp b/gtsam/linear/tests/testHessianFactor.cpp index 911100337..61fa8bd2c 100644 --- a/gtsam/linear/tests/testHessianFactor.cpp +++ b/gtsam/linear/tests/testHessianFactor.cpp @@ -53,8 +53,8 @@ TEST(HessianFactor, emptyConstructor) { HessianFactor f; DOUBLES_EQUAL(0.0, f.constantTerm(), 1e-9); // Constant term should be zero - EXPECT(assert_equal(Vector(), f.linearTerm())); // Linear term should be empty - EXPECT(assert_equal((Matrix) Z_1x1, f.info())); // Full matrix should be 1-by-1 zero matrix + //EXPECT(assert_equal(Vector(), f.linearTerm())); // Linear term should be empty + EXPECT(assert_equal((Matrix) Z_1x1, f.info().selfadjointView())); // Full matrix should be 1-by-1 zero matrix DOUBLES_EQUAL(0.0, f.error(VectorValues()), 1e-9); // Should have zero error } @@ -103,7 +103,7 @@ TEST(HessianFactor, Constructor1) HessianFactor factor(0, G, g, f); // extract underlying parts - EXPECT(assert_equal(G, Matrix(factor.info(factor.begin(), factor.begin())))); + EXPECT(assert_equal(G, Matrix(factor.info().diagonalBlock(0)))); EXPECT_DOUBLES_EQUAL(f, factor.constantTerm(), 1e-10); EXPECT(assert_equal(g, Vector(factor.linearTerm()))); EXPECT_LONGS_EQUAL(1, (long)factor.size()); @@ -118,7 +118,6 @@ TEST(HessianFactor, Constructor1) EXPECT_DOUBLES_EQUAL(expected, actual, 1e-10); } - /* ************************************************************************* */ TEST(HessianFactor, Constructor1b) { @@ -132,7 +131,7 @@ TEST(HessianFactor, Constructor1b) double f = dot(g,mu); // Check - EXPECT(assert_equal(G, Matrix(factor.info(factor.begin(), factor.begin())))); + EXPECT(assert_equal(G, Matrix(factor.info().diagonalBlock(0)))); EXPECT_DOUBLES_EQUAL(f, factor.constantTerm(), 1e-10); EXPECT(assert_equal(g, Vector(factor.linearTerm()))); EXPECT_LONGS_EQUAL(1, (long)factor.size()); @@ -167,9 +166,9 @@ TEST(HessianFactor, Constructor2) Vector linearExpected(3); linearExpected << g1, g2; EXPECT(assert_equal(linearExpected, factor.linearTerm())); - EXPECT(assert_equal(G11, factor.info(factor.begin(), factor.begin()))); - EXPECT(assert_equal(G12, factor.info(factor.begin(), factor.begin()+1))); - EXPECT(assert_equal(G22, factor.info(factor.begin()+1, factor.begin()+1))); + 
EXPECT(assert_equal(G11, factor.info().diagonalBlock(0))); + EXPECT(assert_equal(G12, factor.info().aboveDiagonalBlock(0, 1))); + EXPECT(assert_equal(G22, factor.info().diagonalBlock(1))); // Check case when vector values is larger than factor VectorValues dxLarge = pair_list_of @@ -218,12 +217,12 @@ TEST(HessianFactor, Constructor3) Vector linearExpected(6); linearExpected << g1, g2, g3; EXPECT(assert_equal(linearExpected, factor.linearTerm())); - EXPECT(assert_equal(G11, factor.info(factor.begin()+0, factor.begin()+0))); - EXPECT(assert_equal(G12, factor.info(factor.begin()+0, factor.begin()+1))); - EXPECT(assert_equal(G13, factor.info(factor.begin()+0, factor.begin()+2))); - EXPECT(assert_equal(G22, factor.info(factor.begin()+1, factor.begin()+1))); - EXPECT(assert_equal(G23, factor.info(factor.begin()+1, factor.begin()+2))); - EXPECT(assert_equal(G33, factor.info(factor.begin()+2, factor.begin()+2))); + EXPECT(assert_equal(G11, factor.info().diagonalBlock(0))); + EXPECT(assert_equal(G12, factor.info().aboveDiagonalBlock(0, 1))); + EXPECT(assert_equal(G13, factor.info().aboveDiagonalBlock(0, 2))); + EXPECT(assert_equal(G22, factor.info().diagonalBlock(1))); + EXPECT(assert_equal(G23, factor.info().aboveDiagonalBlock(1, 2))); + EXPECT(assert_equal(G33, factor.info().diagonalBlock(2))); } /* ************************************************************************* */ @@ -271,12 +270,12 @@ TEST(HessianFactor, ConstructorNWay) Vector linearExpected(6); linearExpected << g1, g2, g3; EXPECT(assert_equal(linearExpected, factor.linearTerm())); - EXPECT(assert_equal(G11, factor.info(factor.begin()+0, factor.begin()+0))); - EXPECT(assert_equal(G12, factor.info(factor.begin()+0, factor.begin()+1))); - EXPECT(assert_equal(G13, factor.info(factor.begin()+0, factor.begin()+2))); - EXPECT(assert_equal(G22, factor.info(factor.begin()+1, factor.begin()+1))); - EXPECT(assert_equal(G23, factor.info(factor.begin()+1, factor.begin()+2))); - EXPECT(assert_equal(G33, factor.info(factor.begin()+2, factor.begin()+2))); + EXPECT(assert_equal(G11, factor.info().diagonalBlock(0))); + EXPECT(assert_equal(G12, factor.info().aboveDiagonalBlock(0, 1))); + EXPECT(assert_equal(G13, factor.info().aboveDiagonalBlock(0, 2))); + EXPECT(assert_equal(G22, factor.info().diagonalBlock(1))); + EXPECT(assert_equal(G23, factor.info().aboveDiagonalBlock(1, 2))); + EXPECT(assert_equal(G33, factor.info().diagonalBlock(2))); } /* ************************************************************************* */ @@ -499,7 +498,7 @@ TEST(HessianFactor, combine) { -100.0000, 0.0, 20.0000, 0.0, 80.0000, 0.0, -20.0000, 0.0, -100.0000, 0.0, 20.0000, 0.0, 80.0000, 14.0000, 25.0000, -17.5000, -5.0000, 3.5000, -20.0000, 14.0000, 7.4500).finished(); - EXPECT(assert_equal(expected, Matrix(actual.matrixObject().full()), tol)); + EXPECT(assert_equal(expected, Matrix(actual.info().selfadjointView()), tol)); } @@ -575,6 +574,23 @@ TEST(HessianFactor, hessianDiagonal) EXPECT(assert_equal(G22,actualBD[1])); } +/* ************************************************************************* */ +TEST(HessianFactor, Solve) +{ + Matrix2 A; + A << 1, 2, 3, 4; + Matrix2 G = A.transpose() * A; + Vector2 b(5, 6); + Vector2 g = A.transpose() * b; + double f = 0; + Key key(55); + HessianFactor factor(key, G, g, f); + + VectorValues expected; + expected.insert(key, A.inverse() * b); + EXPECT(assert_equal(expected, factor.solve())); +} + /* ************************************************************************* */ int main() { TestResult tr; return 
TestRegistry::runAllTests(tr);} /* ************************************************************************* */ diff --git a/gtsam/linear/tests/testNoiseModel.cpp b/gtsam/linear/tests/testNoiseModel.cpp index d4403d3e3..6b130bea0 100644 --- a/gtsam/linear/tests/testNoiseModel.cpp +++ b/gtsam/linear/tests/testNoiseModel.cpp @@ -448,6 +448,15 @@ TEST(NoiseModel, WhitenInPlace) } /* ************************************************************************* */ + +/* + * These tests are responsible for testing the weight functions for the m-estimators in GTSAM. + * The weight function is related to the analytic derivative of the residual function. See + * http://research.microsoft.com/en-us/um/people/zhang/INRIA/Publis/Tutorial-Estim/node24.html + * for details. This weight function is required when optimizing cost functions with robust + * penalties using iteratively re-weighted least squares. + */ + TEST(NoiseModel, robustFunctionHuber) { const double k = 5.0, error1 = 1.0, error2 = 10.0; @@ -478,6 +487,26 @@ TEST(NoiseModel, robustFunctionDCS) DOUBLES_EQUAL(0.00039211, weight2, 1e-8); } +TEST(NoiseModel, robustFunctionL2WithDeadZone) +{ + const double k = 1.0, e0 = -10.0, e1 = -1.01, e2 = -0.99, e3 = 0.99, e4 = 1.01, e5 = 10.0; + const mEstimator::L2WithDeadZone::shared_ptr lsdz = mEstimator::L2WithDeadZone::Create(k); + + DOUBLES_EQUAL(0.9, lsdz->weight(e0), 1e-8); + DOUBLES_EQUAL(0.00990099009, lsdz->weight(e1), 1e-8); + DOUBLES_EQUAL(0.0, lsdz->weight(e2), 1e-8); + DOUBLES_EQUAL(0.0, lsdz->weight(e3), 1e-8); + DOUBLES_EQUAL(0.00990099009, lsdz->weight(e4), 1e-8); + DOUBLES_EQUAL(0.9, lsdz->weight(e5), 1e-8); + + DOUBLES_EQUAL(40.5, lsdz->residual(e0), 1e-8); + DOUBLES_EQUAL(0.00005, lsdz->residual(e1), 1e-8); + DOUBLES_EQUAL(0.0, lsdz->residual(e2), 1e-8); + DOUBLES_EQUAL(0.0, lsdz->residual(e3), 1e-8); + DOUBLES_EQUAL(0.00005, lsdz->residual(e4), 1e-8); + DOUBLES_EQUAL(40.5, lsdz->residual(e5), 1e-8); +} + /* ************************************************************************* */ TEST(NoiseModel, robustNoiseHuber) { @@ -550,6 +579,31 @@ TEST(NoiseModel, robustNoiseDCS) DOUBLES_EQUAL(sqrt_weight*a11, A(1,1), 1e-8); } +TEST(NoiseModel, robustNoiseL2WithDeadZone) +{ + double dead_zone_size = 1.0; + SharedNoiseModel robust = noiseModel::Robust::Create( + noiseModel::mEstimator::L2WithDeadZone::Create(dead_zone_size), + Unit::Create(3)); + +/* + * TODO(mike): There is currently a bug in GTSAM, where none of the mEstimator classes + * implement a residual function, and GTSAM calls the weight function to evaluate the + * total penalty, rather than calling the residual function. The weight function should be + * used during iteratively reweighted least squares optimization, but should not be used to + * evaluate the total penalty. The long-term solution is for all mEstimators to implement + * both a weight and a residual function, and for GTSAM to call the residual function when + * evaluating the total penalty. This bug causes the test below to fail, so I'm leaving it + * commented out until the underlying bug in GTSAM is fixed. 
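+ *
+ * NOTE(editor): for reference, the weight/residual pair that the
+ * robustFunctionL2WithDeadZone expectations above imply (read off the
+ * expected test values, not taken from the GTSAM sources) is, with
+ * dead-zone half-width k:
+ *
+ *   residual(x) = 0               for |x| <= k
+ *               = 0.5*(|x|-k)^2   for |x| >  k
+ *   weight(x)   = 0               for |x| <= k
+ *               = (|x|-k)/|x|     for |x| >  k
+ *
+ * e.g. x = -10, k = 1 gives weight = 9/10 = 0.9 and
+ * residual = 0.5*9^2 = 40.5, matching the DOUBLES_EQUAL checks above.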
+ * + * for (int i = 0; i < 5; i++) { + * Vector3 error = Vector3(i, 0, 0); + * DOUBLES_EQUAL(0.5*max(0,i-1)*max(0,i-1), robust->distance(error), 1e-8); + * } + */ + +} + /* ************************************************************************* */ #define TEST_GAUSSIAN(gaussian)\ EQUALITY(info, gaussian->information());\ diff --git a/gtsam/linear/tests/testRegularHessianFactor.cpp b/gtsam/linear/tests/testRegularHessianFactor.cpp index f53803dd1..1618451f3 100644 --- a/gtsam/linear/tests/testRegularHessianFactor.cpp +++ b/gtsam/linear/tests/testRegularHessianFactor.cpp @@ -128,9 +128,8 @@ TEST(RegularHessianFactor, Constructors) EXPECT(assert_equal(2*expected_y, fast_y)); // check some expressions - EXPECT(assert_equal(G12,factor.info(i1,i2).knownOffDiagonal())); - EXPECT(assert_equal(G22,factor.info(i2,i2).selfadjointView())); - EXPECT(assert_equal((Matrix)G12.transpose(),factor.info(i2,i1).knownOffDiagonal())); + EXPECT(assert_equal(G12,factor.info().aboveDiagonalBlock(i1 - factor.begin(), i2 - factor.begin()))); + EXPECT(assert_equal(G22,factor.info().diagonalBlock(i2 - factor.begin()))); } } diff --git a/gtsam/linear/tests/testVectorValues.cpp b/gtsam/linear/tests/testVectorValues.cpp index 949ec3a59..7e972903a 100644 --- a/gtsam/linear/tests/testVectorValues.cpp +++ b/gtsam/linear/tests/testVectorValues.cpp @@ -1,6 +1,6 @@ /* ---------------------------------------------------------------------------- - * GTSAM Copyright 2010, Georgia Tech Research Corporation, + * GTSAM Copyright 2010, Georgia Tech Research Corporation, * Atlanta, Georgia 30332-0415 * All Rights Reserved * Authors: Frank Dellaert, et al. (see THANKS for the full author list) @@ -188,6 +188,14 @@ TEST(VectorValues, convert) VectorValues actual(x,dims); EXPECT(assert_equal(expected, actual)); + Scatter scatter; + scatter.emplace_back(0,1); + scatter.emplace_back(1,2); + scatter.emplace_back(2,2); + scatter.emplace_back(5,2); + VectorValues actual2(x,scatter); + EXPECT(assert_equal(expected, actual2)); + // Test other direction, note vector() is not guaranteed to give right result FastVector keys = list_of(0)(1)(2)(5); EXPECT(assert_equal(x, actual.vector(keys))); diff --git a/gtsam/navigation/ImuFactor.cpp b/gtsam/navigation/ImuFactor.cpp index 7210f4dd2..cc1dc087e 100644 --- a/gtsam/navigation/ImuFactor.cpp +++ b/gtsam/navigation/ImuFactor.cpp @@ -96,8 +96,9 @@ void PreintegratedImuMeasurements::integrateMeasurements( void PreintegratedImuMeasurements::mergeWith(const PreintegratedImuMeasurements& pim12, // Matrix9* H1, Matrix9* H2) { PreintegrationType::mergeWith(pim12, H1, H2); - preintMeasCov_ = - *H1 * preintMeasCov_ * H1->transpose() + *H2 * pim12.preintMeasCov_ * H2->transpose(); + // NOTE(gareth): Temporary P is needed as of Eigen 3.3 + const Matrix9 P = *H1 * preintMeasCov_ * H1->transpose(); + preintMeasCov_ = P + *H2 * pim12.preintMeasCov_ * H2->transpose(); } #endif //------------------------------------------------------------------------------ diff --git a/gtsam/nonlinear/DoglegOptimizer.cpp b/gtsam/nonlinear/DoglegOptimizer.cpp index a91515e9c..97326f06c 100644 --- a/gtsam/nonlinear/DoglegOptimizer.cpp +++ b/gtsam/nonlinear/DoglegOptimizer.cpp @@ -18,6 +18,7 @@ #include #include +#include #include #include #include @@ -25,8 +26,6 @@ #include -using namespace std; - namespace gtsam { /* ************************************************************************* */ @@ -51,10 +50,41 @@ std::string DoglegParams::verbosityDLTranslator(VerbosityDL verbosityDL) const { } /* 
************************************************************************* */
-void DoglegOptimizer::iterate(void) {
+namespace internal {
+struct DoglegState : public NonlinearOptimizerState {
+  const double delta;
+
+  DoglegState(const Values& values, double error, double delta, unsigned int iterations = 0)
+      : NonlinearOptimizerState(values, error, iterations), delta(delta) {}
+};
+}
+
+typedef internal::DoglegState State;
+
+/* ************************************************************************* */
+DoglegOptimizer::DoglegOptimizer(const NonlinearFactorGraph& graph, const Values& initialValues,
+                                 const DoglegParams& params)
+    : NonlinearOptimizer(
+          graph, std::unique_ptr<State>(
+                     new State(initialValues, graph.error(initialValues), params.deltaInitial))),
+      params_(ensureHasOrdering(params, graph)) {}
+
+DoglegOptimizer::DoglegOptimizer(const NonlinearFactorGraph& graph, const Values& initialValues,
+                                 const Ordering& ordering)
+    : NonlinearOptimizer(graph, std::unique_ptr<State>(
+                                    new State(initialValues, graph.error(initialValues), 1.0))) {
+  params_.ordering = ordering;
+}
+
+double DoglegOptimizer::getDelta() const {
+  return static_cast<const State*>(state_.get())->delta;
+}
+
+/* ************************************************************************* */
+GaussianFactorGraph::shared_ptr DoglegOptimizer::iterate(void) {
   // Linearize graph
-  GaussianFactorGraph::shared_ptr linear = graph_.linearize(state_.values);
+  GaussianFactorGraph::shared_ptr linear = graph_.linearize(state_->values);

   // Pull out parameters we'll use
   const bool dlVerbose = (params_.verbosityDL > DoglegParams::SILENT);
@@ -66,31 +96,30 @@ void DoglegOptimizer::iterate(void) {
     GaussianBayesTree bt = *linear->eliminateMultifrontal(*params_.ordering, params_.getEliminationFunction());
     VectorValues dx_u = bt.optimizeGradientSearch();
     VectorValues dx_n = bt.optimize();
-    result = DoglegOptimizerImpl::Iterate(state_.Delta, DoglegOptimizerImpl::ONE_STEP_PER_ITERATION,
-        dx_u, dx_n, bt, graph_, state_.values, state_.error, dlVerbose);
+    result = DoglegOptimizerImpl::Iterate(getDelta(), DoglegOptimizerImpl::ONE_STEP_PER_ITERATION,
+        dx_u, dx_n, bt, graph_, state_->values, state_->error, dlVerbose);
   } else if ( params_.isSequential() ) {
     GaussianBayesNet bn = *linear->eliminateSequential(*params_.ordering, params_.getEliminationFunction());
     VectorValues dx_u = bn.optimizeGradientSearch();
     VectorValues dx_n = bn.optimize();
-    result = DoglegOptimizerImpl::Iterate(state_.Delta, DoglegOptimizerImpl::ONE_STEP_PER_ITERATION,
-        dx_u, dx_n, bn, graph_, state_.values, state_.error, dlVerbose);
+    result = DoglegOptimizerImpl::Iterate(getDelta(), DoglegOptimizerImpl::ONE_STEP_PER_ITERATION,
+        dx_u, dx_n, bn, graph_, state_->values, state_->error, dlVerbose);
   } else if ( params_.isIterative() ) {
-    throw runtime_error("Dogleg is not currently compatible with the linear conjugate gradient solver");
+    throw std::runtime_error("Dogleg is not currently compatible with the linear conjugate gradient solver");
   } else {
-    throw runtime_error("Optimization parameter is invalid: DoglegParams::elimination");
+    throw std::runtime_error("Optimization parameter is invalid: DoglegParams::elimination");
   }

   // Maybe show output
   if(params_.verbosity >= NonlinearOptimizerParams::DELTA) result.dx_d.print("delta");

   // Create new state with new values and new error
-  state_.values = state_.values.retract(result.dx_d);
-  state_.error = result.f_error;
-  state_.Delta = result.Delta;
-  ++state_.iterations;
+  state_.reset(new State(state_->values.retract(result.dx_d),
result.f_error, result.delta, + state_->iterations + 1)); + return linear; } /* ************************************************************************* */ diff --git a/gtsam/nonlinear/DoglegOptimizer.h b/gtsam/nonlinear/DoglegOptimizer.h index 8a43b4d96..7013908e5 100644 --- a/gtsam/nonlinear/DoglegOptimizer.h +++ b/gtsam/nonlinear/DoglegOptimizer.h @@ -11,7 +11,7 @@ /** * @file DoglegOptimizer.h - * @brief + * @brief * @author Richard Roberts * @date Feb 26, 2012 */ @@ -45,7 +45,7 @@ public: virtual ~DoglegParams() {} - virtual void print(const std::string& str = "") const { + void print(const std::string& str = "") const override { NonlinearOptimizerParams::print(str); std::cout << " deltaInitial: " << deltaInitial << "\n"; std::cout.flush(); @@ -62,24 +62,6 @@ private: std::string verbosityDLTranslator(VerbosityDL verbosityDL) const; }; -/** - * State for DoglegOptimizer - */ -class GTSAM_EXPORT DoglegState : public NonlinearOptimizerState { -public: - double Delta; - - DoglegState() {} - - virtual ~DoglegState() {} - -protected: - DoglegState(const NonlinearFactorGraph& graph, const Values& values, const DoglegParams& params, unsigned int iterations = 0) : - NonlinearOptimizerState(graph, values, iterations), Delta(params.deltaInitial) {} - - friend class DoglegOptimizer; -}; - /** * This class performs Dogleg nonlinear optimization */ @@ -87,7 +69,6 @@ class GTSAM_EXPORT DoglegOptimizer : public NonlinearOptimizer { protected: DoglegParams params_; - DoglegState state_; public: typedef boost::shared_ptr shared_ptr; @@ -104,8 +85,7 @@ public: * @param params The optimization parameters */ DoglegOptimizer(const NonlinearFactorGraph& graph, const Values& initialValues, - const DoglegParams& params = DoglegParams()) : - NonlinearOptimizer(graph), params_(ensureHasOrdering(params, graph)), state_(graph, initialValues, params_) {} + const DoglegParams& params = DoglegParams()); /** Standard constructor, requires a nonlinear factor graph, initial * variable assignments, and optimization parameters. For convenience this @@ -114,10 +94,8 @@ public: * @param graph The nonlinear factor graph to optimize * @param initialValues The initial variable assignments */ - DoglegOptimizer(const NonlinearFactorGraph& graph, const Values& initialValues, const Ordering& ordering) : - NonlinearOptimizer(graph) { - params_.ordering = ordering; - state_ = DoglegState(graph, initialValues, params_); } + DoglegOptimizer(const NonlinearFactorGraph& graph, const Values& initialValues, + const Ordering& ordering); /// @} @@ -131,31 +109,19 @@ public: * containing the updated variable assignments, which may be retrieved with * values(). */ - virtual void iterate(); + GaussianFactorGraph::shared_ptr iterate() override; /** Read-only access the parameters */ const DoglegParams& params() const { return params_; } - /** Read/write access the parameters. */ - DoglegParams& params() { return params_; } - - /** Read-only access the last state */ - const DoglegState& state() const { return state_; } - - /** Read/write access the last state. When modifying the state, the error, etc. 
must be consistent before calling iterate() */ - DoglegState& state() { return state_; } - - /** Access the current trust region radius Delta */ - double getDelta() const { return state_.Delta; } + /** Access the current trust region radius delta */ + double getDelta() const; /// @} protected: /** Access the parameters (base class version) */ - virtual const NonlinearOptimizerParams& _params() const { return params_; } - - /** Access the state (base class version) */ - virtual const NonlinearOptimizerState& _state() const { return state_; } + virtual const NonlinearOptimizerParams& _params() const override { return params_; } /** Internal function for computing a COLAMD ordering if no ordering is specified */ DoglegParams ensureHasOrdering(DoglegParams params, const NonlinearFactorGraph& graph) const; diff --git a/gtsam/nonlinear/DoglegOptimizerImpl.cpp b/gtsam/nonlinear/DoglegOptimizerImpl.cpp index 671dfe8f8..c319f26e6 100644 --- a/gtsam/nonlinear/DoglegOptimizerImpl.cpp +++ b/gtsam/nonlinear/DoglegOptimizerImpl.cpp @@ -23,24 +23,24 @@ using namespace std; namespace gtsam { /* ************************************************************************* */ VectorValues DoglegOptimizerImpl::ComputeDoglegPoint( - double Delta, const VectorValues& dx_u, const VectorValues& dx_n, const bool verbose) { + double delta, const VectorValues& dx_u, const VectorValues& dx_n, const bool verbose) { - // Get magnitude of each update and find out which segment Delta falls in - assert(Delta >= 0.0); - double DeltaSq = Delta*Delta; + // Get magnitude of each update and find out which segment delta falls in + assert(delta >= 0.0); + double deltaSq = delta*delta; double x_u_norm_sq = dx_u.squaredNorm(); double x_n_norm_sq = dx_n.squaredNorm(); if(verbose) cout << "Steepest descent magnitude " << std::sqrt(x_u_norm_sq) << ", Newton's method magnitude " << std::sqrt(x_n_norm_sq) << endl; - if(DeltaSq < x_u_norm_sq) { + if(deltaSq < x_u_norm_sq) { // Trust region is smaller than steepest descent update - VectorValues x_d = std::sqrt(DeltaSq / x_u_norm_sq) * dx_u; - if(verbose) cout << "In steepest descent region with fraction " << std::sqrt(DeltaSq / x_u_norm_sq) << " of steepest descent magnitude" << endl; + VectorValues x_d = std::sqrt(deltaSq / x_u_norm_sq) * dx_u; + if(verbose) cout << "In steepest descent region with fraction " << std::sqrt(deltaSq / x_u_norm_sq) << " of steepest descent magnitude" << endl; return x_d; - } else if(DeltaSq < x_n_norm_sq) { + } else if(deltaSq < x_n_norm_sq) { // Trust region boundary is between steepest descent point and Newton's method point - return ComputeBlend(Delta, dx_u, dx_n, verbose); + return ComputeBlend(delta, dx_u, dx_n, verbose); } else { - assert(DeltaSq >= x_n_norm_sq); + assert(deltaSq >= x_n_norm_sq); if(verbose) cout << "In pure Newton's method region" << endl; // Trust region is larger than Newton's method point return dx_n; @@ -48,7 +48,7 @@ VectorValues DoglegOptimizerImpl::ComputeDoglegPoint( } /* ************************************************************************* */ -VectorValues DoglegOptimizerImpl::ComputeBlend(double Delta, const VectorValues& x_u, const VectorValues& x_n, const bool verbose) { +VectorValues DoglegOptimizerImpl::ComputeBlend(double delta, const VectorValues& x_u, const VectorValues& x_n, const bool verbose) { // See doc/trustregion.lyx or doc/trustregion.pdf @@ -60,7 +60,7 @@ VectorValues DoglegOptimizerImpl::ComputeBlend(double Delta, const VectorValues& // Compute quadratic formula terms const double a = uu - 2.*un + nn; 
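  // NOTE(editor): derivation sketch for the coefficients a, b, c (editorial
  // aside, consistent with doc/trustregion.pdf referenced above). The blend
  // point is x(tau) = (1 - tau)*x_u + tau*x_n with 0 < tau < 1, and tau is
  // chosen so that ||x(tau)||^2 = delta^2. Writing uu = <x_u, x_u>,
  // un = <x_u, x_n>, nn = <x_n, x_n> for the inner products computed above,
  //   ||x(tau)||^2 = (uu - 2*un + nn)*tau^2 + 2*(un - uu)*tau + uu,
  // so the boundary condition is a*tau^2 + b*tau + c = 0 with
  // c = uu - delta^2, exactly the terms computed here.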
const double b = 2. * (un - uu); - const double c = uu - Delta*Delta; + const double c = uu - delta*delta; double sqrt_b_m4ac = std::sqrt(b*b - 4*a*c); // Compute blending parameter diff --git a/gtsam/nonlinear/DoglegOptimizerImpl.h b/gtsam/nonlinear/DoglegOptimizerImpl.h index dea4113f7..cead7f9db 100644 --- a/gtsam/nonlinear/DoglegOptimizerImpl.h +++ b/gtsam/nonlinear/DoglegOptimizerImpl.h @@ -32,7 +32,7 @@ namespace gtsam { struct GTSAM_EXPORT DoglegOptimizerImpl { struct GTSAM_EXPORT IterationResult { - double Delta; + double delta; VectorValues dx_d; double f_error; }; @@ -58,7 +58,7 @@ struct GTSAM_EXPORT DoglegOptimizerImpl { /** * Compute the update point for one iteration of the Dogleg algorithm, given - * an initial trust region radius \f$ \Delta \f$. The trust region radius is + * an initial trust region radius \f$ \delta \f$. The trust region radius is * adapted based on the error of a NonlinearFactorGraph \f$ f(x) \f$, and * depending on the update mode \c mode. * @@ -80,24 +80,24 @@ struct GTSAM_EXPORT DoglegOptimizerImpl { * @tparam F For normal usage this will be NonlinearFactorGraph. * @tparam VALUES The Values or TupleValues to pass to F::error() to evaluate * the error function. - * @param Delta The initial trust region radius. + * @param delta The initial trust region radius. * @param mode See DoglegOptimizerImpl::TrustRegionAdaptationMode * @param Rd The Bayes' net or tree as described above. * @param f The original nonlinear factor graph with which to evaluate the - * accuracy of \f$ M(\delta x) \f$ to adjust \f$ \Delta \f$. + * accuracy of \f$ M(\delta x) \f$ to adjust \f$ \delta \f$. * @param x0 The linearization point about which \f$ \bayesNet \f$ was created * @param ordering The variable ordering used to create\f$ \bayesNet \f$ * @param f_error The result of f.error(x0). - * @return A DoglegIterationResult containing the new \c Delta, the linear + * @return A DoglegIterationResult containing the new \c delta, the linear * update \c dx_d, and the resulting nonlinear error \c f_error. */ template static IterationResult Iterate( - double Delta, TrustRegionAdaptationMode mode, const VectorValues& dx_u, const VectorValues& dx_n, + double delta, TrustRegionAdaptationMode mode, const VectorValues& dx_u, const VectorValues& dx_n, const M& Rd, const F& f, const VALUES& x0, const double f_error, const bool verbose=false); /** - * Compute the dogleg point given a trust region radius \f$ \Delta \f$. The + * Compute the dogleg point given a trust region radius \f$ \delta \f$. The * dogleg point is the intersection between the dogleg path and the trust * region boundary, see doc/trustregion.pdf for more details. * @@ -113,30 +113,30 @@ struct GTSAM_EXPORT DoglegOptimizerImpl { * upper-triangular and \f$ d \f$ is a vector, in GTSAM represented as a * GaussianBayesNet, containing GaussianConditional s. * - * @param Delta The trust region radius + * @param delta The trust region radius * @param dx_u The steepest descent point, i.e. the Cauchy point * @param dx_n The Gauss-Newton point * @return The dogleg point \f$ \delta x_d \f$ */ - static VectorValues ComputeDoglegPoint(double Delta, const VectorValues& dx_u, const VectorValues& dx_n, const bool verbose=false); + static VectorValues ComputeDoglegPoint(double delta, const VectorValues& dx_u, const VectorValues& dx_n, const bool verbose=false); /** Compute the point on the line between the steepest descent point and the * Newton's method point intersecting the trust region boundary. 
* Mathematically, computes \f$ \tau \f$ such that \f$ 0<\tau<1 \f$ and - * \f$ \| (1-\tau)\delta x_u + \tau\delta x_n \| = \Delta \f$, where \f$ \Delta \f$ + * \f$ \| (1-\tau)\delta x_u + \tau\delta x_n \| = \delta \f$, where \f$ \delta \f$ * is the trust region radius. - * @param Delta Trust region radius + * @param delta Trust region radius * @param x_u Steepest descent minimizer * @param x_n Newton's method minimizer */ - static VectorValues ComputeBlend(double Delta, const VectorValues& x_u, const VectorValues& x_n, const bool verbose=false); + static VectorValues ComputeBlend(double delta, const VectorValues& x_u, const VectorValues& x_n, const bool verbose=false); }; /* ************************************************************************* */ template typename DoglegOptimizerImpl::IterationResult DoglegOptimizerImpl::Iterate( - double Delta, TrustRegionAdaptationMode mode, const VectorValues& dx_u, const VectorValues& dx_n, + double delta, TrustRegionAdaptationMode mode, const VectorValues& dx_u, const VectorValues& dx_n, const M& Rd, const F& f, const VALUES& x0, const double f_error, const bool verbose) { gttic(M_error); @@ -151,10 +151,10 @@ typename DoglegOptimizerImpl::IterationResult DoglegOptimizerImpl::Iterate( while(stay) { gttic(Dog_leg_point); // Compute dog leg point - result.dx_d = ComputeDoglegPoint(Delta, dx_u, dx_n, verbose); + result.dx_d = ComputeDoglegPoint(delta, dx_u, dx_n, verbose); gttoc(Dog_leg_point); - if(verbose) std::cout << "Delta = " << Delta << ", dx_d_norm = " << result.dx_d.norm() << std::endl; + if(verbose) std::cout << "delta = " << delta << ", dx_d_norm = " << result.dx_d.norm() << std::endl; gttic(retract); // Compute expmapped solution @@ -174,7 +174,7 @@ typename DoglegOptimizerImpl::IterationResult DoglegOptimizerImpl::Iterate( if(verbose) std::cout << std::setprecision(15) << "f error: " << f_error << " -> " << result.f_error << std::endl; if(verbose) std::cout << std::setprecision(15) << "M error: " << M_error << " -> " << new_M_error << std::endl; - gttic(adjust_Delta); + gttic(adjust_delta); // Compute gain ratio. Here we take advantage of the invariant that the // Bayes' net error at zero is equal to the nonlinear error const double rho = fabs(f_error - result.f_error) < 1e-15 || fabs(M_error - new_M_error) < 1e-15 ? 
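// NOTE(editor): rho in the truncated line above is the standard trust-region
// gain ratio,
//
//   rho = (f(x0) - f(x0 + dx_d)) / (M(0) - M(dx_d)),
//
// the actual reduction of the nonlinear error over the reduction predicted by
// the linear model M, using the invariant noted in the comment that the Bayes
// net error at zero equals the nonlinear error. When either reduction is
// within 1e-15 of zero the ternary avoids the division and falls back to a
// fixed value, preventing a 0/0. (Editorial aside; the exact fallback value
// lies outside the visible context of this hunk.)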
@@ -186,35 +186,35 @@ typename DoglegOptimizerImpl::IterationResult DoglegOptimizerImpl::Iterate( if(rho >= 0.75) { // M agrees very well with f, so try to increase lambda const double dx_d_norm = result.dx_d.norm(); - const double newDelta = std::max(Delta, 3.0 * dx_d_norm); // Compute new Delta + const double newDelta = std::max(delta, 3.0 * dx_d_norm); // Compute new delta if(mode == ONE_STEP_PER_ITERATION || mode == SEARCH_REDUCE_ONLY) - stay = false; // If not searching, just return with the new Delta + stay = false; // If not searching, just return with the new delta else if(mode == SEARCH_EACH_ITERATION) { - if(fabs(newDelta - Delta) < 1e-15 || lastAction == DECREASED_DELTA) + if(fabs(newDelta - delta) < 1e-15 || lastAction == DECREASED_DELTA) stay = false; // Searching, but Newton's solution is within trust region so keep the same trust region else { - stay = true; // Searching and increased Delta, so try again to increase Delta + stay = true; // Searching and increased delta, so try again to increase delta lastAction = INCREASED_DELTA; } } else { assert(false); } - Delta = newDelta; // Update Delta from new Delta + delta = newDelta; // Update delta from new delta } else if(0.75 > rho && rho >= 0.25) { - // M agrees so-so with f, keep the same Delta + // M agrees so-so with f, keep the same delta stay = false; } else if(0.25 > rho && rho >= 0.0) { - // M does not agree well with f, decrease Delta until it does + // M does not agree well with f, decrease delta until it does double newDelta; bool hitMinimumDelta; - if(Delta > 1e-5) { - newDelta = 0.5 * Delta; + if(delta > 1e-5) { + newDelta = 0.5 * delta; hitMinimumDelta = false; } else { - newDelta = Delta; + newDelta = delta; hitMinimumDelta = true; } if(mode == ONE_STEP_PER_ITERATION || /* mode == SEARCH_EACH_ITERATION && */ lastAction == INCREASED_DELTA || hitMinimumDelta) @@ -225,19 +225,19 @@ typename DoglegOptimizerImpl::IterationResult DoglegOptimizerImpl::Iterate( } else { assert(false); } - Delta = newDelta; // Update Delta from new Delta + delta = newDelta; // Update delta from new delta } else { - // f actually increased, so keep decreasing Delta until f does not decrease. + // f actually increased, so keep decreasing delta until f does not decrease. // NOTE: NaN and Inf solutions also will fall into this case, so that we - // decrease Delta if the solution becomes undetermined. + // decrease delta if the solution becomes undetermined. 
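      // NOTE(editor): summary of the delta adaptation implemented by this
      // if/else ladder (editorial aside; thresholds as in the code above):
      //   rho >= 0.75        : delta <- max(delta, 3 * ||dx_d||)
      //   0.25 <= rho < 0.75 : keep delta unchanged
      //   0.0  <= rho < 0.25 : delta <- 0.5 * delta, floored near 1e-5
      //   rho  < 0.0         : keep halving delta; once the floor is hit,
      //                        return a zero step so the error cannot increase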
assert(0.0 > rho); - if(Delta > 1e-5) { - Delta *= 0.5; + if(delta > 1e-5) { + delta *= 0.5; stay = true; lastAction = DECREASED_DELTA; } else { - if(verbose) std::cout << "Warning: Dog leg stopping because cannot decrease error with minimum Delta" << std::endl; + if(verbose) std::cout << "Warning: Dog leg stopping because cannot decrease error with minimum delta" << std::endl; result.dx_d.setZero(); // Set delta to zero - don't allow error to increase result.f_error = f_error; stay = false; @@ -247,7 +247,7 @@ typename DoglegOptimizerImpl::IterationResult DoglegOptimizerImpl::Iterate( } // dx_d and f_error have already been filled in during the loop - result.Delta = Delta; + result.delta = delta; return result; } diff --git a/gtsam/nonlinear/Expression-inl.h b/gtsam/nonlinear/Expression-inl.h index e59201ca7..259bb1efe 100644 --- a/gtsam/nonlinear/Expression-inl.h +++ b/gtsam/nonlinear/Expression-inl.h @@ -263,26 +263,15 @@ template ScalarMultiplyExpression::ScalarMultiplyExpression(double s, const Expression& e) : Expression(boost::make_shared>(s, e)) {} -template -SumExpression::SumExpression(const std::vector>& expressions) - : Expression(boost::make_shared>(expressions)) {} template -SumExpression SumExpression::operator+(const Expression& e) const { - SumExpression copy = *this; - boost::static_pointer_cast>(copy.root_)->add(e); - return copy; -} +BinarySumExpression::BinarySumExpression(const Expression& e1, const Expression& e2) + : Expression(boost::make_shared>(e1, e2)) {} template -SumExpression& SumExpression::operator+=(const Expression& e) { - boost::static_pointer_cast>(this->root_)->add(e); +Expression& Expression::operator+=(const Expression& e) { + root_ = boost::make_shared>(*this, e); return *this; } -template -size_t SumExpression::nrTerms() const { - return boost::static_pointer_cast>(this->root_)->nrTerms(); -} - } // namespace gtsam diff --git a/gtsam/nonlinear/Expression.h b/gtsam/nonlinear/Expression.h index 6ed6bb4a5..7d02d479c 100644 --- a/gtsam/nonlinear/Expression.h +++ b/gtsam/nonlinear/Expression.h @@ -174,6 +174,9 @@ public: /// Return size needed for memory buffer in traceExecution size_t traceSize() const; + /// Add another expression to this expression + Expression& operator+=(const Expression& e); + protected: /// Default constructor, for serialization @@ -217,24 +220,19 @@ class ScalarMultiplyExpression : public Expression { }; /** - * A SumExpression is a specialization of Expression that just sums the arguments + * A BinarySumExpression is a specialization of Expression that adds two expressions together * It optimizes the Jacobian calculation for this specific case */ template -class SumExpression : public Expression { +class BinarySumExpression : public Expression { // Check that T is a vector space BOOST_CONCEPT_ASSERT((gtsam::IsVectorSpace)); public: - explicit SumExpression(const std::vector>& expressions); - - // Syntactic sugar to allow e1 + e2 + e3... - SumExpression operator+(const Expression& e) const; - SumExpression& operator+=(const Expression& e); - - size_t nrTerms() const; + explicit BinarySumExpression(const Expression& e1, const Expression& e2); }; + /** * Create an expression out of a linear function f:T->A with (constant) Jacobian dTdA * TODO(frank): create a more efficient version like ScalarMultiplyExpression. 
This version still
@@ -272,13 +270,14 @@ ScalarMultiplyExpression<T> operator*(double s, const Expression<T>& e) {
 *   Expression a(0), b(1), c = a + b;
 */
 template <typename T>
-SumExpression<T> operator+(const Expression<T>& e1, const Expression<T>& e2) {
-  return SumExpression<T>({e1, e2});
+BinarySumExpression<T> operator+(const Expression<T>& e1, const Expression<T>& e2) {
+  return BinarySumExpression<T>(e1, e2);
 }

 /// Construct an expression that subtracts one expression from another
 template <typename T>
-SumExpression<T> operator-(const Expression<T>& e1, const Expression<T>& e2) {
+BinarySumExpression<T> operator-(const Expression<T>& e1, const Expression<T>& e2) {
+  // TODO(frank, abe): Implement an actual negate operator instead of multiplying by -1
   return e1 + (-1.0) * e2;
 }
diff --git a/gtsam/nonlinear/GaussNewtonOptimizer.cpp b/gtsam/nonlinear/GaussNewtonOptimizer.cpp
index d12c56b6f..c31451e56 100644
--- a/gtsam/nonlinear/GaussNewtonOptimizer.cpp
+++ b/gtsam/nonlinear/GaussNewtonOptimizer.cpp
@@ -17,36 +17,52 @@
  */

 #include
+#include
 #include
 #include

-using namespace std;
-
 namespace gtsam {

-/* ************************************************************************* */
-void GaussNewtonOptimizer::iterate() {
-  gttic(GaussNewtonOptimizer_Iterate);
+typedef internal::NonlinearOptimizerState State;

-  const NonlinearOptimizerState& current = state_;
+/* ************************************************************************* */
+GaussNewtonOptimizer::GaussNewtonOptimizer(const NonlinearFactorGraph& graph,
+                                           const Values& initialValues,
+                                           const GaussNewtonParams& params)
+    : NonlinearOptimizer(
+          graph, std::unique_ptr<State>(new State(initialValues, graph.error(initialValues)))),
+      params_(ensureHasOrdering(params, graph)) {}
+
+GaussNewtonOptimizer::GaussNewtonOptimizer(const NonlinearFactorGraph& graph,
+                                           const Values& initialValues, const Ordering& ordering)
+    : NonlinearOptimizer(
+          graph, std::unique_ptr<State>(new State(initialValues, graph.error(initialValues)))) {
+  params_.ordering = ordering;
+}
+
+/* ************************************************************************* */
+GaussianFactorGraph::shared_ptr GaussNewtonOptimizer::iterate() {
+  gttic(GaussNewtonOptimizer_Iterate);

   // Linearize graph
   gttic(GaussNewtonOptimizer_Linearize);
-  GaussianFactorGraph::shared_ptr linear = graph_.linearize(current.values);
+  GaussianFactorGraph::shared_ptr linear = graph_.linearize(state_->values);
   gttoc(GaussNewtonOptimizer_Linearize);

   // Solve Factor Graph
   gttic(GaussNewtonOptimizer_Solve);
-  const VectorValues delta = solve(*linear, current.values, params_);
+  const VectorValues delta = solve(*linear, params_);
   gttoc(GaussNewtonOptimizer_Solve);

   // Maybe show output
-  if(params_.verbosity >= NonlinearOptimizerParams::DELTA) delta.print("delta");
+  if (params_.verbosity >= NonlinearOptimizerParams::DELTA)
+    delta.print("delta");

   // Create new state with new values and new error
-  state_.values = current.values.retract(delta);
-  state_.error = graph_.error(state_.values);
-  ++ state_.iterations;
+  Values newValues = state_->values.retract(delta);
+  // Evaluate the error before constructing the new state: passing
+  // std::move(newValues) and graph_.error(newValues) in the same call is
+  // unsequenced and may read from a moved-from object.
+  const double newError = graph_.error(newValues);
+  state_.reset(new State(std::move(newValues), newError, state_->iterations + 1));
+
+  return linear;
 }

 /* ************************************************************************* */
diff --git a/gtsam/nonlinear/GaussNewtonOptimizer.h b/gtsam/nonlinear/GaussNewtonOptimizer.h
index 8b41a979c..3e75fc23d 100644
--- a/gtsam/nonlinear/GaussNewtonOptimizer.h
+++ b/gtsam/nonlinear/GaussNewtonOptimizer.h
@@ -30,14 +30,6 @@ class GaussNewtonOptimizer;

 class GTSAM_EXPORT GaussNewtonParams : public
NonlinearOptimizerParams { }; -class GTSAM_EXPORT GaussNewtonState : public NonlinearOptimizerState { -protected: - GaussNewtonState(const NonlinearFactorGraph& graph, const Values& values, unsigned int iterations = 0) : - NonlinearOptimizerState(graph, values, iterations) {} - - friend class GaussNewtonOptimizer; -}; - /** * This class performs Gauss-Newton nonlinear optimization */ @@ -45,7 +37,6 @@ class GTSAM_EXPORT GaussNewtonOptimizer : public NonlinearOptimizer { protected: GaussNewtonParams params_; - GaussNewtonState state_; public: /// @name Standard interface @@ -59,9 +50,8 @@ public: * @param initialValues The initial variable assignments * @param params The optimization parameters */ - GaussNewtonOptimizer(const NonlinearFactorGraph& graph, const Values& initialValues, - const GaussNewtonParams& params = GaussNewtonParams()) : - NonlinearOptimizer(graph), params_(ensureHasOrdering(params, graph)), state_(graph, initialValues) {} + GaussNewtonOptimizer(const NonlinearFactorGraph& graph, const Values& initialValues, + const GaussNewtonParams& params = GaussNewtonParams()); /** Standard constructor, requires a nonlinear factor graph, initial * variable assignments, and optimization parameters. For convenience this @@ -70,10 +60,8 @@ public: * @param graph The nonlinear factor graph to optimize * @param initialValues The initial variable assignments */ - GaussNewtonOptimizer(const NonlinearFactorGraph& graph, const Values& initialValues, const Ordering& ordering) : - NonlinearOptimizer(graph), state_(graph, initialValues) { - params_.ordering = ordering; } - + GaussNewtonOptimizer(const NonlinearFactorGraph& graph, const Values& initialValues, + const Ordering& ordering); /// @} /// @name Advanced interface @@ -86,28 +74,16 @@ public: * containing the updated variable assignments, which may be retrieved with * values(). */ - virtual void iterate(); + GaussianFactorGraph::shared_ptr iterate() override; /** Read-only access the parameters */ const GaussNewtonParams& params() const { return params_; } - /** Read/write access the parameters. */ - GaussNewtonParams& params() { return params_; } - - /** Read-only access the last state */ - const GaussNewtonState& state() const { return state_; } - - /** Read/write access the last state. When modifying the state, the error, etc. must be consistent before calling iterate() */ - GaussNewtonState& state() { return state_; } - /// @} protected: /** Access the parameters (base class version) */ - virtual const NonlinearOptimizerParams& _params() const { return params_; } - - /** Access the state (base class version) */ - virtual const NonlinearOptimizerState& _state() const { return state_; } + const NonlinearOptimizerParams& _params() const override { return params_; } /** Internal function for computing a COLAMD ordering if no ordering is specified */ GaussNewtonParams ensureHasOrdering(GaussNewtonParams params, const NonlinearFactorGraph& graph) const; diff --git a/gtsam/nonlinear/ISAM2-inl.h b/gtsam/nonlinear/ISAM2-inl.h index ccaf898d3..f04e16b7d 100644 --- a/gtsam/nonlinear/ISAM2-inl.h +++ b/gtsam/nonlinear/ISAM2-inl.h @@ -140,6 +140,7 @@ bool optimizeWildfireNode(const boost::shared_ptr& clique, double thresh // Solve clique if it was replaced, or if any parents were changed above the // threshold or themselves replaced. 
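The next hunk rewrites the back-substitution in optimizeWildfireNode because Eigen 3.3 stopped materializing a temporary for the matrix-vector product on the right-hand side, so the old in-place update xS = c.getb() - c.get_S() * xS could read entries it had already overwritten. A standalone sketch of the pitfall and the fix (illustrative only, not part of this patch; assumes the Eigen 3.3 behavior described in the NOTE below):

    // Illustration of the aliasing hazard worked around in the hunk below.
    #include <Eigen/Dense>

    int main() {
      const Eigen::MatrixXd S = Eigen::MatrixXd::Random(3, 3);
      const Eigen::VectorXd b = Eigen::VectorXd::Random(3);
      Eigen::VectorXd xS = Eigen::VectorXd::Random(3);

      // Risky: xS appears on both sides. Per the NOTE in the hunk, Eigen 3.3
      // may evaluate S * xS straight into xS, reading already-overwritten
      // entries:
      //   xS = b - S * xS;

      // Safe: force the right-hand side into its own temporary first.
      const Eigen::VectorXd rhs = b - S * xS;
      xS = rhs;
      return 0;
    }
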
+  // TODO(gareth): This code shares a lot of logic w/ linearAlgorithms-inst, potentially refactor
   if(recalculate) {
     // Temporary copy of the original values, to check how much they change
@@ -188,16 +189,21 @@ bool optimizeWildfireNode(const boost::shared_ptr& clique, double thresh
         vectorPos += parentVector.size();
       }
     }
-    xS = c.getb() - c.get_S() * xS;
-    Vector soln = c.get_R().triangularView().solve(xS);
+
+    // NOTE(gareth): We can no longer write: xS = b - S * xS
+    // This is because Eigen (as of 3.3) no longer evaluates S * xS into
+    // a temporary, and the operation trashes values in xS.
+    // See: http://eigen.tuxfamily.org/index.php?title=3.3
+    const Vector rhs = c.getb() - c.get_S() * xS;
+    const Vector solution = c.get_R().triangularView().solve(rhs);

     // Check for indeterminant solution
-    if(soln.hasNaN()) throw IndeterminantLinearSystemException(c.keys().front());
+    if(solution.hasNaN()) throw IndeterminantLinearSystemException(c.keys().front());

     // Insert solution into a VectorValues
     DenseIndex vectorPosition = 0;
     for(GaussianConditional::const_iterator frontal = c.beginFrontals(); frontal != c.endFrontals(); ++frontal) {
-      clique->solnPointers_.at(*frontal)->second = soln.segment(vectorPosition, c.getDim(frontal));
+      clique->solnPointers_.at(*frontal)->second = solution.segment(vectorPosition, c.getDim(frontal));
       vectorPosition += c.getDim(frontal);
     }
   }
diff --git a/gtsam/nonlinear/ISAM2.cpp b/gtsam/nonlinear/ISAM2.cpp
index a0244ce14..18296b393 100644
--- a/gtsam/nonlinear/ISAM2.cpp
+++ b/gtsam/nonlinear/ISAM2.cpp
@@ -1004,7 +1004,7 @@ void ISAM2::updateDelta(bool forceFullSolve) const
     gttic(Copy_dx_d);
     // Update Delta and linear step
-    doglegDelta_ = doglegResult.Delta;
+    doglegDelta_ = doglegResult.delta;
     delta_ = doglegResult.dx_d; // Copy the VectorValues containing with the linear solution
     gttoc(Copy_dx_d);
   }
diff --git a/gtsam/nonlinear/LevenbergMarquardtOptimizer.cpp b/gtsam/nonlinear/LevenbergMarquardtOptimizer.cpp
index fdd86d36b..5f29c3bdf 100644
--- a/gtsam/nonlinear/LevenbergMarquardtOptimizer.cpp
+++ b/gtsam/nonlinear/LevenbergMarquardtOptimizer.cpp
@@ -11,392 +11,286 @@
/**
 * @file LevenbergMarquardtOptimizer.cpp
- * @brief
+ * @brief A nonlinear optimizer that uses the Levenberg-Marquardt trust-region scheme
 * @author Richard Roberts
+ * @author Frank Dellaert
 * @author Luca Carlone
- * @date Feb 26, 2012
+ * @date Feb 26, 2012
 */

#include
-#include
+#include
+#include
+#include
#include
-#include
-#include
+#include
+#include
+#include
#include
-#include
-#include
#include
+#include
+#include
+#include
#include
+#include
#include
#include
-#include

using namespace std;

namespace gtsam {

using boost::adaptors::map_values;
+typedef internal::LevenbergMarquardtState State;

/* ************************************************************************* */
-LevenbergMarquardtParams::VerbosityLM LevenbergMarquardtParams::verbosityLMTranslator(
-    const std::string &src) {
-  std::string s = src;
-  boost::algorithm::to_upper(s);
-  if (s == "SILENT")
-    return LevenbergMarquardtParams::SILENT;
-  if (s == "SUMMARY")
-    return LevenbergMarquardtParams::SUMMARY;
-  if (s == "LAMBDA")
-    return LevenbergMarquardtParams::LAMBDA;
-  if (s == "TRYLAMBDA")
-    return LevenbergMarquardtParams::TRYLAMBDA;
-  if (s == "TRYCONFIG")
-    return LevenbergMarquardtParams::TRYCONFIG;
-  if (s == "TRYDELTA")
-    return LevenbergMarquardtParams::TRYDELTA;
-  if (s == "DAMPED")
-    return LevenbergMarquardtParams::DAMPED;
+LevenbergMarquardtOptimizer::LevenbergMarquardtOptimizer(const NonlinearFactorGraph&
graph, + const Values& initialValues, + const LevenbergMarquardtParams& params) + : NonlinearOptimizer( + graph, std::unique_ptr(new State(initialValues, graph.error(initialValues), + params.lambdaInitial, params.lambdaFactor))), + params_(LevenbergMarquardtParams::EnsureHasOrdering(params, graph)) {} - /* default is silent */ - return LevenbergMarquardtParams::SILENT; +LevenbergMarquardtOptimizer::LevenbergMarquardtOptimizer(const NonlinearFactorGraph& graph, + const Values& initialValues, + const Ordering& ordering, + const LevenbergMarquardtParams& params) + : NonlinearOptimizer( + graph, std::unique_ptr(new State(initialValues, graph.error(initialValues), + params.lambdaInitial, params.lambdaFactor))), + params_(LevenbergMarquardtParams::ReplaceOrdering(params, ordering)) {} + +/* ************************************************************************* */ +void LevenbergMarquardtOptimizer::initTime() { + startTime_ = boost::posix_time::microsec_clock::universal_time(); } /* ************************************************************************* */ -std::string LevenbergMarquardtParams::verbosityLMTranslator( - VerbosityLM value) { - std::string s; - switch (value) { - case LevenbergMarquardtParams::SILENT: - s = "SILENT"; - break; - case LevenbergMarquardtParams::SUMMARY: - s = "SUMMARY"; - break; - case LevenbergMarquardtParams::TERMINATION: - s = "TERMINATION"; - break; - case LevenbergMarquardtParams::LAMBDA: - s = "LAMBDA"; - break; - case LevenbergMarquardtParams::TRYLAMBDA: - s = "TRYLAMBDA"; - break; - case LevenbergMarquardtParams::TRYCONFIG: - s = "TRYCONFIG"; - break; - case LevenbergMarquardtParams::TRYDELTA: - s = "TRYDELTA"; - break; - case LevenbergMarquardtParams::DAMPED: - s = "DAMPED"; - break; - default: - s = "UNDEFINED"; - break; - } - return s; +double LevenbergMarquardtOptimizer::lambda() const { + auto currentState = static_cast(state_.get()); + return currentState->lambda; } /* ************************************************************************* */ -void LevenbergMarquardtParams::print(const std::string& str) const { - NonlinearOptimizerParams::print(str); - std::cout << " lambdaInitial: " << lambdaInitial << "\n"; - std::cout << " lambdaFactor: " << lambdaFactor << "\n"; - std::cout << " lambdaUpperBound: " << lambdaUpperBound << "\n"; - std::cout << " lambdaLowerBound: " << lambdaLowerBound << "\n"; - std::cout << " minModelFidelity: " << minModelFidelity << "\n"; - std::cout << " diagonalDamping: " << diagonalDamping << "\n"; - std::cout << " minDiagonal: " << minDiagonal << "\n"; - std::cout << " maxDiagonal: " << maxDiagonal << "\n"; - std::cout << " verbosityLM: " - << verbosityLMTranslator(verbosityLM) << "\n"; - std::cout.flush(); +int LevenbergMarquardtOptimizer::getInnerIterations() const { + auto currentState = static_cast(state_.get()); + return currentState->totalNumberInnerIterations; } /* ************************************************************************* */ GaussianFactorGraph::shared_ptr LevenbergMarquardtOptimizer::linearize() const { - return graph_.linearize(state_.values); + return graph_.linearize(state_->values); } /* ************************************************************************* */ -void LevenbergMarquardtOptimizer::increaseLambda() { - if (params_.useFixedLambdaFactor) { - state_.lambda *= params_.lambdaFactor; - } else { - state_.lambda *= params_.lambdaFactor; - params_.lambdaFactor *= 2.0; - } - state_.reuseDiagonal = true; -} - -/* ************************************************************************* */ 
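For reference, the lambda-update policies implemented by the removed increaseLambda/decreaseLambda helpers here (and reintroduced on the internal LevenbergMarquardtState elsewhere in this patch) come in two flavors: with useFixedLambdaFactor, lambda is simply multiplied or divided by lambdaFactor; otherwise a Ceres-style adaptive rule scales lambda by max(1/3, 1 - (2q - 1)^3) for step quality q and clamps it at lambdaLowerBound. A minimal sketch of the decrease rule (illustrative names, not the GTSAM API):

    #include <algorithm>
    #include <cmath>

    // Sketch of the post-success lambda decrease: either a fixed divide by
    // lambdaFactor, or the adaptive Ceres-style shrink driven by step quality q
    // (q near 1 means the linear model predicted the cost drop well, so lambda
    // is shrunk aggressively).
    double decreasedLambda(double lambda, double lambdaFactor, bool useFixedLambdaFactor,
                           double stepQuality, double lambdaLowerBound) {
      if (useFixedLambdaFactor)
        lambda /= lambdaFactor;
      else
        lambda *= std::max(1.0 / 3.0, 1.0 - std::pow(2.0 * stepQuality - 1.0, 3.0));
      return std::max(lambdaLowerBound, lambda);  // never below the configured floor
    }
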
-void LevenbergMarquardtOptimizer::decreaseLambda(double stepQuality) { - - if (params_.useFixedLambdaFactor) { - state_.lambda /= params_.lambdaFactor; - } else { - // CHECK_GT(step_quality, 0.0); - state_.lambda *= std::max(1.0 / 3.0, 1.0 - pow(2.0 * stepQuality - 1.0, 3)); - params_.lambdaFactor = 2.0; - } - state_.lambda = std::max(params_.lambdaLowerBound, state_.lambda); - state_.reuseDiagonal = false; - -} - -/* ************************************************************************* */ -GaussianFactorGraph::shared_ptr LevenbergMarquardtOptimizer::buildDampedSystem( - const GaussianFactorGraph& linear) { - +GaussianFactorGraph LevenbergMarquardtOptimizer::buildDampedSystem( + const GaussianFactorGraph& linear, const VectorValues& sqrtHessianDiagonal) const { gttic(damp); + auto currentState = static_cast(state_.get()); + if (params_.verbosityLM >= LevenbergMarquardtParams::DAMPED) - cout << "building damped system with lambda " << state_.lambda << endl; + std::cout << "building damped system with lambda " << currentState->lambda << std::endl; - // Only retrieve diagonal vector when reuse_diagonal = false - if (params_.diagonalDamping && state_.reuseDiagonal == false) { - state_.hessianDiagonal = linear.hessianDiagonal(); - for(Vector& v: state_.hessianDiagonal | map_values) { - for (int aa = 0; aa < v.size(); aa++) { - v(aa) = std::min(std::max(v(aa), params_.minDiagonal), - params_.maxDiagonal); - v(aa) = sqrt(v(aa)); - } - } - } // reuse diagonal - - // for each of the variables, add a prior - double sigma = 1.0 / std::sqrt(state_.lambda); - GaussianFactorGraph::shared_ptr dampedPtr = linear.cloneToPtr(); - GaussianFactorGraph &damped = (*dampedPtr); - damped.reserve(damped.size() + state_.values.size()); - if (params_.diagonalDamping) { - for(const VectorValues::KeyValuePair& key_vector: state_.hessianDiagonal) { - // Fill in the diagonal of A with diag(hessian) - try { - Matrix A = Eigen::DiagonalMatrix( - state_.hessianDiagonal.at(key_vector.first)); - size_t dim = key_vector.second.size(); - Vector b = Vector::Zero(dim); - SharedDiagonal model = noiseModel::Isotropic::Sigma(dim, sigma); - damped += boost::make_shared(key_vector.first, A, b, - model); - } catch (const std::exception& e) { - // Don't attempt any damping if no key found in diagonal - continue; - } - } - } else { - // Straightforward damping: - - // initialize noise model cache to a reasonable default size - NoiseCacheVector noises(6); - for(const Values::KeyValuePair& key_value: state_.values) { - size_t dim = key_value.value.dim(); - - if (dim > noises.size()) - noises.resize(dim); - - NoiseCacheItem& item = noises[dim-1]; - - // Initialize noise model, A and b if we haven't done so already - if(!item.model) { - item.A = Matrix::Identity(dim, dim); - item.b = Vector::Zero(dim); - item.model = noiseModel::Isotropic::Sigma(dim, sigma); - } - damped += boost::make_shared(key_value.key, item.A, item.b, item.model); - } - } - gttoc(damp); - return dampedPtr; + if (params_.diagonalDamping) + return currentState->buildDampedSystem(linear, sqrtHessianDiagonal); + else + return currentState->buildDampedSystem(linear); } /* ************************************************************************* */ // Log current error/lambda to file inline void LevenbergMarquardtOptimizer::writeLogFile(double currentError){ + auto currentState = static_cast(state_.get()); + if (!params_.logFile.empty()) { ofstream os(params_.logFile.c_str(), ios::app); boost::posix_time::ptime currentTime = 
boost::posix_time::microsec_clock::universal_time();
-    os << /*inner iterations*/ state_.totalNumberInnerIterations << ","
-       << 1e-6 * (currentTime - state_.startTime).total_microseconds() << ","
-       << /*current error*/ currentError << "," << state_.lambda << ","
-       << /*outer iterations*/ state_.iterations << endl;
+    os << /*inner iterations*/ currentState->totalNumberInnerIterations << ","
+       << 1e-6 * (currentTime - startTime_).total_microseconds() << ","
+       << /*current error*/ currentError << "," << currentState->lambda << ","
+       << /*outer iterations*/ currentState->iterations << endl;
  }
}

/* ************************************************************************* */
-void LevenbergMarquardtOptimizer::iterate() {
+bool LevenbergMarquardtOptimizer::tryLambda(const GaussianFactorGraph& linear,
+                                            const VectorValues& sqrtHessianDiagonal) {
+  auto currentState = static_cast(state_.get());
+  bool verbose = (params_.verbosityLM >= LevenbergMarquardtParams::TRYLAMBDA);
+
+#ifdef GTSAM_USING_NEW_BOOST_TIMERS
+  boost::timer::cpu_timer lambda_iteration_timer;
+  lambda_iteration_timer.start();
+#else
+  boost::timer lambda_iteration_timer;
+  lambda_iteration_timer.restart();
+#endif
+
+  if (verbose)
+    cout << "trying lambda = " << currentState->lambda << endl;
+
+  // Build damped system for this lambda (adds prior factors that make it like gradient descent)
+  auto dampedSystem = buildDampedSystem(linear, sqrtHessianDiagonal);
+
+  // Try solving
+  double modelFidelity = 0.0;
+  bool step_is_successful = false;
+  bool stopSearchingLambda = false;
+  double newError = numeric_limits::infinity(), costChange;
+  Values newValues;
+  VectorValues delta;
+
+  bool systemSolvedSuccessfully;
+  try {
+    // ============ Solve is where most computation happens !! =================
+    delta = solve(dampedSystem, params_);
+    systemSolvedSuccessfully = true;
+  } catch (const IndeterminantLinearSystemException& e) {
+    systemSolvedSuccessfully = false;
+  }
+
+  if (systemSolvedSuccessfully) {
+    if (verbose)
+      cout << "linear delta norm = " << delta.norm() << endl;
+    if (params_.verbosityLM >= LevenbergMarquardtParams::TRYDELTA)
+      delta.print("delta");
+
+    // cost change in the linearized system (old - new)
+    double newlinearizedError = linear.error(delta);
+
+    double linearizedCostChange = currentState->error - newlinearizedError;
+    if (verbose)
+      cout << "newlinearizedError = " << newlinearizedError
+           << " linearizedCostChange = " << linearizedCostChange << endl;
+
+    if (linearizedCostChange >= 0) {  // step is valid
+      // update values
+      gttic(retract);
+      // ============ This is where the solution is updated ====================
+      newValues = currentState->values.retract(delta);
+      // =======================================================================
+      gttoc(retract);
+
+      // compute new error
+      gttic(compute_error);
+      if (verbose)
+        cout << "calculating error:" << endl;
+      newError = graph_.error(newValues);
+      gttoc(compute_error);
+
+      if (verbose)
+        cout << "old error (" << currentState->error << ") new (tentative) error (" << newError
+             << ")" << endl;
+
+      // cost change in the original, nonlinear system (old - new)
+      costChange = currentState->error - newError;
+
+      if (linearizedCostChange >
+          1e-20) {  // the (linear) error has to decrease to satisfy this condition
+        // fidelity of linearized model vs. original system (ratio of nonlinear to linear cost change)
+        modelFidelity = costChange / linearizedCostChange;
+        // if we decrease the error in the nonlinear system and modelFidelity is above threshold
+        step_is_successful = modelFidelity >
params_.minModelFidelity;
+        if (verbose)
+          cout << "modelFidelity: " << modelFidelity << endl;
+      }  // else we consider the step unsuccessful and we either increase lambda or stop if error
+         // change is small
+
+      double minAbsoluteTolerance = params_.relativeErrorTol * currentState->error;
+      // if the change is small we terminate
+      if (fabs(costChange) < minAbsoluteTolerance) {
+        if (verbose)
+          cout << "fabs(costChange)=" << fabs(costChange)
+               << " minAbsoluteTolerance=" << minAbsoluteTolerance
+               << " (relativeErrorTol=" << params_.relativeErrorTol << ")" << endl;
+        stopSearchingLambda = true;
+      }
+    }
+  }  // if (systemSolvedSuccessfully)
+
+  if (params_.verbosityLM == LevenbergMarquardtParams::SUMMARY) {
+// do timing
+#ifdef GTSAM_USING_NEW_BOOST_TIMERS
+    double iterationTime = 1e-9 * lambda_iteration_timer.elapsed().wall;
+#else
+    double iterationTime = lambda_iteration_timer.elapsed();
+#endif
+    if (currentState->iterations == 0)
+      cout << "iter cost cost_change lambda success iter_time" << endl;
+
+    cout << boost::format("% 4d % 8e % 3.2e % 3.2e % 4d % 3.2e") % currentState->iterations %
+            newError % costChange % currentState->lambda % systemSolvedSuccessfully %
+            iterationTime << endl;
+  }
+
+  if (step_is_successful) {
+    // we have successfully decreased the cost and we have good modelFidelity
+    // NOTE(frank): As we return immediately after this, we move the newValues
+    // TODO(frank): make Values actually support move. Does not seem to happen now.
+    state_ = currentState->decreaseLambda(params_, modelFidelity, std::move(newValues), newError);
+    return true;
+  } else if (!stopSearchingLambda) {  // we failed to solve the system or had no decrease in cost
+    if (verbose)
+      cout << "increasing lambda" << endl;
+    State* modifiedState = static_cast(state_.get());
+    modifiedState->increaseLambda(params_);  // TODO(frank): make this functional with Values move
+
+    // check if lambda is too big
+    if (modifiedState->lambda >= params_.lambdaUpperBound) {
+      if (params_.verbosity >= NonlinearOptimizerParams::TERMINATION ||
+          params_.verbosityLM == LevenbergMarquardtParams::SUMMARY)
+        cout << "Warning: Levenberg-Marquardt giving up because "
+                "cannot decrease error with maximum lambda" << endl;
+      return true;
+    } else {
+      return false;  // only case where we will keep trying
+    }
+  } else {  // the change in the cost is very small and it is not worth trying bigger lambdas
+    if (verbose)
+      cout << "Levenberg-Marquardt: stopping as relative cost reduction is small" << endl;
+    return true;
+  }
+}
+
+/* ************************************************************************* */
+GaussianFactorGraph::shared_ptr LevenbergMarquardtOptimizer::iterate() {
+  auto currentState = static_cast(state_.get());

  gttic(LM_iterate);

-  // Pull out parameters we'll use
-  const NonlinearOptimizerParams::Verbosity nloVerbosity = params_.verbosity;
-  const LevenbergMarquardtParams::VerbosityLM lmVerbosity = params_.verbosityLM;
-
  // Linearize graph
-  if (lmVerbosity >= LevenbergMarquardtParams::DAMPED)
+  if (params_.verbosityLM >= LevenbergMarquardtParams::DAMPED)
    cout << "linearizing = " << endl;
  GaussianFactorGraph::shared_ptr linear = linearize();

-  if(state_.totalNumberInnerIterations==0) { // write initial error
-    writeLogFile(state_.error);
+  if(currentState->totalNumberInnerIterations==0) { // write initial error
+    writeLogFile(currentState->error);

-    if (lmVerbosity == LevenbergMarquardtParams::SUMMARY) {
-      cout << "Initial error: " << state_.error << ", values: " << state_.values.size()
-          << std::endl;
+    if
(params_.verbosityLM == LevenbergMarquardtParams::SUMMARY) { + cout << "Initial error: " << currentState->error + << ", values: " << currentState->values.size() << std::endl; + } + } + + // Only calculate diagonal of Hessian (expensive) once per outer iteration, if we need it + VectorValues sqrtHessianDiagonal; + if (params_.diagonalDamping) { + sqrtHessianDiagonal = linear->hessianDiagonal(); + for (Vector& v : sqrtHessianDiagonal | map_values) { + v = v.cwiseMax(params_.minDiagonal).cwiseMin(params_.maxDiagonal).cwiseSqrt(); } } // Keep increasing lambda until we make make progress - while (true) { + while (!tryLambda(*linear, sqrtHessianDiagonal)) { + auto newState = static_cast(state_.get()); + writeLogFile(newState->error); + } -#ifdef GTSAM_USING_NEW_BOOST_TIMERS - boost::timer::cpu_timer lamda_iteration_timer; - lamda_iteration_timer.start(); -#else - boost::timer lamda_iteration_timer; - lamda_iteration_timer.restart(); -#endif - - if (lmVerbosity >= LevenbergMarquardtParams::TRYLAMBDA) - cout << "trying lambda = " << state_.lambda << endl; - - // Build damped system for this lambda (adds prior factors that make it like gradient descent) - GaussianFactorGraph::shared_ptr dampedSystemPtr = buildDampedSystem(*linear); - GaussianFactorGraph &dampedSystem = (*dampedSystemPtr); - - // Try solving - double modelFidelity = 0.0; - bool step_is_successful = false; - bool stopSearchingLambda = false; - double newError = numeric_limits::infinity(), costChange; - Values newValues; - VectorValues delta; - - bool systemSolvedSuccessfully; - try { - // ============ Solve is where most computation happens !! ================= - delta = solve(dampedSystem, state_.values, params_); - systemSolvedSuccessfully = true; - } catch (const IndeterminantLinearSystemException& e) { - systemSolvedSuccessfully = false; - } - - if (systemSolvedSuccessfully) { - state_.reuseDiagonal = true; - - if (lmVerbosity >= LevenbergMarquardtParams::TRYLAMBDA) - cout << "linear delta norm = " << delta.norm() << endl; - if (lmVerbosity >= LevenbergMarquardtParams::TRYDELTA) - delta.print("delta"); - - // cost change in the linearized system (old - new) - double newlinearizedError = linear->error(delta); - - double linearizedCostChange = state_.error - newlinearizedError; - if (lmVerbosity >= LevenbergMarquardtParams::TRYLAMBDA) - cout << "newlinearizedError = " << newlinearizedError << - " linearizedCostChange = " << linearizedCostChange << endl; - - if (linearizedCostChange >= 0) { // step is valid - // update values - gttic(retract); - // ============ This is where the solution is updated ==================== - newValues = state_.values.retract(delta); - // ======================================================================= - gttoc(retract); - - // compute new error - gttic(compute_error); - if (lmVerbosity >= LevenbergMarquardtParams::TRYLAMBDA) - cout << "calculating error:" << endl; - newError = graph_.error(newValues); - gttoc(compute_error); - - if (lmVerbosity >= LevenbergMarquardtParams::TRYLAMBDA) - cout << "old error (" << state_.error - << ") new (tentative) error (" << newError << ")" << endl; - - // cost change in the original, nonlinear system (old - new) - costChange = state_.error - newError; - - if (linearizedCostChange > 1e-20) { // the (linear) error has to decrease to satisfy this condition - // fidelity of linearized model VS original system between - modelFidelity = costChange / linearizedCostChange; - // if we decrease the error in the nonlinear system and modelFidelity is above threshold - 
step_is_successful = modelFidelity > params_.minModelFidelity; - if (lmVerbosity >= LevenbergMarquardtParams::TRYLAMBDA) - cout << "modelFidelity: " << modelFidelity << endl; - } // else we consider the step non successful and we either increase lambda or stop if error change is small - - double minAbsoluteTolerance = params_.relativeErrorTol * state_.error; - // if the change is small we terminate - if (fabs(costChange) < minAbsoluteTolerance){ - if (lmVerbosity >= LevenbergMarquardtParams::TRYLAMBDA) - cout << "fabs(costChange)="<= LevenbergMarquardtParams::TRYLAMBDA) - cout << "increasing lambda" << endl; - increaseLambda(); - writeLogFile(state_.error); - - // check if lambda is too big - if (state_.lambda >= params_.lambdaUpperBound) { - if (nloVerbosity >= NonlinearOptimizerParams::TERMINATION || - lmVerbosity == LevenbergMarquardtParams::SUMMARY) - cout << "Warning: Levenberg-Marquardt giving up because " - "cannot decrease error with maximum lambda" << endl; - break; - } - } else { // the change in the cost is very small and it is not worth trying bigger lambdas - writeLogFile(state_.error); - if (lmVerbosity >= LevenbergMarquardtParams::TRYLAMBDA) - cout << "Levenberg-Marquardt: stopping as relative cost reduction is small" << endl; - break; - } - } // end while - - // Increment the iteration counter - ++state_.iterations; -} - -/* ************************************************************************* */ -LevenbergMarquardtParams LevenbergMarquardtOptimizer::ensureHasOrdering( - LevenbergMarquardtParams params, const NonlinearFactorGraph& graph) const { - if (!params.ordering) - params.ordering = Ordering::Create(params.orderingType, graph); - return params; + return linear; } } /* namespace gtsam */ diff --git a/gtsam/nonlinear/LevenbergMarquardtOptimizer.h b/gtsam/nonlinear/LevenbergMarquardtOptimizer.h index 2be4a218e..904e08770 100644 --- a/gtsam/nonlinear/LevenbergMarquardtOptimizer.h +++ b/gtsam/nonlinear/LevenbergMarquardtOptimizer.h @@ -11,173 +11,38 @@ /** * @file LevenbergMarquardtOptimizer.h - * @brief + * @brief A nonlinear optimizer that uses the Levenberg-Marquardt trust-region scheme * @author Richard Roberts - * @date Feb 26, 2012 + * @author Frank Dellaert + * @author Luca Carlone + * @date Feb 26, 2012 */ #pragma once #include +#include #include -#include class NonlinearOptimizerMoreOptimizationTest; namespace gtsam { -class LevenbergMarquardtOptimizer; - -/** Parameters for Levenberg-Marquardt optimization. Note that this parameters - * class inherits from NonlinearOptimizerParams, which specifies the parameters - * common to all nonlinear optimization algorithms. This class also contains - * all of those parameters. 
- */ -class GTSAM_EXPORT LevenbergMarquardtParams: public NonlinearOptimizerParams { - -public: - /** See LevenbergMarquardtParams::lmVerbosity */ - enum VerbosityLM { - SILENT = 0, SUMMARY, TERMINATION, LAMBDA, TRYLAMBDA, TRYCONFIG, DAMPED, TRYDELTA - }; - - static VerbosityLM verbosityLMTranslator(const std::string &s); - static std::string verbosityLMTranslator(VerbosityLM value); - -public: - - double lambdaInitial; ///< The initial Levenberg-Marquardt damping term (default: 1e-5) - double lambdaFactor; ///< The amount by which to multiply or divide lambda when adjusting lambda (default: 10.0) - double lambdaUpperBound; ///< The maximum lambda to try before assuming the optimization has failed (default: 1e5) - double lambdaLowerBound; ///< The minimum lambda used in LM (default: 0) - VerbosityLM verbosityLM; ///< The verbosity level for Levenberg-Marquardt (default: SILENT), see also NonlinearOptimizerParams::verbosity - double minModelFidelity; ///< Lower bound for the modelFidelity to accept the result of an LM iteration - std::string logFile; ///< an optional CSV log file, with [iteration, time, error, lambda] - bool diagonalDamping; ///< if true, use diagonal of Hessian - bool useFixedLambdaFactor; ///< if true applies constant increase (or decrease) to lambda according to lambdaFactor - double minDiagonal; ///< when using diagonal damping saturates the minimum diagonal entries (default: 1e-6) - double maxDiagonal; ///< when using diagonal damping saturates the maximum diagonal entries (default: 1e32) - - LevenbergMarquardtParams() - : verbosityLM(SILENT), - diagonalDamping(false), - minDiagonal(1e-6), - maxDiagonal(1e32) { - SetLegacyDefaults(this); - } - - static void SetLegacyDefaults(LevenbergMarquardtParams* p) { - // Relevant NonlinearOptimizerParams: - p->maxIterations = 100; - p->relativeErrorTol = 1e-5; - p->absoluteErrorTol = 1e-5; - // LM-specific: - p->lambdaInitial = 1e-5; - p->lambdaFactor = 10.0; - p->lambdaUpperBound = 1e5; - p->lambdaLowerBound = 0.0; - p->minModelFidelity = 1e-3; - p->diagonalDamping = false; - p->useFixedLambdaFactor = true; - } - - // these do seem to work better for SFM - static void SetCeresDefaults(LevenbergMarquardtParams* p) { - // Relevant NonlinearOptimizerParams: - p->maxIterations = 50; - p->absoluteErrorTol = 0; // No corresponding option in CERES - p->relativeErrorTol = 1e-6; // This is function_tolerance - // LM-specific: - p->lambdaUpperBound = 1e32; - p->lambdaLowerBound = 1e-16; - p->lambdaInitial = 1e-04; - p->lambdaFactor = 2.0; - p->minModelFidelity = 1e-3; // options.min_relative_decrease in CERES - p->diagonalDamping = true; - p->useFixedLambdaFactor = false; // This is important - } - - static LevenbergMarquardtParams LegacyDefaults() { - LevenbergMarquardtParams p; - SetLegacyDefaults(&p); - return p; - } - - static LevenbergMarquardtParams CeresDefaults() { - LevenbergMarquardtParams p; - SetCeresDefaults(&p); - return p; - } - - virtual ~LevenbergMarquardtParams() {} - virtual void print(const std::string& str = "") const; - - /// @name Getters/Setters, mainly for MATLAB. Use fields above in C++. 
- /// @{ - bool getDiagonalDamping() const { return diagonalDamping; } - double getlambdaFactor() const { return lambdaFactor; } - double getlambdaInitial() const { return lambdaInitial; } - double getlambdaLowerBound() const { return lambdaLowerBound; } - double getlambdaUpperBound() const { return lambdaUpperBound; } - std::string getLogFile() const { return logFile; } - std::string getVerbosityLM() const { return verbosityLMTranslator(verbosityLM);} - void setDiagonalDamping(bool flag) { diagonalDamping = flag; } - void setlambdaFactor(double value) { lambdaFactor = value; } - void setlambdaInitial(double value) { lambdaInitial = value; } - void setlambdaLowerBound(double value) { lambdaLowerBound = value; } - void setlambdaUpperBound(double value) { lambdaUpperBound = value; } - void setLogFile(const std::string& s) { logFile = s; } - void setUseFixedLambdaFactor(bool flag) { useFixedLambdaFactor = flag;} - void setVerbosityLM(const std::string& s) { verbosityLM = verbosityLMTranslator(s);} - // @} -}; - -/** - * State for LevenbergMarquardtOptimizer - */ -class GTSAM_EXPORT LevenbergMarquardtState: public NonlinearOptimizerState { - -public: - double lambda; - boost::posix_time::ptime startTime; - int totalNumberInnerIterations; //< The total number of inner iterations in the optimization (for each iteration, LM may try multiple iterations with different lambdas) - VectorValues hessianDiagonal; //< we only update hessianDiagonal when reuseDiagonal = false - bool reuseDiagonal; ///< an additional option in Ceres for diagonalDamping - - LevenbergMarquardtState() {} // used in LM constructor but immediately overwritten - - void initTime() { - startTime = boost::posix_time::microsec_clock::universal_time(); - } - - virtual ~LevenbergMarquardtState() { - } - -protected: - LevenbergMarquardtState(const NonlinearFactorGraph& graph, - const Values& initialValues, const LevenbergMarquardtParams& params, - unsigned int iterations = 0) : - NonlinearOptimizerState(graph, initialValues, iterations), lambda( - params.lambdaInitial), totalNumberInnerIterations(0),reuseDiagonal(false) { - initTime(); - } - - friend class LevenbergMarquardtOptimizer; -}; - /** * This class performs Levenberg-Marquardt nonlinear optimization */ class GTSAM_EXPORT LevenbergMarquardtOptimizer: public NonlinearOptimizer { protected: - LevenbergMarquardtParams params_; ///< LM parameters - LevenbergMarquardtState state_; ///< optimization state + const LevenbergMarquardtParams params_; ///< LM parameters + boost::posix_time::ptime startTime_; + + void initTime(); public: typedef boost::shared_ptr shared_ptr; - /// @name Standard interface + /// @name Constructors/Destructor /// @{ /** Standard constructor, requires a nonlinear factor graph, initial @@ -188,12 +53,8 @@ public: * @param initialValues The initial variable assignments * @param params The optimization parameters */ - LevenbergMarquardtOptimizer( - const NonlinearFactorGraph& graph, const Values& initialValues, - const LevenbergMarquardtParams& params = LevenbergMarquardtParams()) - : NonlinearOptimizer(graph), - params_(ensureHasOrdering(params, graph)), - state_(graph, initialValues, params_) {} + LevenbergMarquardtOptimizer(const NonlinearFactorGraph& graph, const Values& initialValues, + const LevenbergMarquardtParams& params = LevenbergMarquardtParams()); /** Standard constructor, requires a nonlinear factor graph, initial * variable assignments, and optimization parameters. 
For convenience this @@ -202,33 +63,27 @@ public: * @param graph The nonlinear factor graph to optimize * @param initialValues The initial variable assignments */ - LevenbergMarquardtOptimizer( - const NonlinearFactorGraph& graph, const Values& initialValues, - const Ordering& ordering, - const LevenbergMarquardtParams& params = LevenbergMarquardtParams()) - : NonlinearOptimizer(graph), params_(params) { - params_.ordering = ordering; - state_ = LevenbergMarquardtState(graph, initialValues, params_); + LevenbergMarquardtOptimizer(const NonlinearFactorGraph& graph, const Values& initialValues, + const Ordering& ordering, + const LevenbergMarquardtParams& params = LevenbergMarquardtParams()); + + /** Virtual destructor */ + virtual ~LevenbergMarquardtOptimizer() { } + /// @} + + /// @name Standard interface + /// @{ + /// Access the current damping value - double lambda() const { - return state_.lambda; - } - - // Apply policy to increase lambda if the current update was successful (stepQuality not used in the naive policy) - void increaseLambda(); - - // Apply policy to decrease lambda if the current update was NOT successful (stepQuality not used in the naive policy) - void decreaseLambda(double stepQuality); + double lambda() const; /// Access the current number of inner iterations - int getInnerIterations() const { - return state_.totalNumberInnerIterations; - } + int getInnerIterations() const; /// print - virtual void print(const std::string& str = "") const { + void print(const std::string& str = "") const { std::cout << str << "LevenbergMarquardtOptimizer" << std::endl; this->params_.print(" parameters:\n"); } @@ -238,73 +93,37 @@ public: /// @name Advanced interface /// @{ - /** Virtual destructor */ - virtual ~LevenbergMarquardtOptimizer() { - } - /** Perform a single iteration, returning a new NonlinearOptimizer class * containing the updated variable assignments, which may be retrieved with * values(). */ - virtual void iterate(); + GaussianFactorGraph::shared_ptr iterate() override; /** Read-only access the parameters */ const LevenbergMarquardtParams& params() const { return params_; } - /** Read/write access the parameters */ - LevenbergMarquardtParams& params() { - return params_; - } - - /** Read-only access the last state */ - const LevenbergMarquardtState& state() const { - return state_; - } - - /** Read/write access the last state. When modifying the state, the error, etc. must be consistent before calling iterate() */ - LevenbergMarquardtState& state() { - return state_; - } - - /** Build a damped system for a specific lambda */ - GaussianFactorGraph::shared_ptr buildDampedSystem(const GaussianFactorGraph& linear); - friend class ::NonlinearOptimizerMoreOptimizationTest; - - /** Small struct to cache objects needed for damping. 
- * This is used in buildDampedSystem */ - struct NoiseCacheItem { - Matrix A; - Vector b; - SharedDiagonal model; - }; - - /// Noise model Cache - typedef std::vector NoiseCacheVector; - void writeLogFile(double currentError); + /** linearize, can be overwritten */ + virtual GaussianFactorGraph::shared_ptr linearize() const; + + /** Build a damped system for a specific lambda -- for testing only */ + GaussianFactorGraph buildDampedSystem(const GaussianFactorGraph& linear, + const VectorValues& sqrtHessianDiagonal) const; + + /** Inner loop, changes state, returns true if successful or giving up */ + bool tryLambda(const GaussianFactorGraph& linear, const VectorValues& sqrtHessianDiagonal); + /// @} protected: /** Access the parameters (base class version) */ - virtual const NonlinearOptimizerParams& _params() const { + const NonlinearOptimizerParams& _params() const override { return params_; } - - /** Access the state (base class version) */ - virtual const NonlinearOptimizerState& _state() const { - return state_; - } - - /** Internal function for computing a COLAMD ordering if no ordering is specified */ - LevenbergMarquardtParams ensureHasOrdering(LevenbergMarquardtParams params, - const NonlinearFactorGraph& graph) const; - - /** linearize, can be overwritten */ - virtual GaussianFactorGraph::shared_ptr linearize() const; }; } diff --git a/gtsam/nonlinear/LevenbergMarquardtParams.cpp b/gtsam/nonlinear/LevenbergMarquardtParams.cpp new file mode 100644 index 000000000..5d558a43a --- /dev/null +++ b/gtsam/nonlinear/LevenbergMarquardtParams.cpp @@ -0,0 +1,108 @@ +/* ---------------------------------------------------------------------------- + + * GTSAM Copyright 2010, Georgia Tech Research Corporation, + * Atlanta, Georgia 30332-0415 + * All Rights Reserved + * Authors: Frank Dellaert, et al. 
(see THANKS for the full author list) + + * See LICENSE for the license information + + * -------------------------------------------------------------------------- */ + +/** + * @file LevenbergMarquardtParams.cpp + * @brief Parameters for Levenberg-Marquardt trust-region scheme + * @author Richard Roberts + * @author Frank Dellaert + * @author Luca Carlone + * @date Feb 26, 2012 + */ + +#include +#include +#include +#include +#include + +using namespace std; + +namespace gtsam { + +/* ************************************************************************* */ +LevenbergMarquardtParams::VerbosityLM LevenbergMarquardtParams::verbosityLMTranslator( + const std::string &src) { + std::string s = src; + boost::algorithm::to_upper(s); + if (s == "SILENT") + return LevenbergMarquardtParams::SILENT; + if (s == "SUMMARY") + return LevenbergMarquardtParams::SUMMARY; + if (s == "LAMBDA") + return LevenbergMarquardtParams::LAMBDA; + if (s == "TRYLAMBDA") + return LevenbergMarquardtParams::TRYLAMBDA; + if (s == "TRYCONFIG") + return LevenbergMarquardtParams::TRYCONFIG; + if (s == "TRYDELTA") + return LevenbergMarquardtParams::TRYDELTA; + if (s == "DAMPED") + return LevenbergMarquardtParams::DAMPED; + + /* default is silent */ + return LevenbergMarquardtParams::SILENT; +} + +/* ************************************************************************* */ +std::string LevenbergMarquardtParams::verbosityLMTranslator( + VerbosityLM value) { + std::string s; + switch (value) { + case LevenbergMarquardtParams::SILENT: + s = "SILENT"; + break; + case LevenbergMarquardtParams::SUMMARY: + s = "SUMMARY"; + break; + case LevenbergMarquardtParams::TERMINATION: + s = "TERMINATION"; + break; + case LevenbergMarquardtParams::LAMBDA: + s = "LAMBDA"; + break; + case LevenbergMarquardtParams::TRYLAMBDA: + s = "TRYLAMBDA"; + break; + case LevenbergMarquardtParams::TRYCONFIG: + s = "TRYCONFIG"; + break; + case LevenbergMarquardtParams::TRYDELTA: + s = "TRYDELTA"; + break; + case LevenbergMarquardtParams::DAMPED: + s = "DAMPED"; + break; + default: + s = "UNDEFINED"; + break; + } + return s; +} + +/* ************************************************************************* */ +void LevenbergMarquardtParams::print(const std::string& str) const { + NonlinearOptimizerParams::print(str); + std::cout << " lambdaInitial: " << lambdaInitial << "\n"; + std::cout << " lambdaFactor: " << lambdaFactor << "\n"; + std::cout << " lambdaUpperBound: " << lambdaUpperBound << "\n"; + std::cout << " lambdaLowerBound: " << lambdaLowerBound << "\n"; + std::cout << " minModelFidelity: " << minModelFidelity << "\n"; + std::cout << " diagonalDamping: " << diagonalDamping << "\n"; + std::cout << " minDiagonal: " << minDiagonal << "\n"; + std::cout << " maxDiagonal: " << maxDiagonal << "\n"; + std::cout << " verbosityLM: " + << verbosityLMTranslator(verbosityLM) << "\n"; + std::cout.flush(); +} + +} /* namespace gtsam */ + diff --git a/gtsam/nonlinear/LevenbergMarquardtParams.h b/gtsam/nonlinear/LevenbergMarquardtParams.h new file mode 100644 index 000000000..abb8c3c22 --- /dev/null +++ b/gtsam/nonlinear/LevenbergMarquardtParams.h @@ -0,0 +1,145 @@ +/* ---------------------------------------------------------------------------- + + * GTSAM Copyright 2010, Georgia Tech Research Corporation, + * Atlanta, Georgia 30332-0415 + * All Rights Reserved + * Authors: Frank Dellaert, et al. 
(see THANKS for the full author list) + + * See LICENSE for the license information + + * -------------------------------------------------------------------------- */ + +/** + * @file LevenbergMarquardtParams.h + * @brief Parameters for Levenberg-Marquardt trust-region scheme + * @author Richard Roberts + * @author Frank Dellaert + * @author Luca Carlone + * @date Feb 26, 2012 + */ + +#pragma once + +#include +#include + +namespace gtsam { + +/** Parameters for Levenberg-Marquardt optimization. Note that this parameters + * class inherits from NonlinearOptimizerParams, which specifies the parameters + * common to all nonlinear optimization algorithms. This class also contains + * all of those parameters. + */ +class GTSAM_EXPORT LevenbergMarquardtParams: public NonlinearOptimizerParams { + +public: + /** See LevenbergMarquardtParams::lmVerbosity */ + enum VerbosityLM { + SILENT = 0, SUMMARY, TERMINATION, LAMBDA, TRYLAMBDA, TRYCONFIG, DAMPED, TRYDELTA + }; + + static VerbosityLM verbosityLMTranslator(const std::string &s); + static std::string verbosityLMTranslator(VerbosityLM value); + +public: + + double lambdaInitial; ///< The initial Levenberg-Marquardt damping term (default: 1e-5) + double lambdaFactor; ///< The amount by which to multiply or divide lambda when adjusting lambda (default: 10.0) + double lambdaUpperBound; ///< The maximum lambda to try before assuming the optimization has failed (default: 1e5) + double lambdaLowerBound; ///< The minimum lambda used in LM (default: 0) + VerbosityLM verbosityLM; ///< The verbosity level for Levenberg-Marquardt (default: SILENT), see also NonlinearOptimizerParams::verbosity + double minModelFidelity; ///< Lower bound for the modelFidelity to accept the result of an LM iteration + std::string logFile; ///< an optional CSV log file, with [iteration, time, error, lambda] + bool diagonalDamping; ///< if true, use diagonal of Hessian + bool useFixedLambdaFactor; ///< if true applies constant increase (or decrease) to lambda according to lambdaFactor + double minDiagonal; ///< when using diagonal damping saturates the minimum diagonal entries (default: 1e-6) + double maxDiagonal; ///< when using diagonal damping saturates the maximum diagonal entries (default: 1e32) + + LevenbergMarquardtParams() + : verbosityLM(SILENT), + diagonalDamping(false), + minDiagonal(1e-6), + maxDiagonal(1e32) { + SetLegacyDefaults(this); + } + + static void SetLegacyDefaults(LevenbergMarquardtParams* p) { + // Relevant NonlinearOptimizerParams: + p->maxIterations = 100; + p->relativeErrorTol = 1e-5; + p->absoluteErrorTol = 1e-5; + // LM-specific: + p->lambdaInitial = 1e-5; + p->lambdaFactor = 10.0; + p->lambdaUpperBound = 1e5; + p->lambdaLowerBound = 0.0; + p->minModelFidelity = 1e-3; + p->diagonalDamping = false; + p->useFixedLambdaFactor = true; + } + + // these do seem to work better for SFM + static void SetCeresDefaults(LevenbergMarquardtParams* p) { + // Relevant NonlinearOptimizerParams: + p->maxIterations = 50; + p->absoluteErrorTol = 0; // No corresponding option in CERES + p->relativeErrorTol = 1e-6; // This is function_tolerance + // LM-specific: + p->lambdaUpperBound = 1e32; + p->lambdaLowerBound = 1e-16; + p->lambdaInitial = 1e-04; + p->lambdaFactor = 2.0; + p->minModelFidelity = 1e-3; // options.min_relative_decrease in CERES + p->diagonalDamping = true; + p->useFixedLambdaFactor = false; // This is important + } + + static LevenbergMarquardtParams LegacyDefaults() { + LevenbergMarquardtParams p; + SetLegacyDefaults(&p); + return p; + } + + static 
LevenbergMarquardtParams CeresDefaults() { + LevenbergMarquardtParams p; + SetCeresDefaults(&p); + return p; + } + + static LevenbergMarquardtParams EnsureHasOrdering(LevenbergMarquardtParams params, + const NonlinearFactorGraph& graph) { + if (!params.ordering) + params.ordering = Ordering::Create(params.orderingType, graph); + return params; + } + + static LevenbergMarquardtParams ReplaceOrdering(LevenbergMarquardtParams params, + const Ordering& ordering) { + params.ordering = ordering; + return params; + } + + virtual ~LevenbergMarquardtParams() {} + void print(const std::string& str = "") const override; + + /// @name Getters/Setters, mainly for MATLAB. Use fields above in C++. + /// @{ + bool getDiagonalDamping() const { return diagonalDamping; } + double getlambdaFactor() const { return lambdaFactor; } + double getlambdaInitial() const { return lambdaInitial; } + double getlambdaLowerBound() const { return lambdaLowerBound; } + double getlambdaUpperBound() const { return lambdaUpperBound; } + std::string getLogFile() const { return logFile; } + std::string getVerbosityLM() const { return verbosityLMTranslator(verbosityLM);} + void setDiagonalDamping(bool flag) { diagonalDamping = flag; } + void setlambdaFactor(double value) { lambdaFactor = value; } + void setlambdaInitial(double value) { lambdaInitial = value; } + void setlambdaLowerBound(double value) { lambdaLowerBound = value; } + void setlambdaUpperBound(double value) { lambdaUpperBound = value; } + void setLogFile(const std::string& s) { logFile = s; } + void setUseFixedLambdaFactor(bool flag) { useFixedLambdaFactor = flag;} + void setVerbosityLM(const std::string& s) { verbosityLM = verbosityLMTranslator(s);} + // @} +}; + +} diff --git a/gtsam/nonlinear/LinearContainerFactor.cpp b/gtsam/nonlinear/LinearContainerFactor.cpp index 0b5622f28..4f19f36f8 100644 --- a/gtsam/nonlinear/LinearContainerFactor.cpp +++ b/gtsam/nonlinear/LinearContainerFactor.cpp @@ -11,14 +11,13 @@ #include #include - namespace gtsam { /* ************************************************************************* */ void LinearContainerFactor::initializeLinearizationPoint(const Values& linearizationPoint) { if (!linearizationPoint.empty()) { linearizationPoint_ = Values(); - for(const gtsam::Key& key: this->keys()) { + for (Key key : keys()) { linearizationPoint_->insert(key, linearizationPoint.at(key)); } } else { @@ -81,7 +80,7 @@ double LinearContainerFactor::error(const Values& c) const { // Extract subset of values for comparison Values csub; - for(const gtsam::Key& key: keys()) + for (Key key : keys()) csub.insert(key, c.at(key)); // create dummy ordering for evaluation @@ -110,7 +109,7 @@ GaussianFactor::shared_ptr LinearContainerFactor::linearize(const Values& c) con // Extract subset of values Values subsetC; - for(const gtsam::Key& key: this->keys()) + for (Key key : keys()) subsetC.insert(key, c.at(key)); // Determine delta between linearization points using new ordering @@ -122,10 +121,11 @@ GaussianFactor::shared_ptr LinearContainerFactor::linearize(const Values& c) con jacFactor->getb() = -jacFactor->unweighted_error(delta); } else { HessianFactor::shared_ptr hesFactor = boost::dynamic_pointer_cast(linFactor); - SymmetricBlockMatrix::constBlock Gview = hesFactor->matrixObject().range(0, hesFactor->size(), 0, hesFactor->size()); + + const auto view = hesFactor->informationView(); Vector deltaVector = delta.vector(keys()); - Vector G_delta = Gview.selfadjointView() * deltaVector; - hesFactor->constantTerm() += deltaVector.dot(G_delta) - 2.0 * 
deltaVector.dot(hesFactor->linearTerm()); + Vector G_delta = view * deltaVector; + hesFactor->constantTerm() += deltaVector.dot(G_delta) - 2.0 * deltaVector.dot(hesFactor->linearTerm().col(0)); hesFactor->linearTerm() -= G_delta; } @@ -165,14 +165,13 @@ NonlinearFactor::shared_ptr LinearContainerFactor::negateToNonlinear() const { } /* ************************************************************************* */ -NonlinearFactorGraph LinearContainerFactor::convertLinearGraph( - const GaussianFactorGraph& linear_graph, const Values& linearizationPoint) -{ +NonlinearFactorGraph LinearContainerFactor::ConvertLinearGraph( + const GaussianFactorGraph& linear_graph, const Values& linearizationPoint) { NonlinearFactorGraph result; - for(const GaussianFactor::shared_ptr& f: linear_graph) + result.reserve(linear_graph.size()); + for (const auto& f : linear_graph) if (f) - result.push_back(NonlinearFactorGraph::sharedFactor( - new LinearContainerFactor(f, linearizationPoint))); + result += boost::make_shared(f, linearizationPoint); return result; } diff --git a/gtsam/nonlinear/LinearContainerFactor.h b/gtsam/nonlinear/LinearContainerFactor.h index 307c7f001..928b59e77 100644 --- a/gtsam/nonlinear/LinearContainerFactor.h +++ b/gtsam/nonlinear/LinearContainerFactor.h @@ -11,7 +11,6 @@ #include - namespace gtsam { // Forward declarations @@ -141,9 +140,16 @@ public: * Utility function for converting linear graphs to nonlinear graphs * consisting of LinearContainerFactors. */ - static NonlinearFactorGraph convertLinearGraph(const GaussianFactorGraph& linear_graph, + static NonlinearFactorGraph ConvertLinearGraph(const GaussianFactorGraph& linear_graph, const Values& linearizationPoint = Values()); +#ifdef GTSAM_ALLOW_DEPRECATED_SINCE_V4 + static NonlinearFactorGraph convertLinearGraph(const GaussianFactorGraph& linear_graph, + const Values& linearizationPoint = Values()) { + return ConvertLinearGraph(linear_graph, linearizationPoint); + } +#endif + protected: void initializeLinearizationPoint(const Values& linearizationPoint); diff --git a/gtsam/nonlinear/Marginals.cpp b/gtsam/nonlinear/Marginals.cpp index ac8fa4e89..68da1250e 100644 --- a/gtsam/nonlinear/Marginals.cpp +++ b/gtsam/nonlinear/Marginals.cpp @@ -82,9 +82,7 @@ Matrix Marginals::marginalCovariance(Key variable) const { /* ************************************************************************* */ JointMarginal Marginals::jointMarginalCovariance(const std::vector& variables) const { JointMarginal info = jointMarginalInformation(variables); - info.blockMatrix_.full().triangularView() = - info.blockMatrix_.full().selfadjointView().llt().solve( - Matrix::Identity(info.blockMatrix_.full().rows(), info.blockMatrix_.full().rows())).triangularView(); + info.blockMatrix_.invertInPlace(); return info; } diff --git a/gtsam/nonlinear/Marginals.h b/gtsam/nonlinear/Marginals.h index ad723015d..85f694bd2 100644 --- a/gtsam/nonlinear/Marginals.h +++ b/gtsam/nonlinear/Marginals.h @@ -91,13 +91,6 @@ protected: FastMap indices_; public: - /** A block view of the joint marginal - this stores a reference to the - * JointMarginal object, so the JointMarginal object must be kept in scope - * while this block view is needed, otherwise assign this block object to a - * Matrix to store it. - */ - typedef SymmetricBlockMatrix::constBlock Block; - /** Access a block, corresponding to a pair of variables, of the joint * marginal. 
Each block is accessed by its "vertical position", * corresponding to the variable with nonlinear Key \c iVariable and @@ -111,19 +104,21 @@ public: * @param iVariable The nonlinear Key specifying the "vertical position" of the requested block * @param jVariable The nonlinear Key specifying the "horizontal position" of the requested block */ - Block operator()(Key iVariable, Key jVariable) const { - return blockMatrix_(indices_.at(iVariable), indices_.at(jVariable)); } + Matrix operator()(Key iVariable, Key jVariable) const { + const auto indexI = indices_.at(iVariable); + const auto indexJ = indices_.at(jVariable); + return blockMatrix_.block(indexI, indexJ); + } /** Synonym for operator() */ - Block at(Key iVariable, Key jVariable) const { - return (*this)(iVariable, jVariable); } + Matrix at(Key iVariable, Key jVariable) const { + return (*this)(iVariable, jVariable); + } - /** The full, dense covariance/information matrix of the joint marginal. This returns - * a reference to the JointMarginal object, so the JointMarginal object must be kept - * in scope while this view is needed. Otherwise assign this block object to a Matrix - * to store it. - */ - Eigen::SelfAdjointView fullMatrix() const { return blockMatrix_.matrix(); } + /** The full, dense covariance/information matrix of the joint marginal. */ + Matrix fullMatrix() const { + return blockMatrix_.selfadjointView(); + } /** Print */ void print(const std::string& s = "", const KeyFormatter& formatter = DefaultKeyFormatter) const; diff --git a/gtsam/nonlinear/NonlinearConjugateGradientOptimizer.cpp b/gtsam/nonlinear/NonlinearConjugateGradientOptimizer.cpp index 6d7196ff5..32aead750 100644 --- a/gtsam/nonlinear/NonlinearConjugateGradientOptimizer.cpp +++ b/gtsam/nonlinear/NonlinearConjugateGradientOptimizer.cpp @@ -17,16 +17,17 @@ */ #include +#include #include #include #include #include -using namespace std; - namespace gtsam { +typedef internal::NonlinearOptimizerState State; + /** * @brief Return the gradient vector of a nonlinear factor graph * @param nfg the graph @@ -40,8 +41,11 @@ static VectorValues gradientInPlace(const NonlinearFactorGraph &nfg, return linear->gradientAtZero(); } -double NonlinearConjugateGradientOptimizer::System::error( - const State &state) const { +NonlinearConjugateGradientOptimizer::NonlinearConjugateGradientOptimizer( + const NonlinearFactorGraph& graph, const Values& initialValues, const Parameters& params) + : Base(graph, std::unique_ptr(new State(initialValues, graph.error(initialValues)))) {} + +double NonlinearConjugateGradientOptimizer::System::error(const State& state) const { return graph_.error(state); } @@ -57,21 +61,26 @@ NonlinearConjugateGradientOptimizer::System::State NonlinearConjugateGradientOpt return current.retract(step); } -void NonlinearConjugateGradientOptimizer::iterate() { +GaussianFactorGraph::shared_ptr NonlinearConjugateGradientOptimizer::iterate() { + Values newValues; int dummy; - boost::tie(state_.values, dummy) = nonlinearConjugateGradient( - System(graph_), state_.values, params_, true /* single iterations */); - ++state_.iterations; - state_.error = graph_.error(state_.values); + boost::tie(newValues, dummy) = nonlinearConjugateGradient( + System(graph_), state_->values, params_, true /* single iteration */); + state_.reset(new State(newValues, graph_.error(newValues), state_->iterations + 1)); + + // NOTE(frank): We don't linearize this system, so we must return null here. 
+ return nullptr; } const Values& NonlinearConjugateGradientOptimizer::optimize() { // Optimize until convergence System system(graph_); - boost::tie(state_.values, state_.iterations) = // - nonlinearConjugateGradient(system, state_.values, params_, false); - state_.error = graph_.error(state_.values); - return state_.values; + Values newValues; + int iterations; + boost::tie(newValues, iterations) = + nonlinearConjugateGradient(system, state_->values, params_, false); + state_.reset(new State(std::move(newValues), graph_.error(newValues), iterations)); + return state_->values; } } /* namespace gtsam */ diff --git a/gtsam/nonlinear/NonlinearConjugateGradientOptimizer.h b/gtsam/nonlinear/NonlinearConjugateGradientOptimizer.h index 04d4734a4..7b5a0d493 100644 --- a/gtsam/nonlinear/NonlinearConjugateGradientOptimizer.h +++ b/gtsam/nonlinear/NonlinearConjugateGradientOptimizer.h @@ -25,16 +25,7 @@ namespace gtsam { /** An implementation of the nonlinear CG method using the template below */ -class GTSAM_EXPORT NonlinearConjugateGradientState: public NonlinearOptimizerState { -public: - typedef NonlinearOptimizerState Base; - NonlinearConjugateGradientState(const NonlinearFactorGraph& graph, - const Values& values) : - Base(graph, values) { - } -}; - -class GTSAM_EXPORT NonlinearConjugateGradientOptimizer: public NonlinearOptimizer { +class GTSAM_EXPORT NonlinearConjugateGradientOptimizer : public NonlinearOptimizer { /* a class for the nonlinearConjugateGradient template */ class System { @@ -63,29 +54,24 @@ public: typedef boost::shared_ptr shared_ptr; protected: - NonlinearConjugateGradientState state_; Parameters params_; + const NonlinearOptimizerParams& _params() const override { + return params_; + } + public: /// Constructor NonlinearConjugateGradientOptimizer(const NonlinearFactorGraph& graph, - const Values& initialValues, const Parameters& params = Parameters()) : - Base(graph), state_(graph, initialValues), params_(params) { - } + const Values& initialValues, const Parameters& params = Parameters()); /// Destructor virtual ~NonlinearConjugateGradientOptimizer() { } - virtual void iterate(); - virtual const Values& optimize(); - virtual const NonlinearOptimizerState& _state() const { - return state_; - } - virtual const NonlinearOptimizerParams& _params() const { - return params_; - } + GaussianFactorGraph::shared_ptr iterate() override; + const Values& optimize() override; }; /** Implement the golden-section line search algorithm */ diff --git a/gtsam/nonlinear/NonlinearFactorGraph.cpp b/gtsam/nonlinear/NonlinearFactorGraph.cpp index ae95fa72b..56234c13c 100644 --- a/gtsam/nonlinear/NonlinearFactorGraph.cpp +++ b/gtsam/nonlinear/NonlinearFactorGraph.cpp @@ -23,6 +23,8 @@ #include #include #include +#include +#include #include #include #include // for GTSAM_USE_TBB @@ -132,8 +134,8 @@ void NonlinearFactorGraph::saveGraph(std::ostream &stm, const Values& values, // Find bounds double minX = numeric_limits::infinity(), maxX = -numeric_limits::infinity(); double minY = numeric_limits::infinity(), maxY = -numeric_limits::infinity(); - for(Key key: keys) { - if(values.exists(key)) { + for (const Key& key : keys) { + if (values.exists(key)) { boost::optional xy = getXY(values.at(key), formatting); if(xy) { if(xy->x() < minX) @@ -164,8 +166,8 @@ void NonlinearFactorGraph::saveGraph(std::ostream &stm, const Values& values, if (formatting.mergeSimilarFactors) { // Remove duplicate factors std::set > structure; - for(const sharedFactor& factor: *this){ - if(factor) { + for (const sharedFactor& 
diff --git a/gtsam/nonlinear/NonlinearFactorGraph.cpp b/gtsam/nonlinear/NonlinearFactorGraph.cpp
index ae95fa72b..56234c13c 100644
--- a/gtsam/nonlinear/NonlinearFactorGraph.cpp
+++ b/gtsam/nonlinear/NonlinearFactorGraph.cpp
@@ -23,6 +23,8 @@
 #include
 #include
 #include
+#include
+#include
 #include
 #include
 #include // for GTSAM_USE_TBB
@@ -132,8 +134,8 @@ void NonlinearFactorGraph::saveGraph(std::ostream &stm, const Values& values,
   // Find bounds
   double minX = numeric_limits::infinity(), maxX = -numeric_limits::infinity();
   double minY = numeric_limits::infinity(), maxY = -numeric_limits::infinity();
-  for(Key key: keys) {
-    if(values.exists(key)) {
+  for (const Key& key : keys) {
+    if (values.exists(key)) {
       boost::optional<Point2> xy = getXY(values.at(key), formatting);
       if(xy) {
         if(xy->x() < minX)
@@ -164,8 +166,8 @@ void NonlinearFactorGraph::saveGraph(std::ostream &stm, const Values& values,
   if (formatting.mergeSimilarFactors) {
     // Remove duplicate factors
     std::set<vector<Key> > structure;
-    for(const sharedFactor& factor: *this){
-      if(factor) {
+    for (const sharedFactor& factor : factors_) {
+      if (factor) {
         vector<Key> factorKeys = factor->keys();
         std::sort(factorKeys.begin(), factorKeys.end());
         structure.insert(factorKeys);
@@ -194,8 +196,8 @@ void NonlinearFactorGraph::saveGraph(std::ostream &stm, const Values& values,
     }
   } else {
     // Create factors and variable connections
-    for(size_t i = 0; i < this->size(); ++i) {
-      const NonlinearFactor::shared_ptr& factor = this->at(i);
+    for(size_t i = 0; i < size(); ++i) {
+      const NonlinearFactor::shared_ptr& factor = at(i);
       if(formatting.plotFactorPoints) {
         const FastVector<Key>& keys = factor->keys();
         if (formatting.binaryEdges && keys.size()==2) {
@@ -245,7 +247,7 @@ double NonlinearFactorGraph::error(const Values& values) const {
   gttic(NonlinearFactorGraph_error);
   double total_error = 0.;
   // iterate over all the factors_ to accumulate the log probabilities
-  for(const sharedFactor& factor: this->factors_) {
+  for(const sharedFactor& factor: factors_) {
     if(factor)
       total_error += factor->error(values);
   }
@@ -269,9 +271,9 @@ SymbolicFactorGraph::shared_ptr NonlinearFactorGraph::symbolic() const {
   // Generate the symbolic factor graph
   SymbolicFactorGraph::shared_ptr symbolic = boost::make_shared<SymbolicFactorGraph>();
-  symbolic->reserve(this->size());
+  symbolic->reserve(size());

-  for(const sharedFactor& factor: *this) {
+  for (const sharedFactor& factor: factors_) {
     if(factor)
       *symbolic += SymbolicFactor(*factor);
     else
@@ -319,17 +321,17 @@ GaussianFactorGraph::shared_ptr NonlinearFactorGraph::linearize(const Values& li

 #ifdef GTSAM_USE_TBB

-  linearFG->resize(this->size());
+  linearFG->resize(size());
   TbbOpenMPMixedScope threadLimiter; // Limits OpenMP threads since we're mixing TBB and OpenMP
-  tbb::parallel_for(tbb::blocked_range<size_t>(0, this->size()),
+  tbb::parallel_for(tbb::blocked_range<size_t>(0, size()),
       _LinearizeOneFactor(*this, linearizationPoint, *linearFG));

 #else

-  linearFG->reserve(this->size());
+  linearFG->reserve(size());

   // linearize all factors
-  for(const sharedFactor& factor: this->factors_) {
+  for(const sharedFactor& factor: factors_) {
     if(factor) {
       (*linearFG) += factor->linearize(linearizationPoint);
     } else
@@ -341,10 +343,70 @@ GaussianFactorGraph::shared_ptr NonlinearFactorGraph::linearize(const Values& li
   return linearFG;
 }

+/* ************************************************************************* */
+static Scatter scatterFromValues(const Values& values, boost::optional<Ordering> ordering) {
+  gttic(scatterFromValues);
+
+  Scatter scatter;
+  scatter.reserve(values.size());
+
+  if (!ordering) {
+    // use "natural" ordering with keys taken from the initial values
+    for (const auto& key_value : values) {
+      scatter.add(key_value.key, key_value.value.dim());
+    }
+  } else {
+    // copy the ordering into keys and look the dimensions up in values; this is O(n log n)
+    for (Key key : *ordering) {
+      const Value& value = values.at(key);
+      scatter.add(key, value.dim());
+    }
+  }
+
+  return scatter;
+}
+
+/* ************************************************************************* */
+HessianFactor::shared_ptr NonlinearFactorGraph::linearizeToHessianFactor(
+    const Values& values, boost::optional<Ordering> ordering, const Dampen& dampen) const {
+  gttic(NonlinearFactorGraph_linearizeToHessianFactor);
+
+  Scatter scatter = scatterFromValues(values, ordering);
+
+  // NOTE(frank): we are heavily leaning on friendship below
+  HessianFactor::shared_ptr hessianFactor(new HessianFactor(scatter));
+
+  // Initialize so we can rank-update below
+  hessianFactor->info_.setZero();
+
+  // linearize all factors straight into the Hessian
+  // TODO(frank): this saves on creating the graph, but still mallocs a gaussianFactor!
+  for (const sharedFactor& nonlinearFactor : factors_) {
+    if (nonlinearFactor) {
+      const auto& gaussianFactor = nonlinearFactor->linearize(values);
+      gaussianFactor->updateHessian(hessianFactor->keys_, &hessianFactor->info_);
+    }
+  }
+
+  if (dampen) dampen(hessianFactor);
+
+  return hessianFactor;
+}
+
+/* ************************************************************************* */
+Values NonlinearFactorGraph::updateCholesky(const Values& values,
+                                            boost::optional<Ordering> ordering,
+                                            const Dampen& dampen) const {
+  gttic(NonlinearFactorGraph_updateCholesky);
+  auto hessianFactor = linearizeToHessianFactor(values, ordering, dampen);
+  VectorValues delta = hessianFactor->solve();
+  return values.retract(delta);
+}
+
 /* ************************************************************************* */
 NonlinearFactorGraph NonlinearFactorGraph::clone() const {
   NonlinearFactorGraph result;
-  for(const sharedFactor& f: *this) {
+  for (const sharedFactor& f : factors_) {
     if (f)
       result.push_back(f->clone());
     else
@@ -356,7 +418,7 @@ NonlinearFactorGraph NonlinearFactorGraph::clone() const {
 /* ************************************************************************* */
 NonlinearFactorGraph NonlinearFactorGraph::rekey(const std::map<Key,Key>& rekey_mapping) const {
   NonlinearFactorGraph result;
-  for(const sharedFactor& f: *this) {
+  for (const sharedFactor& f : factors_) {
     if (f)
       result.push_back(f->rekey(rekey_mapping));
     else
diff --git a/gtsam/nonlinear/NonlinearFactorGraph.h b/gtsam/nonlinear/NonlinearFactorGraph.h
index 3702d8a8f..323461459 100644
--- a/gtsam/nonlinear/NonlinearFactorGraph.h
+++ b/gtsam/nonlinear/NonlinearFactorGraph.h
@@ -25,6 +25,9 @@
 #include
 #include

+#include
+#include
+
 namespace gtsam {

   // Forward declarations
@@ -136,14 +139,30 @@ namespace gtsam {
      */
     Ordering orderingCOLAMDConstrained(const FastMap<Key, int>& constraints) const;

-    /**
-     * linearize a nonlinear factor graph
-     */
+    /// Linearize a nonlinear factor graph
     boost::shared_ptr<GaussianFactorGraph> linearize(const Values& linearizationPoint) const;

+    /// typedef for dampen functions used below
+    typedef std::function<void(const boost::shared_ptr<HessianFactor>& hessianFactor)> Dampen;
+
     /**
-     * Clone() performs a deep-copy of the graph, including all of the factors
+     * Instead of producing a GaussianFactorGraph, pre-allocate and linearize directly
+     * into a HessianFactor. This avoids the many mallocs and the pointer indirection of
+     * constructing a new graph, and is hence useful when a dense solve is appropriate
+     * for your problem. An optional ordering can be given, which then decides how the
+     * Hessian is laid out. An optional lambda function can be used to apply damping on
+     * the filled Hessian. No parallelism is exploited, because all the factors write
+     * into the same memory.
      */
+    boost::shared_ptr<HessianFactor> linearizeToHessianFactor(
+        const Values& values, boost::optional<Ordering> ordering = boost::none,
+        const Dampen& dampen = nullptr) const;
+
+    /// Linearize and solve in one pass.
+    /// Calls linearizeToHessianFactor, densely solves the normal equations, and updates the values.
+    Values updateCholesky(const Values& values, boost::optional<Ordering> ordering = boost::none,
+                          const Dampen& dampen = nullptr) const;
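+    /* A possible call pattern, assuming an existing `graph` and `values`; the
+     * empty lambda only illustrates the shape of the Dampen hook:
+     *
+     *   auto dampen = [](const boost::shared_ptr<HessianFactor>& hessian) {
+     *     // optionally modify the filled Hessian in place here
+     *   };
+     *   Values newValues = graph.updateCholesky(values, boost::none, dampen);
+     */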
+
+    /// Clone() performs a deep-copy of the graph, including all of the factors
     NonlinearFactorGraph clone() const;

     /**
diff --git a/gtsam/nonlinear/NonlinearOptimizer.cpp b/gtsam/nonlinear/NonlinearOptimizer.cpp
index bb7b9717b..3c2273870 100644
--- a/gtsam/nonlinear/NonlinearOptimizer.cpp
+++ b/gtsam/nonlinear/NonlinearOptimizer.cpp
@@ -1,6 +1,6 @@
 /* ----------------------------------------------------------------------------

- * GTSAM Copyright 2010, Georgia Tech Research Corporation, 
+ * GTSAM Copyright 2010, Georgia Tech Research Corporation,
  * Atlanta, Georgia 30332-0415
  * All Rights Reserved
  * Authors: Frank Dellaert, et al. (see THANKS for the full author list)
@@ -17,7 +17,7 @@
  */

 #include
-
+#include
 #include
 #include
 #include
@@ -39,48 +39,74 @@ using namespace std;

 namespace gtsam {

 /* ************************************************************************* */
-void NonlinearOptimizer::defaultOptimize() {
+// NOTE(frank): unique_ptr by-value takes ownership, as discussed in
+// http://stackoverflow.com/questions/8114276/
+NonlinearOptimizer::NonlinearOptimizer(const NonlinearFactorGraph& graph,
+                                       std::unique_ptr<internal::NonlinearOptimizerState> state)
+    : graph_(graph), state_(std::move(state)) {}

-  const NonlinearOptimizerParams& params = this->_params();
-  double currentError = this->error();
+/* ************************************************************************* */
+NonlinearOptimizer::~NonlinearOptimizer() {}
+
+/* ************************************************************************* */
+double NonlinearOptimizer::error() const {
+  return state_->error;
+}
+
+int NonlinearOptimizer::iterations() const {
+  return state_->iterations;
+}
+
+const Values& NonlinearOptimizer::values() const {
+  return state_->values;
+}
+
+/* ************************************************************************* */
+void NonlinearOptimizer::defaultOptimize() {
+  const NonlinearOptimizerParams& params = _params();
+  double currentError = error();

   // check if we're already close enough
-  if(currentError <= params.errorTol) {
+  if (currentError <= params.errorTol) {
     if (params.verbosity >= NonlinearOptimizerParams::ERROR)
       cout << "Exiting, as error = " << currentError << " < " << params.errorTol << endl;
     return;
   }

   // Maybe show output
-  if (params.verbosity >= NonlinearOptimizerParams::VALUES) this->values().print("Initial values");
-  if (params.verbosity >= NonlinearOptimizerParams::ERROR) cout << "Initial error: " << currentError << endl;
+  if (params.verbosity >= NonlinearOptimizerParams::VALUES)
+    values().print("Initial values");
+  if (params.verbosity >= NonlinearOptimizerParams::ERROR)
+    cout << "Initial error: " << currentError << endl;

   // Return if we already have too many iterations
-  if(this->iterations() >= params.maxIterations){
+  if (iterations() >= params.maxIterations) {
     if (params.verbosity >= NonlinearOptimizerParams::TERMINATION) {
-      cout << "iterations: " << this->iterations() << " >? " << params.maxIterations << endl;
-    }
+      cout << "iterations: " << iterations() << " >? " << params.maxIterations << endl;
+    }
     return;
   }

   // Iterative loop
   do {
     // Do next iteration
-    currentError = this->error();
-    this->iterate();
+    currentError = error();
+    iterate();
     tictoc_finishedIteration();

     // Maybe show output
-    if(params.verbosity >= NonlinearOptimizerParams::VALUES) this->values().print("newValues");
-    if(params.verbosity >= NonlinearOptimizerParams::ERROR) cout << "newError: " << this->error() << endl;
-  } while(this->iterations() < params.maxIterations &&
-      !checkConvergence(params.relativeErrorTol, params.absoluteErrorTol,
-          params.errorTol, currentError, this->error(), params.verbosity));
+    if (params.verbosity >= NonlinearOptimizerParams::VALUES)
+      values().print("newValues");
+    if (params.verbosity >= NonlinearOptimizerParams::ERROR)
+      cout << "newError: " << error() << endl;
+  } while (iterations() < params.maxIterations &&
+           !checkConvergence(params.relativeErrorTol, params.absoluteErrorTol, params.errorTol,
+                             currentError, error(), params.verbosity));

   // Printing if verbose
   if (params.verbosity >= NonlinearOptimizerParams::TERMINATION) {
     cout << "iterations: " << iterations() << " >? " << params.maxIterations << endl;
     if (iterations() >= params.maxIterations)
       cout << "Terminating because reached maximum iterations" << endl;
   }
 }
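Because iterate() stays public, callers can replace defaultOptimize() with their own loop; a hedged sketch (assumes an optimizer `opt` derived from NonlinearOptimizer, its `params`, and a hypothetical drawing hook):

    double previousError;
    do {
      previousError = opt.error();
      opt.iterate();                // one linearize-solve-update step
      drawEstimate(opt.values());   // hypothetical per-iteration callback
    } while (opt.iterations() < params.maxIterations &&
             !gtsam::checkConvergence(params.relativeErrorTol, params.absoluteErrorTol,
                                      params.errorTol, previousError, opt.error(),
                                      params.verbosity));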
" << params.maxIterations << endl; + } return; } // Iterative loop do { // Do next iteration - currentError = this->error(); - this->iterate(); + currentError = error(); + iterate(); tictoc_finishedIteration(); // Maybe show output - if(params.verbosity >= NonlinearOptimizerParams::VALUES) this->values().print("newValues"); - if(params.verbosity >= NonlinearOptimizerParams::ERROR) cout << "newError: " << this->error() << endl; - } while(this->iterations() < params.maxIterations && - !checkConvergence(params.relativeErrorTol, params.absoluteErrorTol, - params.errorTol, currentError, this->error(), params.verbosity)); + if (params.verbosity >= NonlinearOptimizerParams::VALUES) + values().print("newValues"); + if (params.verbosity >= NonlinearOptimizerParams::ERROR) + cout << "newError: " << error() << endl; + } while (iterations() < params.maxIterations && + !checkConvergence(params.relativeErrorTol, params.absoluteErrorTol, params.errorTol, + currentError, error(), params.verbosity)); // Printing if verbose if (params.verbosity >= NonlinearOptimizerParams::TERMINATION) { - cout << "iterations: " << this->iterations() << " >? " << params.maxIterations << endl; - if (this->iterations() >= params.maxIterations) + cout << "iterations: " << iterations() << " >? " << params.maxIterations << endl; + if (iterations() >= params.maxIterations) cout << "Terminating because reached maximum iterations" << endl; } } @@ -98,38 +124,41 @@ const Values& NonlinearOptimizer::optimizeSafely() { } /* ************************************************************************* */ -VectorValues NonlinearOptimizer::solve(const GaussianFactorGraph &gfg, - const Values& initial, const NonlinearOptimizerParams& params) const { - +VectorValues NonlinearOptimizer::solve(const GaussianFactorGraph& gfg, + const NonlinearOptimizerParams& params) const { // solution of linear solver is an update to the linearization point VectorValues delta; + boost::optional optionalOrdering; + if (params.ordering) + optionalOrdering.reset(*params.ordering); // Check which solver we are using if (params.isMultifrontal()) { // Multifrontal QR or Cholesky (decided by params.getEliminationFunction()) - delta = gfg.optimize(*params.ordering, params.getEliminationFunction()); + delta = gfg.optimize(optionalOrdering, params.getEliminationFunction()); } else if (params.isSequential()) { // Sequential QR or Cholesky (decided by params.getEliminationFunction()) - delta = gfg.eliminateSequential(*params.ordering, - params.getEliminationFunction(), boost::none, params.orderingType)->optimize(); + delta = gfg.eliminateSequential(optionalOrdering, params.getEliminationFunction(), boost::none, + params.orderingType)->optimize(); } else if (params.isIterative()) { - // Conjugate Gradient -> needs params.iterativeParams if (!params.iterativeParams) throw std::runtime_error("NonlinearOptimizer::solve: cg parameter has to be assigned ..."); - if (boost::shared_ptr pcg = boost::dynamic_pointer_cast(params.iterativeParams) ) { + if (boost::shared_ptr pcg = + boost::dynamic_pointer_cast(params.iterativeParams)) { delta = PCGSolver(*pcg).optimize(gfg); - } - else if (boost::shared_ptr spcg = boost::dynamic_pointer_cast(params.iterativeParams) ) { + } else if (boost::shared_ptr spcg = + boost::dynamic_pointer_cast(params.iterativeParams)) { + if (!params.ordering) + throw std::runtime_error("SubgraphSolver needs an ordering"); delta = SubgraphSolver(gfg, *spcg, *params.ordering).optimize(); - } - else { - throw std::runtime_error("NonlinearOptimizer::solve: 
@@ -138,47 +167,52 @@ VectorValues NonlinearOptimizer::solve(const GaussianFactorGraph &gfg,

 /* ************************************************************************* */
 bool checkConvergence(double relativeErrorTreshold, double absoluteErrorTreshold,
-    double errorThreshold, double currentError, double newError,
-    NonlinearOptimizerParams::Verbosity verbosity) {
-
-  if ( verbosity >= NonlinearOptimizerParams::ERROR ) {
-    if ( newError <= errorThreshold )
+                      double errorThreshold, double currentError, double newError,
+                      NonlinearOptimizerParams::Verbosity verbosity) {
+  if (verbosity >= NonlinearOptimizerParams::ERROR) {
+    if (newError <= errorThreshold)
       cout << "errorThreshold: " << newError << " < " << errorThreshold << endl;
     else
       cout << "errorThreshold: " << newError << " > " << errorThreshold << endl;
   }

-  if ( newError <= errorThreshold ) return true ;
+  if (newError <= errorThreshold)
+    return true;

   // check if diverges
   double absoluteDecrease = currentError - newError;
   if (verbosity >= NonlinearOptimizerParams::ERROR) {
     if (absoluteDecrease <= absoluteErrorTreshold)
-      cout << "absoluteDecrease: " << setprecision(12) << absoluteDecrease << " < " << absoluteErrorTreshold << endl;
+      cout << "absoluteDecrease: " << setprecision(12) << absoluteDecrease << " < "
+           << absoluteErrorTreshold << endl;
     else
-      cout << "absoluteDecrease: " << setprecision(12) << absoluteDecrease << " >= " << absoluteErrorTreshold << endl;
+      cout << "absoluteDecrease: " << setprecision(12) << absoluteDecrease
+           << " >= " << absoluteErrorTreshold << endl;
   }

   // calculate relative error decrease and update currentError
   double relativeDecrease = absoluteDecrease / currentError;
   if (verbosity >= NonlinearOptimizerParams::ERROR) {
     if (relativeDecrease <= relativeErrorTreshold)
-      cout << "relativeDecrease: " << setprecision(12) << relativeDecrease << " < " << relativeErrorTreshold << endl;
+      cout << "relativeDecrease: " << setprecision(12) << relativeDecrease << " < "
+           << relativeErrorTreshold << endl;
     else
-      cout << "relativeDecrease: " << setprecision(12) << relativeDecrease << " >= " << relativeErrorTreshold << endl;
+      cout << "relativeDecrease: " << setprecision(12) << relativeDecrease
+           << " >= " << relativeErrorTreshold << endl;
   }

-  bool converged = (relativeErrorTreshold && (relativeDecrease <= relativeErrorTreshold))
-      || (absoluteDecrease <= absoluteErrorTreshold);
+  bool converged = (relativeErrorTreshold && (relativeDecrease <= relativeErrorTreshold)) ||
+                   (absoluteDecrease <= absoluteErrorTreshold);
   if (verbosity >= NonlinearOptimizerParams::TERMINATION && converged) {
-
-    if(absoluteDecrease >= 0.0)
+    if (absoluteDecrease >= 0.0)
       cout << "converged" << endl;
     else
       cout << "Warning: stopping nonlinear iterations because error increased" << endl;

    cout << "errorThreshold: " << newError << " optimize();
+Values result = optimizer.optimize();

 // The new optimizer has results and statistics
-cout << "Converged in " << final->iterations() << " iterations "
-        "with final error " << final->error() << endl;
-
-// The values are a const_shared_ptr (boost::shared_ptr<const Values>)
-Values::const_shared_ptr result = final->values();
-
-// Use the results
-useTheResult(result);
+cout << "Converged in " << optimizer.iterations() << " iterations "
+        "with final error " << optimizer.error() << endl;
\endcode
 *
 * Example of setting parameters before optimization:
@@ -112,29 +61,23 @@
params.relativeErrorTol = 1e-3;
params.absoluteErrorTol = 1e-3;

// Optimize
-Values::const_shared_ptr result = DoglegOptimizer(graph, initialValues, params).optimized();
+Values result = DoglegOptimizer(graph, initialValues, params).optimize();
\endcode
 *
 * This interface also exposes an iterate() method, which performs one
- * iteration, returning a NonlinearOptimizer containing the adjusted variable
- * assignment. The optimize() method simply calls iterate() multiple times,
+ * iteration. The optimize() method simply calls iterate() multiple times,
 * until the error changes less than a threshold. We expose iterate() so that
 * you can easily control what happens between iterations, such as drawing or
 * printing, moving points from behind the camera to in front, etc.
 *
- * To modify the graph, values, or parameters between iterations, call the
- * update() functions, which preserve all other state (for example, the trust
- * region size in DoglegOptimizer). Derived optimizer classes also have
- * additional update methods, not in this abstract interface, for updating
- * algorithm-specific state.
- *
- * For more flexibility, since all functions are virtual, you may override them
- * in your own derived class.
+ * For more flexibility you may override virtual methods in your own derived class.
 */
class GTSAM_EXPORT NonlinearOptimizer {

protected:
-  NonlinearFactorGraph graph_;
+  NonlinearFactorGraph graph_; ///< The graph with nonlinear factors
+
+  std::unique_ptr<internal::NonlinearOptimizerState> state_; ///< PIMPL'd state

public:
  /** A shared pointer to this class */
@@ -163,13 +106,13 @@ public:
  const Values& optimizeSafely();

  /// return error
-  double error() const { return _state().error; }
+  double error() const;

  /// return number of iterations
-  int iterations() const { return _state().iterations; }
+  int iterations() const;

  /// return values
-  const Values& values() const { return _state().values; }
+  const Values& values() const;

  /// @}

@@ -177,17 +120,17 @@ public:
  /// @{

  /** Virtual destructor */
-  virtual ~NonlinearOptimizer() {}
+  virtual ~NonlinearOptimizer();

  /** Default function to do linear solve, i.e. optimize a GaussianFactorGraph */
  virtual VectorValues solve(const GaussianFactorGraph &gfg,
-      const Values& initial, const NonlinearOptimizerParams& params) const;
+      const NonlinearOptimizerParams& params) const;

-  /** Perform a single iteration, returning a new NonlinearOptimizer class
-   * containing the updated variable assignments, which may be retrieved with
-   * values().
+  /** Perform a single iteration, updating the state held by this optimizer;
+   * the new variable assignments may be retrieved with values(). Returns the
+   * linear factor graph that was solved to compute this update.
   */
-  virtual void iterate() = 0;
+  virtual GaussianFactorGraph::shared_ptr iterate() = 0;

  /// @}

@@ -197,13 +140,11 @@ protected:
   */
  void defaultOptimize();

-  virtual const NonlinearOptimizerState& _state() const = 0;
  virtual const NonlinearOptimizerParams& _params() const = 0;

-  /** Constructor for initial construction of base classes. */
-  NonlinearOptimizer(const NonlinearFactorGraph& graph) : graph_(graph) {}
-
+  /** Constructor for initial construction of base classes. Takes ownership of state. */
+  NonlinearOptimizer(const NonlinearFactorGraph& graph,
+                     std::unique_ptr<internal::NonlinearOptimizerState> state);
};
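A sketch of how a derived optimizer could hand a freshly built state to this constructor; the class name MyOptimizer is hypothetical, and the idiom follows the unique_ptr-by-value note in NonlinearOptimizer.cpp above:

    MyOptimizer::MyOptimizer(const gtsam::NonlinearFactorGraph& graph,
                             const gtsam::Values& initialValues)
        : NonlinearOptimizer(
              graph,
              std::unique_ptr<gtsam::internal::NonlinearOptimizerState>(
                  new gtsam::internal::NonlinearOptimizerState(
                      initialValues, graph.error(initialValues)))) {}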
/** Check whether the relative error decrease is less than relativeErrorTreshold,
diff --git a/gtsam/nonlinear/Values.cpp b/gtsam/nonlinear/Values.cpp
index e21661fee..1bd8b3a73 100644
--- a/gtsam/nonlinear/Values.cpp
+++ b/gtsam/nonlinear/Values.cpp
@@ -36,6 +36,7 @@
 #include
 #include
+#include
 #include

 using namespace std;

 namespace gtsam {

     this->insert(other);
   }

+  /* ************************************************************************* */
+  Values::Values(Values&& other) : values_(std::move(other.values_)) {
+  }
+
+  /* ************************************************************************* */
+  Values::Values(const Values& other, const VectorValues& delta) {
+    for (const_iterator key_value = other.begin(); key_value != other.end(); ++key_value) {
+      VectorValues::const_iterator it = delta.find(key_value->key);
+      Key key = key_value->key; // Non-const duplicate to deal with non-const insert argument
+      if (it != delta.end()) {
+        const Vector& v = it->second;
+        Value* retractedValue(key_value->value.retract_(v)); // Retract
+        values_.insert(key, retractedValue); // Add retracted result directly to result values
+      } else {
+        values_.insert(key, key_value->value.clone_()); // Add original version to result values
+      }
+    }
+  }
+
   /* ************************************************************************* */
   void Values::print(const string& str, const KeyFormatter& keyFormatter) const {
     cout << str << "Values with " << size() << " values:" << endl;
     for(const_iterator key_value = begin(); key_value != end(); ++key_value) {
       cout << "Value " << keyFormatter(key_value->key) << ": ";
       key_value->value.print("");
+      cout << "\n";
     }
   }

@@ -78,23 +99,8 @@
   }

   /* ************************************************************************* */
-  Values Values::retract(const VectorValues& delta) const
-  {
-    Values result;
-
-    for(const_iterator key_value = begin(); key_value != end(); ++key_value) {
-      VectorValues::const_iterator vector_item = delta.find(key_value->key);
-      Key key = key_value->key; // Non-const duplicate to deal with non-const insert argument
-      if(vector_item != delta.end()) {
-        const Vector& singleDelta = vector_item->second;
-        Value* retractedValue(key_value->value.retract_(singleDelta)); // Retract
-        result.values_.insert(key, retractedValue); // Add retracted result directly to result values
-      } else {
-        result.values_.insert(key, key_value->value.clone_()); // Add original version to result values
-      }
-    }
-
-    return result;
+  Values Values::retract(const VectorValues& delta) const {
+    return Values(*this, delta);
   }

   /* ************************************************************************* */
diff --git a/gtsam/nonlinear/Values.h b/gtsam/nonlinear/Values.h
index a61d01f23..bf17f1f0d 100644
--- a/gtsam/nonlinear/Values.h
+++ b/gtsam/nonlinear/Values.h
@@ -146,6 +146,12 @@ namespace gtsam {
     /** Copy constructor duplicates all keys and values */
     Values(const Values& other);

+    /** Move constructor */
+    Values(Values&& other);
+
+    /** Construct from a Values and an update vector: identical to other.retract(delta) */
+    Values(const Values& other, const VectorValues& delta);
+
     /** Constructor from a Filtered view copies out all values */
     template<class ValueType>
     Values(const Filtered<ValueType>& view);
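A small equivalence sketch for the two paths above (key and dimensions are illustrative; assert_equal is the GTSAM Testable helper):

    #include <cassert>
    #include <gtsam/base/Testable.h>
    #include <gtsam/geometry/Point2.h>
    #include <gtsam/linear/VectorValues.h>
    #include <gtsam/nonlinear/Values.h>

    void retractTwoWays() {
      gtsam::Values config;
      config.insert(1, gtsam::Point2(1.0, 2.0));
      gtsam::VectorValues delta;
      delta.insert(1, gtsam::Vector2(0.1, 0.2));
      gtsam::Values a = config.retract(delta);  // goes through the new constructor
      gtsam::Values b(config, delta);           // same result, built directly
      assert(gtsam::assert_equal(a, b));
    }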
diff --git a/gtsam/nonlinear/internal/CallRecord.h b/gtsam/nonlinear/internal/CallRecord.h
index 6616f0a83..e58c40203 100644
--- a/gtsam/nonlinear/internal/CallRecord.h
+++ b/gtsam/nonlinear/internal/CallRecord.h
@@ -1,6 +1,6 @@
 /* ----------------------------------------------------------------------------

- * GTSAM Copyright 2010, Georgia Tech Research Corporation, 
+ * GTSAM Copyright 2010, Georgia Tech Research Corporation,
  * Atlanta, Georgia 30332-0415
  * All Rights Reserved
  * Authors: Frank Dellaert, et al. (see THANKS for the full author list)
diff --git a/gtsam/nonlinear/internal/ExpressionNode.h b/gtsam/nonlinear/internal/ExpressionNode.h
index 966f43da8..d11908b54 100644
--- a/gtsam/nonlinear/internal/ExpressionNode.h
+++ b/gtsam/nonlinear/internal/ExpressionNode.h
@@ -175,7 +175,7 @@ public:

   /// Print
   virtual void print(const std::string& indent = "") const {
-    std::cout << indent << "Leaf, key = " << key_ << std::endl;
+    std::cout << indent << "Leaf, key = " << DefaultKeyFormatter(key_) << std::endl;
   }

   /// Return keys that play in this expression
@@ -310,11 +310,13 @@ public:
     assert(reinterpret_cast<size_t>(ptr) % TraceAlignment == 0);

     // Create a Record in the memory pointed to by ptr
-    // Calling the construct will record the traces for all arguments
+    // Calling the constructor will record the traces for all arguments
     // Write an Expression execution trace in record->trace
     // Iff Constant or Leaf, this will not write to traceStorage, only to trace.
     // Iff the expression is functional, write all Records in traceStorage buffer
     // Return value of type T is recorded in record->value
+    // NOTE(frank, abe): The destructor on this record is never called due to this placement new
+    // Records must only contain statically sized objects!
     Record* record = new (ptr) Record(values, *expression1_, ptr);

     // Our trace parameter is set to point to the Record
@@ -389,6 +391,7 @@ public:
     ExecutionTrace<A1> trace1;
     ExecutionTrace<A2> trace2;
+    // TODO(frank): These aren't needed; kill them!
A1 value1; A2 value2; @@ -632,91 +635,82 @@ class ScalarMultiplyNode : public ExpressionNode { } }; + //----------------------------------------------------------------------------- -/// Sum Expression +/// Binary Sum Expression template -class SumExpressionNode : public ExpressionNode { +class BinarySumNode : public ExpressionNode { typedef ExpressionNode NodeT; - std::vector> expressions_; + boost::shared_ptr > expression1_; + boost::shared_ptr > expression2_; public: - explicit SumExpressionNode(const std::vector>& expressions) { + explicit BinarySumNode() { this->traceSize_ = upAligned(sizeof(Record)); - for (const Expression& e : expressions) - add(e); } - void add(const Expression& e) { - expressions_.push_back(e.root()); - this->traceSize_ += e.traceSize(); + /// Constructor with a binary function f, and two input arguments + BinarySumNode(const Expression& e1, const Expression& e2) + : expression1_(e1.root()), expression2_(e2.root()) { + this->traceSize_ = // + upAligned(sizeof(Record)) + e1.traceSize() + e2.traceSize(); } /// Destructor - virtual ~SumExpressionNode() {} - - size_t nrTerms() const { - return expressions_.size(); - } + virtual ~BinarySumNode() {} /// Print virtual void print(const std::string& indent = "") const { - std::cout << indent << "SumExpressionNode" << std::endl; - for (const auto& node : expressions_) - node->print(indent + " "); + std::cout << indent << "BinarySumNode" << std::endl; + expression1_->print(indent + " "); + expression2_->print(indent + " "); } /// Return value virtual T value(const Values& values) const { - auto it = expressions_.begin(); - T sum = (*it)->value(values); - for (++it; it != expressions_.end(); ++it) - sum = sum + (*it)->value(values); - return sum; + return expression1_->value(values) + expression2_->value(values); } /// Return keys that play in this expression virtual std::set keys() const { - std::set keys; - for (const auto& node : expressions_) { - std::set myKeys = node->keys(); - keys.insert(myKeys.begin(), myKeys.end()); - } + std::set keys = expression1_->keys(); + std::set myKeys = expression2_->keys(); + keys.insert(myKeys.begin(), myKeys.end()); return keys; } /// Return dimensions for each argument virtual void dims(std::map& map) const { - for (const auto& node : expressions_) - node->dims(map); + expression1_->dims(map); + expression2_->dims(map); } // Inner Record Class struct Record : public CallRecordImplementor::dimension> { - std::vector> traces_; - - explicit Record(size_t nrArguments) : traces_(nrArguments) {} + ExecutionTrace trace1; + ExecutionTrace trace2; /// Print to std::cout void print(const std::string& indent) const { - std::cout << indent << "SumExpressionNode::Record {" << std::endl; - for (const auto& trace : traces_) - trace.print(indent); + std::cout << indent << "BinarySumNode::Record {" << std::endl; + trace1.print(indent); + trace2.print(indent); std::cout << indent << "}" << std::endl; } - /// If the SumExpression is the root, we just start as many pipelines as there are terms. + /// If the BinarySumExpression is the root, we just start as many pipelines as there are terms. 
void startReverseAD4(JacobianMap& jacobians) const { - for (const auto& trace : traces_) - // NOTE(frank): equivalent to trace.reverseAD1(dTdA, jacobians) with dTdA=Identity - trace.startReverseAD1(jacobians); + // NOTE(frank): equivalent to trace.reverseAD1(dTdA, jacobians) with dTdA=Identity + trace1.startReverseAD1(jacobians); + trace2.startReverseAD1(jacobians); } /// If we are not the root, we simply pass on the adjoint matrix dFdT to all terms template void reverseAD4(const MatrixType& dFdT, JacobianMap& jacobians) const { - for (const auto& trace : traces_) - // NOTE(frank): equivalent to trace.reverseAD1(dFdT * dTdA, jacobians) with dTdA=Identity - trace.reverseAD1(dFdT, jacobians); + // NOTE(frank): equivalent to trace.reverseAD1(dFdT * dTdA, jacobians) with dTdA=Identity + trace1.reverseAD1(dFdT, jacobians); + trace2.reverseAD1(dFdT, jacobians); } }; @@ -724,17 +718,13 @@ class SumExpressionNode : public ExpressionNode { virtual T traceExecution(const Values& values, ExecutionTrace& trace, ExecutionTraceStorage* ptr) const { assert(reinterpret_cast(ptr) % TraceAlignment == 0); - size_t nrArguments = expressions_.size(); - Record* record = new (ptr) Record(nrArguments); - ptr += upAligned(sizeof(Record)); - size_t i = 0; - T sum = traits::Identity(); - for (const auto& node : expressions_) { - sum = sum + node->traceExecution(values, record->traces_[i++], ptr); - ptr += node->traceSize(); - } + Record* record = new (ptr) Record(); trace.setFunction(record); - return sum; + + ExecutionTraceStorage* ptr1 = ptr + upAligned(sizeof(Record)); + ExecutionTraceStorage* ptr2 = ptr1 + expression1_->traceSize(); + return expression1_->traceExecution(values, record->trace1, ptr1) + + expression2_->traceExecution(values, record->trace2, ptr2); } }; diff --git a/gtsam/nonlinear/internal/LevenbergMarquardtState.h b/gtsam/nonlinear/internal/LevenbergMarquardtState.h new file mode 100644 index 000000000..19a390c45 --- /dev/null +++ b/gtsam/nonlinear/internal/LevenbergMarquardtState.h @@ -0,0 +1,160 @@ +/* ---------------------------------------------------------------------------- + + * GTSAM Copyright 2010, Georgia Tech Research Corporation, + * Atlanta, Georgia 30332-0415 + * All Rights Reserved + * Authors: Frank Dellaert, et al. (see THANKS for the full author list) + + * See LICENSE for the license information + + * -------------------------------------------------------------------------- */ + +/** + * @file LevenbergMarquardtState.h + * @brief A LevenbergMarquardtState class containing most of the logic for Levenberg-Marquardt + * @author Frank Dellaert + * @date April 2016 + */ + +#include "NonlinearOptimizerState.h" + +#include +#include +#include +#include +#include +#include + +#include + +#include +#include +#include +#include + +namespace gtsam { + +namespace internal { + +// TODO(frank): once Values supports move, we can make State completely functional. 
+// As it is now, increaseLambda is non-const; otherwise we would make a Values
+// copy every time. decreaseLambda would also benefit from a working Values
+// move constructor.
+struct LevenbergMarquardtState : public NonlinearOptimizerState {
+  typedef LevenbergMarquardtState This;
+
+  // TODO(frank): make these const once increaseLambda can be made const
+  double lambda;
+  double currentFactor;
+  int totalNumberInnerIterations;  ///< The total number of inner iterations in the
+                                   //   optimization (for each iteration, LM tries multiple
+                                   //   inner iterations with different lambdas)
+
+  LevenbergMarquardtState(const Values& initialValues, double error, double lambda,
+                          double currentFactor, unsigned int iterations = 0,
+                          unsigned int totalNumberInnerIterations = 0)
+      : NonlinearOptimizerState(initialValues, error, iterations),
+        lambda(lambda),
+        currentFactor(currentFactor),
+        totalNumberInnerIterations(totalNumberInnerIterations) {}
+
+  // Constructor version that takes ownership of values
+  // NOTE: std::move is needed below; a named rvalue reference is itself an lvalue,
+  // so forwarding initialValues without it would silently copy.
+  LevenbergMarquardtState(Values&& initialValues, double error, double lambda, double currentFactor,
+                          unsigned int iterations = 0, unsigned int totalNumberInnerIterations = 0)
+      : NonlinearOptimizerState(std::move(initialValues), error, iterations),
+        lambda(lambda),
+        currentFactor(currentFactor),
+        totalNumberInnerIterations(totalNumberInnerIterations) {}
+
+  // Applies policy to *increase* lambda: should be used if the current update was NOT successful
+  // TODO(frank): non-const method until Values properly support std::move
+  void increaseLambda(const LevenbergMarquardtParams& params) {
+    lambda *= currentFactor;
+    totalNumberInnerIterations += 1;
+    if (!params.useFixedLambdaFactor) {
+      currentFactor *= 2.0;
+    }
+  }
+
+  // Apply policy to decrease lambda if the current update was successful
+  // (stepQuality is not used in the naive policy)
+  // Takes ownership of newValues, which must be passed as an rvalue
+  std::unique_ptr<This> decreaseLambda(const LevenbergMarquardtParams& params, double stepQuality,
+                                       Values&& newValues, double newError) const {
+    double newLambda = lambda, newFactor = currentFactor;
+    if (params.useFixedLambdaFactor) {
+      newLambda /= currentFactor;
+    } else {
+      // TODO(frank): odd that currentFactor is not used to change lambda here...
+      newLambda *= std::max(1.0 / 3.0, 1.0 - pow(2.0 * stepQuality - 1.0, 3));
+      newFactor = 2.0 * currentFactor;
+    }
+    newLambda = std::max(params.lambdaLowerBound, newLambda);
+    return std::unique_ptr<This>(new This(std::move(newValues), newError, newLambda, newFactor,
+                                          iterations + 1, totalNumberInnerIterations + 1));
+  }
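+
+  // Worked trace of the fixed-factor policy above, using what I take to be the
+  // LevenbergMarquardtParams defaults (lambdaInitial = 1e-5, lambdaFactor = 10):
+  //   lambda = 1e-5
+  //   rejected step -> increaseLambda: lambda *= 10  =>  1e-4
+  //   rejected step -> increaseLambda: lambda *= 10  =>  1e-3
+  //   accepted step -> decreaseLambda: lambda /= 10  =>  1e-4
+  //   (decreaseLambda clamps from below at params.lambdaLowerBound)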
+
+  /** Small struct to cache objects needed for damping.
+   *  This is used in buildDampedSystem */
+  struct CachedModel {
+    CachedModel() {}  // default init makes zero-size matrices
+    CachedModel(int dim, double sigma)
+        : A(Matrix::Identity(dim, dim)),
+          b(Vector::Zero(dim)),
+          model(noiseModel::Isotropic::Sigma(dim, sigma)) {}
+    CachedModel(int dim, double sigma, const Vector& diagonal)
+        : A(Eigen::DiagonalMatrix<double, Eigen::Dynamic>(diagonal)),
+          b(Vector::Zero(dim)),
+          model(noiseModel::Isotropic::Sigma(dim, sigma)) {}
+    Matrix A;
+    Vector b;
+    SharedDiagonal model;
+  };
+
+  // Small cache of A|b|model indexed by dimension. Can save many mallocs.
+  mutable std::vector<CachedModel> noiseModelCache;
+  CachedModel* getCachedModel(size_t dim) const {
+    if (dim > noiseModelCache.size())
+      noiseModelCache.resize(dim);
+    CachedModel* item = &noiseModelCache[dim - 1];
+    if (!item->model)
+      *item = CachedModel(dim, 1.0 / std::sqrt(lambda));
+    return item;
+  };
+
+  /// Build a damped system for a specific lambda, vanilla version
+  GaussianFactorGraph buildDampedSystem(GaussianFactorGraph damped /* gets copied */) const {
+    noiseModelCache.resize(0);
+    // for each of the variables, add a prior
+    damped.reserve(damped.size() + values.size());
+    for (const auto& key_value : values) {
+      const Key key = key_value.key;
+      const size_t dim = key_value.value.dim();
+      const CachedModel* item = getCachedModel(dim);
+      damped += boost::make_shared<JacobianFactor>(key, item->A, item->b, item->model);
+    }
+    return damped;
+  }
+
+  /// Build a damped system, use hessianDiagonal per variable (more expensive)
+  GaussianFactorGraph buildDampedSystem(GaussianFactorGraph damped,  // gets copied
+                                        const VectorValues& sqrtHessianDiagonal) const {
+    noiseModelCache.resize(0);
+    damped.reserve(damped.size() + values.size());
+    for (const auto& key_vector : sqrtHessianDiagonal) {
+      try {
+        const Key key = key_vector.first;
+        const size_t dim = key_vector.second.size();
+        CachedModel* item = getCachedModel(dim);
+        item->A.diagonal() = sqrtHessianDiagonal.at(key);  // use diag(hessian)
+        damped += boost::make_shared<JacobianFactor>(key, item->A, item->b, item->model);
+      } catch (const std::out_of_range& e) {
+        continue;  // Don't attempt any damping if no key found in diagonal
+      }
+    }
+    return damped;
+  }
+};
+
+}  // namespace internal
+
+} /* namespace gtsam */
diff --git a/gtsam/nonlinear/internal/NonlinearOptimizerState.h b/gtsam/nonlinear/internal/NonlinearOptimizerState.h
new file mode 100644
index 000000000..d356d20d5
--- /dev/null
+++ b/gtsam/nonlinear/internal/NonlinearOptimizerState.h
@@ -0,0 +1,56 @@
+/* ----------------------------------------------------------------------------
+
+ * GTSAM Copyright 2010, Georgia Tech Research Corporation,
+ * Atlanta, Georgia 30332-0415
+ * All Rights Reserved
+ * Authors: Frank Dellaert, et al. (see THANKS for the full author list)
+
+ * See LICENSE for the license information
+
+ * -------------------------------------------------------------------------- */
+
+/**
+ * @file    NonlinearOptimizerState.h
+ * @brief   Private class for NonlinearOptimizer state
+ * @author  Richard Roberts
+ * @author  Frank Dellaert
+ * @date    Sep 7, 2009
+ */
+
+#pragma once
+
+#include
+
+namespace gtsam {
+namespace internal {
+
+/**
+ * Base class for a nonlinear optimization state, including the current estimate
+ * of the variable values, error, and number of iterations. Optimizers derived
+ * from NonlinearOptimizer usually also define a derived state class containing
+ * additional state specific to the algorithm (for example, Dogleg state
+ * contains the current trust region radius).
+ */
+struct NonlinearOptimizerState {
+ public:
+  /** The current estimate of the variable values. */
+  const Values values;
+
+  /** The factor graph error on the current values. */
+  const double error;
+
+  /** The number of optimization iterations performed.
*/ + const unsigned int iterations; + + virtual ~NonlinearOptimizerState() {} + + NonlinearOptimizerState(const Values& values, double error, unsigned int iterations = 0) + : values(values), error(error), iterations(iterations) {} + + // Constructor version that takes ownership of values + NonlinearOptimizerState(Values&& values, double error, unsigned int iterations = 0) + : values(std::move(values)), error(error), iterations(iterations) {} +}; + +} // namespace internal +} // namespace gtsam diff --git a/gtsam/nonlinear/tests/testExpression.cpp b/gtsam/nonlinear/tests/testExpression.cpp index db98971ff..bde79e780 100644 --- a/gtsam/nonlinear/tests/testExpression.cpp +++ b/gtsam/nonlinear/tests/testExpression.cpp @@ -307,18 +307,18 @@ TEST(Expression, ternary) { /* ************************************************************************* */ TEST(Expression, ScalarMultiply) { const Key key(67); - const Point3_ sum_ = 23 * Point3_(key); + const Point3_ expr = 23 * Point3_(key); set expected_keys = list_of(key); - EXPECT(expected_keys == sum_.keys()); + EXPECT(expected_keys == expr.keys()); map actual_dims, expected_dims = map_list_of(key, 3); - sum_.dims(actual_dims); + expr.dims(actual_dims); EXPECT(actual_dims == expected_dims); // Check dims as map std::map map; - sum_.dims(map); + expr.dims(map); LONGS_EQUAL(1, map.size()); Values values; @@ -326,16 +326,16 @@ TEST(Expression, ScalarMultiply) { // Check value const Point3 expected(23, 0, 46); - EXPECT(assert_equal(expected, sum_.value(values))); + EXPECT(assert_equal(expected, expr.value(values))); // Check value + Jacobians std::vector H(1); - EXPECT(assert_equal(expected, sum_.value(values, H))); + EXPECT(assert_equal(expected, expr.value(values, H))); EXPECT(assert_equal(23 * I_3x3, H[0])); } /* ************************************************************************* */ -TEST(Expression, Sum) { +TEST(Expression, BinarySum) { const Key key(67); const Point3_ sum_ = Point3_(key) + Point3_(Point3(1, 1, 1)); @@ -368,9 +368,31 @@ TEST(Expression, Sum) { TEST(Expression, TripleSum) { const Key key(67); const Point3_ p1_(Point3(1, 1, 1)), p2_(key); - const SumExpression sum_ = p1_ + p2_ + p1_; + const Expression sum_ = p1_ + p2_ + p1_; + + LONGS_EQUAL(1, sum_.keys().size()); + + Values values; + values.insert(key, Point3(2, 2, 2)); + + // Check value + const Point3 expected(4, 4, 4); + EXPECT(assert_equal(expected, sum_.value(values))); + + // Check value + Jacobians + std::vector H(1); + EXPECT(assert_equal(expected, sum_.value(values, H))); + EXPECT(assert_equal(I_3x3, H[0])); +} + +/* ************************************************************************* */ +TEST(Expression, PlusEqual) { + const Key key(67); + const Point3_ p1_(Point3(1, 1, 1)), p2_(key); + Expression sum_ = p1_; + sum_ += p2_; + sum_ += p1_; - LONGS_EQUAL(3, sum_.nrTerms()); LONGS_EQUAL(1, sum_.keys().size()); Values values; diff --git a/gtsam/nonlinear/tests/testValues.cpp b/gtsam/nonlinear/tests/testValues.cpp index b7c4efeca..c82ba3391 100644 --- a/gtsam/nonlinear/tests/testValues.cpp +++ b/gtsam/nonlinear/tests/testValues.cpp @@ -31,6 +31,7 @@ using namespace boost::assign; #include #include +#include using namespace gtsam; using namespace std; @@ -47,9 +48,9 @@ class TestValueData { public: static int ConstructorCount; static int DestructorCount; - TestValueData(const TestValueData& other) { /*cout << "Copy constructor" << endl;*/ ++ ConstructorCount; } - TestValueData() { /*cout << "Default constructor" << endl;*/ ++ ConstructorCount; } - ~TestValueData() { 
/*cout << "Destructor" << endl;*/ ++ DestructorCount; } + TestValueData(const TestValueData& other) { ++ ConstructorCount; } + TestValueData() { ++ ConstructorCount; } + ~TestValueData() { ++ DestructorCount; } }; int TestValueData::ConstructorCount = 0; int TestValueData::DestructorCount = 0; @@ -76,7 +77,6 @@ namespace gtsam { template <> struct traits : public internal::Manifold {}; } - /* ************************************************************************* */ TEST( Values, equals1 ) { @@ -198,28 +198,14 @@ TEST(Values, basic_functions) } -///* ************************************************************************* */ -//TEST(Values, dim_zero) -//{ -// Values config0; -// config0.insert(key1, Vector2(Vector2(2.0, 3.0)); -// config0.insert(key2, Vector3(5.0, 6.0, 7.0)); -// LONGS_EQUAL(5, config0.dim()); -// -// VectorValues expected; -// expected.insert(key1, zero(2)); -// expected.insert(key2, zero(3)); -// CHECK(assert_equal(expected, config0.zero())); -//} - /* ************************************************************************* */ -TEST(Values, expmap_a) +TEST(Values, retract_full) { Values config0; config0.insert(key1, Vector3(1.0, 2.0, 3.0)); config0.insert(key2, Vector3(5.0, 6.0, 7.0)); - VectorValues increment = pair_list_of + VectorValues delta = pair_list_of (key1, Vector3(1.0, 1.1, 1.2)) (key2, Vector3(1.3, 1.4, 1.5)); @@ -227,51 +213,35 @@ TEST(Values, expmap_a) expected.insert(key1, Vector3(2.0, 3.1, 4.2)); expected.insert(key2, Vector3(6.3, 7.4, 8.5)); - CHECK(assert_equal(expected, config0.retract(increment))); + CHECK(assert_equal(expected, config0.retract(delta))); + CHECK(assert_equal(expected, Values(config0, delta))); } /* ************************************************************************* */ -TEST(Values, expmap_b) +TEST(Values, retract_partial) { Values config0; config0.insert(key1, Vector3(1.0, 2.0, 3.0)); config0.insert(key2, Vector3(5.0, 6.0, 7.0)); - VectorValues increment = pair_list_of + VectorValues delta = pair_list_of (key2, Vector3(1.3, 1.4, 1.5)); Values expected; expected.insert(key1, Vector3(1.0, 2.0, 3.0)); expected.insert(key2, Vector3(6.3, 7.4, 8.5)); - CHECK(assert_equal(expected, config0.retract(increment))); + CHECK(assert_equal(expected, config0.retract(delta))); + CHECK(assert_equal(expected, Values(config0, delta))); } /* ************************************************************************* */ -//TEST(Values, expmap_c) -//{ -// Values config0; -// config0.insert(key1, Vector3(1.0, 2.0, 3.0)); -// config0.insert(key2, Vector3(5.0, 6.0, 7.0)); -// -// Vector increment = Vector6( -// 1.0, 1.1, 1.2, -// 1.3, 1.4, 1.5); -// -// Values expected; -// expected.insert(key1, Vector3(2.0, 3.1, 4.2)); -// expected.insert(key2, Vector3(6.3, 7.4, 8.5)); -// -// CHECK(assert_equal(expected, config0.retract(increment))); -//} - -/* ************************************************************************* */ -TEST(Values, expmap_d) +TEST(Values, equals) { Values config0; config0.insert(key1, Vector3(1.0, 2.0, 3.0)); config0.insert(key2, Vector3(5.0, 6.0, 7.0)); - //config0.print("config0"); + CHECK(equal(config0, config0)); CHECK(config0.equals(config0)); @@ -279,8 +249,8 @@ TEST(Values, expmap_d) poseconfig.insert(key1, Pose2(1, 2, 3)); poseconfig.insert(key2, Pose2(0.3, 0.4, 0.5)); - CHECK(equal(config0, config0)); - CHECK(config0.equals(config0)); + CHECK(equal(poseconfig, poseconfig)); + CHECK(poseconfig.equals(poseconfig)); } /* ************************************************************************* */ @@ -453,8 +423,8 @@ 
TEST(Values, Symbol_filter) { } /* ************************************************************************* */ +// Check that Value destructors are called when Values container is deleted TEST(Values, Destructors) { - // Check that Value destructors are called when Values container is deleted { Values values; { @@ -469,11 +439,95 @@ TEST(Values, Destructors) { // GenericValue in insert() // but I'm sure some advanced programmer can figure out // a way to avoid the temporary, or optimize it out - LONGS_EQUAL(4+2, (long)TestValueData::ConstructorCount); - LONGS_EQUAL(2+2, (long)TestValueData::DestructorCount); + LONGS_EQUAL(4 + 2, (long)TestValueData::ConstructorCount); + LONGS_EQUAL(2 + 2, (long)TestValueData::DestructorCount); + } + LONGS_EQUAL(4 + 2, (long)TestValueData::ConstructorCount); + LONGS_EQUAL(4 + 2, (long)TestValueData::DestructorCount); +} + +/* ************************************************************************* */ +TEST(Values, copy_constructor) { + { + Values values; + TestValueData::ConstructorCount = 0; + TestValueData::DestructorCount = 0; + { + TestValue value1; + TestValue value2; + EXPECT_LONGS_EQUAL(2, (long)TestValueData::ConstructorCount); + EXPECT_LONGS_EQUAL(0, (long)TestValueData::DestructorCount); + values.insert(0, value1); + values.insert(1, value2); + } + EXPECT_LONGS_EQUAL(6, (long)TestValueData::ConstructorCount); + EXPECT_LONGS_EQUAL(4, (long)TestValueData::DestructorCount); + + // Copy constructor + { + Values copied(values); // makes 2 extra copies + EXPECT_LONGS_EQUAL(8, (long)TestValueData::ConstructorCount); + EXPECT_LONGS_EQUAL(4, (long)TestValueData::DestructorCount); + } + EXPECT_LONGS_EQUAL(8, (long)TestValueData::ConstructorCount); + EXPECT_LONGS_EQUAL(6, (long)TestValueData::DestructorCount); // copied destructed ! + } + EXPECT_LONGS_EQUAL(8, (long)TestValueData::ConstructorCount); + EXPECT_LONGS_EQUAL(8, (long)TestValueData::DestructorCount); // values destructed ! +} + +/* ************************************************************************* */ +// small class with a constructor to create an rvalue +struct TestValues : Values { + using Values::Values; // inherits move constructor + + TestValues(const TestValue& value1, const TestValue& value2) { + insert(0, value1); + insert(1, value2); + } +}; +static_assert(std::is_move_constructible::value, ""); +static_assert(std::is_move_constructible::value, ""); + +// test move semantics +TEST(Values, move_constructor) { + { + TestValueData::ConstructorCount = 0; + TestValueData::DestructorCount = 0; + TestValue value1; + TestValue value2; + EXPECT_LONGS_EQUAL(2, (long)TestValueData::ConstructorCount); + EXPECT_LONGS_EQUAL(0, (long)TestValueData::DestructorCount); + TestValues values(TestValues(value1, value2)); // Move happens here ! (could be optimization?) + EXPECT_LONGS_EQUAL(2, values.size()); + EXPECT_LONGS_EQUAL(6, (long)TestValueData::ConstructorCount); // yay ! 
We don't copy + EXPECT_LONGS_EQUAL(2, (long)TestValueData::DestructorCount); // extra insert copies + } + EXPECT_LONGS_EQUAL(6, (long)TestValueData::ConstructorCount); + EXPECT_LONGS_EQUAL(6, (long)TestValueData::DestructorCount); +} + +// test use of std::move +TEST(Values, std_move) { + { + TestValueData::ConstructorCount = 0; + TestValueData::DestructorCount = 0; + { + TestValue value1; + TestValue value2; + TestValues values(value1, value2); + EXPECT_LONGS_EQUAL(6, (long)TestValueData::ConstructorCount); + EXPECT_LONGS_EQUAL(2, (long)TestValueData::DestructorCount); + EXPECT_LONGS_EQUAL(2, values.size()); + TestValues moved(std::move(values)); // Move happens here ! + EXPECT_LONGS_EQUAL(2, values.size()); // Uh oh! Should be 0 ! + EXPECT_LONGS_EQUAL(2, moved.size()); + EXPECT_LONGS_EQUAL(8, (long)TestValueData::ConstructorCount); // Uh oh! Should be 6 :-( + EXPECT_LONGS_EQUAL(2, (long)TestValueData::DestructorCount); // extra insert copies + } + EXPECT_LONGS_EQUAL(8, (long)TestValueData::ConstructorCount); + EXPECT_LONGS_EQUAL(8, (long)TestValueData::DestructorCount); } - LONGS_EQUAL(4+2, (long)TestValueData::ConstructorCount); - LONGS_EQUAL(4+2, (long)TestValueData::DestructorCount); } /* ************************************************************************* */ @@ -511,6 +565,8 @@ TEST(Values, VectorFixedInsertFixedRead) { } /* ************************************************************************* */ +// NOTE(frank): test is broken, because the scheme it tested was *very* slow. +// TODO(frank): find long-term solution. that works w matlab/python. //TEST(Values, VectorFixedInsertDynamicRead) { // Values values; // Vector3 v; v << 5.0, 6.0, 7.0; @@ -531,6 +587,7 @@ TEST(Values, MatrixDynamicInsertFixedRead) { CHECK(assert_equal((Vector)expected, values.at(key1))); CHECK_EXCEPTION(values.at(key1), exception); } + /* ************************************************************************* */ int main() { TestResult tr; return TestRegistry::runAllTests(tr); } /* ************************************************************************* */ diff --git a/gtsam/slam/RegularImplicitSchurFactor.h b/gtsam/slam/RegularImplicitSchurFactor.h index 973798a64..7c481d0c8 100644 --- a/gtsam/slam/RegularImplicitSchurFactor.h +++ b/gtsam/slam/RegularImplicitSchurFactor.h @@ -130,7 +130,7 @@ public: // Do the Schur complement SymmetricBlockMatrix augmentedHessian = // Set::SchurComplement(FBlocks_, E_, b_); - return augmentedHessian.matrix(); + return augmentedHessian.selfadjointView(); } /// *Compute* full information matrix diff --git a/gtsam/slam/expressions.h b/gtsam/slam/expressions.h index e81c76bd6..f9f7125cc 100644 --- a/gtsam/slam/expressions.h +++ b/gtsam/slam/expressions.h @@ -40,6 +40,14 @@ inline Point3_ transform_from(const Pose3_& x, const Point3_& p) { return Point3_(x, &Pose3::transform_from, p); } +inline Point3_ rotate(const Rot3_& x, const Point3_& p) { + return Point3_(x, &Rot3::rotate, p); +} + +inline Point3_ unrotate(const Rot3_& x, const Point3_& p) { + return Point3_(x, &Rot3::unrotate, p); +} + // Projection typedef Expression Cal3_S2_; @@ -58,40 +66,37 @@ inline Point2_ project(const Unit3_& p_cam) { namespace internal { // Helper template for project2 expression below -template -Point2 project4(const CAMERA& camera, const POINT& p, - OptionalJacobian<2, CAMERA::dimension> Dcam, - OptionalJacobian<2, FixedDimension::value> Dpoint) { +template +Point2 project4(const CAMERA& camera, const POINT& p, OptionalJacobian<2, CAMERA::dimension> Dcam, + OptionalJacobian<2, 
FixedDimension::value> Dpoint) { return camera.project2(p, Dcam, Dpoint); } } -template -Point2_ project2(const Expression& camera_, - const Expression& p_) { +template +Point2_ project2(const Expression& camera_, const Expression& p_) { return Point2_(internal::project4, camera_, p_); } namespace internal { // Helper template for project3 expression below -template +template inline Point2 project6(const Pose3& x, const Point3& p, const Cal3_S2& K, - OptionalJacobian<2, 6> Dpose, OptionalJacobian<2, 3> Dpoint, - OptionalJacobian<2, 5> Dcal) { + OptionalJacobian<2, 6> Dpose, OptionalJacobian<2, 3> Dpoint, + OptionalJacobian<2, 5> Dcal) { return PinholeCamera(x, K).project(p, Dpose, Dpoint, Dcal); } } -template +template inline Point2_ project3(const Pose3_& x, const Expression& p, - const Expression& K) { + const Expression& K) { return Point2_(internal::project6, x, p, K); } -template +template Point2_ uncalibrate(const Expression& K, const Point2_& xy_hat) { return Point2_(K, &CALIBRATION::uncalibrate, xy_hat); } -} // \namespace gtsam - +} // \namespace gtsam diff --git a/gtsam/slam/tests/testAntiFactor.cpp b/gtsam/slam/tests/testAntiFactor.cpp index 1dc897599..7a95e0fa9 100644 --- a/gtsam/slam/tests/testAntiFactor.cpp +++ b/gtsam/slam/tests/testAntiFactor.cpp @@ -64,19 +64,14 @@ TEST( AntiFactor, NegativeHessian) GaussianFactor::shared_ptr antiGaussian = antiFactor->linearize(values); HessianFactor::shared_ptr antiHessian = boost::dynamic_pointer_cast(antiGaussian); + Matrix expected_information = -originalHessian->information(); + Matrix actual_information = antiHessian->information(); + EXPECT(assert_equal(expected_information, actual_information)); + + Vector expected_linear = -originalHessian->linearTerm(); + Vector actual_linear = antiHessian->linearTerm(); + EXPECT(assert_equal(expected_linear, actual_linear)); - // Compare Hessian blocks - size_t variable_count = originalFactor->size(); - for(size_t i = 0; i < variable_count; ++i){ - for(size_t j = i; j < variable_count; ++j){ - Matrix expected_G = -Matrix(originalHessian->info(originalHessian->begin()+i, originalHessian->begin()+j)); - Matrix actual_G = antiHessian->info(antiHessian->begin()+i, antiHessian->begin()+j); - CHECK(assert_equal(expected_G, actual_G, 1e-5)); - } - Vector expected_g = -originalHessian->linearTerm(originalHessian->begin()+i); - Vector actual_g = antiHessian->linearTerm(antiHessian->begin()+i); - CHECK(assert_equal(expected_g, actual_g, 1e-5)); - } double expected_f = -originalHessian->constantTerm(); double actual_f = antiHessian->constantTerm(); EXPECT_DOUBLES_EQUAL(expected_f, actual_f, 1e-5); diff --git a/gtsam/slam/tests/testSlamExpressions.cpp b/gtsam/slam/tests/testSlamExpressions.cpp new file mode 100644 index 000000000..66b236ac2 --- /dev/null +++ b/gtsam/slam/tests/testSlamExpressions.cpp @@ -0,0 +1,53 @@ +/* ---------------------------------------------------------------------------- + + * GTSAM Copyright 2010, Georgia Tech Research Corporation, + * Atlanta, Georgia 30332-0415 + * All Rights Reserved + * Authors: Frank Dellaert, et al. 
(see THANKS for the full author list)
+
+ * See LICENSE for the license information
+
+ * -------------------------------------------------------------------------- */
+
+/**
+ * @file   testSlamExpressions.cpp
+ * @author Frank Dellaert
+ * @brief  test expressions for SLAM
+ */
+
+#include
+#include
+#include
+
+#include
+
+#include
+
+using namespace std;
+using namespace gtsam;
+
+/* ************************************************************************* */
+TEST(SlamExpressions, project2) {
+  typedef Expression<CalibratedCamera> CalibratedCamera_;
+
+  Rot3_ rot3_expr('r', 0);          // unknown rotation !
+  Point3_ t_expr(Point3(1, 2, 3));  // known translation
+  Pose3_ pose_expr(&Pose3::Create, rot3_expr, t_expr);
+  CalibratedCamera_ camera_expr(&CalibratedCamera::Create, pose_expr);
+  Point3_ point3_expr('p', 12);     // unknown 3D point with index 12, for funsies
+  Point2_ point2_expr = project2(camera_expr, point3_expr);
+
+  // Set the linearization point
+  Values values;
+  values.insert(Symbol('r', 0), Rot3());
+  values.insert(Symbol('p', 12), Point3(4, 5, 6));
+
+  EXPECT_CORRECT_EXPRESSION_JACOBIANS(point2_expr, values, 1e-7, 1e-5);
+}
+
+/* ************************************************************************* */
+int main() {
+  TestResult tr;
+  return TestRegistry::runAllTests(tr);
+}
+/* ************************************************************************* */
diff --git a/gtsam/slam/tests/testSmartProjectionCameraFactor.cpp b/gtsam/slam/tests/testSmartProjectionCameraFactor.cpp
index 0488c0f49..0db8149eb 100644
--- a/gtsam/slam/tests/testSmartProjectionCameraFactor.cpp
+++ b/gtsam/slam/tests/testSmartProjectionCameraFactor.cpp
@@ -645,8 +645,7 @@ TEST( SmartProjectionCameraFactor, comparisonGeneralSfMFactor ) {
   values.insert(l1, expectedPoint); // note: we get rid of possible errors in the triangulation
   Vector e1 = sfm1.evaluateError(values.at(c1), values.at(l1));
   Vector e2 = sfm2.evaluateError(values.at(c2), values.at(l1));
-  double actualError = 0.5
-      * (e1.norm() * e1.norm() + e2.norm() * e2.norm());
+  double actualError = 0.5 * (e1.squaredNorm() + e2.squaredNorm());
   double actualErrorGraph = generalGraph.error(values);

   DOUBLES_EQUAL(expectedErrorGraph, actualErrorGraph, 1e-7);
diff --git a/gtsam/symbolic/SymbolicJunctionTree.cpp b/gtsam/symbolic/SymbolicJunctionTree.cpp
index 00a52497b..261f682b0 100644
--- a/gtsam/symbolic/SymbolicJunctionTree.cpp
+++ b/gtsam/symbolic/SymbolicJunctionTree.cpp
@@ -23,7 +23,7 @@
 namespace gtsam {

   // Instantiate base class
-  template class ClusterTree<SymbolicFactorGraph>;
+  template class EliminatableClusterTree<SymbolicFactorGraph>;
   template class JunctionTree<SymbolicBayesTree, SymbolicFactorGraph>;

   /* ************************************************************************* */
diff --git a/gtsam/symbolic/SymbolicJunctionTree.h b/gtsam/symbolic/SymbolicJunctionTree.h
index 709488cbf..fdc5450b3 100644
--- a/gtsam/symbolic/SymbolicJunctionTree.h
+++ b/gtsam/symbolic/SymbolicJunctionTree.h
@@ -26,7 +26,7 @@ namespace gtsam {
   class SymbolicEliminationTree;

   /**
-   * A ClusterTree, i.e., a set of variable clusters with factors, arranged in a tree, with
+   * An EliminatableClusterTree, i.e., a set of variable clusters with factors, arranged in a tree, with
    * the additional property that it represents the clique tree associated with a Bayes net.
* * In GTSAM a junction tree is an intermediate data structure in multifrontal diff --git a/gtsam_unstable/nonlinear/BatchFixedLagSmoother.cpp b/gtsam_unstable/nonlinear/BatchFixedLagSmoother.cpp index 405321ffc..a0ede5f74 100644 --- a/gtsam_unstable/nonlinear/BatchFixedLagSmoother.cpp +++ b/gtsam_unstable/nonlinear/BatchFixedLagSmoother.cpp @@ -1,6 +1,6 @@ /* ---------------------------------------------------------------------------- - * GTSAM Copyright 2010, Georgia Tech Research Corporation, + * GTSAM Copyright 2010, Georgia Tech Research Corporation, * Atlanta, Georgia 30332-0415 * All Rights Reserved * Authors: Frank Dellaert, et al. (see THANKS for the full author list) @@ -22,13 +22,13 @@ #include #include #include -//#include -#include + +using namespace std; namespace gtsam { /* ************************************************************************* */ -void BatchFixedLagSmoother::print(const std::string& s, +void BatchFixedLagSmoother::print(const string& s, const KeyFormatter& keyFormatter) const { FixedLagSmoother::print(s, keyFormatter); // TODO: What else to print? @@ -45,7 +45,7 @@ bool BatchFixedLagSmoother::equals(const FixedLagSmoother& rhs, /* ************************************************************************* */ Matrix BatchFixedLagSmoother::marginalCovariance(Key key) const { - throw std::runtime_error( + throw runtime_error( "BatchFixedLagSmoother::marginalCovariance not implemented"); } @@ -54,17 +54,12 @@ FixedLagSmoother::Result BatchFixedLagSmoother::update( const NonlinearFactorGraph& newFactors, const Values& newTheta, const KeyTimestampMap& timestamps) { - const bool debug = ISDEBUG("BatchFixedLagSmoother update"); - if (debug) { - std::cout << "BatchFixedLagSmoother::update() START" << std::endl; - } - // Update all of the internal variables with the new information gttic(augment_system); // Add the new variables to theta theta_.insert(newTheta); // Add new variables to the end of the ordering - for(const Values::ConstKeyValuePair& key_value: newTheta) { + for (const auto& key_value : newTheta) { ordering_.push_back(key_value.key); } // Augment Delta @@ -79,19 +74,10 @@ FixedLagSmoother::Result BatchFixedLagSmoother::update( // Get current timestamp double current_timestamp = getCurrentTimestamp(); - if (debug) - std::cout << "Current Timestamp: " << current_timestamp << std::endl; // Find the set of variables to be marginalized out - std::set marginalizableKeys = findKeysBefore( + KeyVector marginalizableKeys = findKeysBefore( current_timestamp - smootherLag_); - if (debug) { - std::cout << "Marginalizable Keys: "; - for(Key key: marginalizableKeys) { - std::cout << DefaultKeyFormatter(key) << " "; - } - std::cout << std::endl; - } // Reorder gttic(reorder); @@ -113,17 +99,13 @@ FixedLagSmoother::Result BatchFixedLagSmoother::update( } gttoc(marginalize); - if (debug) { - std::cout << "BatchFixedLagSmoother::update() FINISH" << std::endl; - } - return result; } /* ************************************************************************* */ void BatchFixedLagSmoother::insertFactors( const NonlinearFactorGraph& newFactors) { - for(const NonlinearFactor::shared_ptr& factor: newFactors) { + for(const auto& factor: newFactors) { Key index; // Insert the factor into an existing hole in the factor graph, if possible if (availableSlots_.size() > 0) { @@ -143,7 +125,7 @@ void BatchFixedLagSmoother::insertFactors( /* ************************************************************************* */ void BatchFixedLagSmoother::removeFactors( - const std::set& 
deleteFactors) {
+    const set<size_t>& deleteFactors) {
   for(size_t slot: deleteFactors) {
     if (factors_.at(slot)) {
       // Remove references to this factor from the FactorIndex
@@ -156,14 +138,14 @@ void BatchFixedLagSmoother::removeFactors(
       availableSlots_.push(slot);
     } else {
       // TODO: Throw an error??
-      std::cout << "Attempting to remove a factor from slot " << slot
-          << ", but it is already NULL." << std::endl;
+      cout << "Attempting to remove a factor from slot " << slot
+           << ", but it is already NULL." << endl;
     }
   }
 }
 
 /* ************************************************************************* */
-void BatchFixedLagSmoother::eraseKeys(const std::set<Key>& keys) {
+void BatchFixedLagSmoother::eraseKeys(const KeyVector& keys) {
 
   for(Key key: keys) {
     // Erase the key from the values
@@ -182,50 +164,20 @@ void BatchFixedLagSmoother::eraseKeys(const std::set<Key>& keys) {
 
   // Remove marginalized keys from the ordering and delta
   for(Key key: keys) {
-    ordering_.erase(std::find(ordering_.begin(), ordering_.end(), key));
+    ordering_.erase(find(ordering_.begin(), ordering_.end(), key));
     delta_.erase(key);
   }
 }
 
 /* ************************************************************************* */
-void BatchFixedLagSmoother::reorder(const std::set<Key>& marginalizeKeys) {
-
-  const bool debug = ISDEBUG("BatchFixedLagSmoother reorder");
-
-  if (debug) {
-    std::cout << "BatchFixedLagSmoother::reorder() START" << std::endl;
-  }
-
-  if (debug) {
-    std::cout << "Marginalizable Keys: ";
-    for(Key key: marginalizeKeys) {
-      std::cout << DefaultKeyFormatter(key) << " ";
-    }
-    std::cout << std::endl;
-  }
-
+void BatchFixedLagSmoother::reorder(const KeyVector& marginalizeKeys) {
   // COLAMD groups will be used to place marginalize keys in Group 0, and everything else in Group 1
-  ordering_ = Ordering::ColamdConstrainedFirst(factors_,
-      std::vector<Key>(marginalizeKeys.begin(), marginalizeKeys.end()));
-
-  if (debug) {
-    ordering_.print("New Ordering: ");
-  }
-
-  if (debug) {
-    std::cout << "BatchFixedLagSmoother::reorder() FINISH" << std::endl;
-  }
+  ordering_ = Ordering::ColamdConstrainedFirst(factors_, marginalizeKeys);
 }
 
 /* ************************************************************************* */
 FixedLagSmoother::Result BatchFixedLagSmoother::optimize() {
 
-  const bool debug = ISDEBUG("BatchFixedLagSmoother optimize");
-
-  if (debug) {
-    std::cout << "BatchFixedLagSmoother::optimize() START" << std::endl;
-  }
-
   // Create output result structure
   Result result;
   result.nonlinearVariables = theta_.size() - linearKeys_.size();
@@ -247,20 +199,9 @@ FixedLagSmoother::Result BatchFixedLagSmoother::optimize() {
 
   // check if we're already close enough
   if (result.error <= errorTol) {
-    if (debug) {
-      std::cout << "BatchFixedLagSmoother::optimize Exiting, as error = "
-          << result.error << " < " << errorTol << std::endl;
-    }
     return result;
   }
 
-  if (debug) {
-    std::cout << "BatchFixedLagSmoother::optimize linearValues: "
-        << linearKeys_.size() << std::endl;
-    std::cout << "BatchFixedLagSmoother::optimize Initial error: "
-        << result.error << std::endl;
-  }
-
  // Use a custom optimization loop so the linearization points can be controlled
  double previousError;
  VectorValues newDelta;
@@ -276,19 +217,14 @@ FixedLagSmoother::Result BatchFixedLagSmoother::optimize() {
 
     // Keep increasing lambda until we make progress
     while (true) {
-      if (debug) {
-        std::cout << "BatchFixedLagSmoother::optimize trying lambda = "
-            << lambda << std::endl;
-      }
-
       // Add prior factors at the current solution
       gttic(damp);
       GaussianFactorGraph dampedFactorGraph(linearFactorGraph);
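       // The priors added below each have unit Jacobian A = I, right-hand side
       // b = delta, and noise sigma = 1/sqrt(lambda), so each one contributes
       // A^T * A / sigma^2 = lambda * I to the Hessian: exactly the standard
       // Levenberg-Marquardt damping term.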
dampedFactorGraph.reserve(linearFactorGraph.size() + delta_.size()); { // for each of the variables, add a prior at the current solution - double sigma = 1.0 / std::sqrt(lambda); - for(const VectorValues::KeyValuePair& key_value: delta_) { + double sigma = 1.0 / sqrt(lambda); + for(const auto& key_value: delta_) { size_t dim = key_value.second.size(); Matrix A = Matrix::Identity(dim, dim); Vector b = key_value.second; @@ -314,13 +250,6 @@ FixedLagSmoother::Result BatchFixedLagSmoother::optimize() { double error = factors_.error(evalpoint); gttoc(compute_error); - if (debug) { - std::cout << "BatchFixedLagSmoother::optimize linear delta norm = " - << newDelta.norm() << std::endl; - std::cout << "BatchFixedLagSmoother::optimize next error = " << error - << std::endl; - } - if (error < result.error) { // Keep this change // Update the error value @@ -332,7 +261,7 @@ FixedLagSmoother::Result BatchFixedLagSmoother::optimize() { // Put the linearization points and deltas back for specific variables if (enforceConsistency_ && (linearKeys_.size() > 0)) { theta_.update(linearKeys_); - for(const Values::ConstKeyValuePair& key_value: linearKeys_) { + for(const auto& key_value: linearKeys_) { delta_.at(key_value.key) = newDelta.at(key_value.key); } } @@ -347,9 +276,9 @@ FixedLagSmoother::Result BatchFixedLagSmoother::optimize() { // Reject this change if (lambda >= lambdaUpperBound) { // The maximum lambda has been used. Print a warning and end the search. - std::cout + cout << "Warning: Levenberg-Marquardt giving up because cannot decrease error with maximum lambda" - << std::endl; + << endl; break; } else { // Increase lambda and continue searching @@ -360,65 +289,30 @@ FixedLagSmoother::Result BatchFixedLagSmoother::optimize() { } gttoc(optimizer_iteration); - if (debug) { - std::cout << "BatchFixedLagSmoother::optimize using lambda = " << lambda - << std::endl; - } - result.iterations++; } while (result.iterations < maxIterations && !checkConvergence(relativeErrorTol, absoluteErrorTol, errorTol, previousError, result.error, NonlinearOptimizerParams::SILENT)); - if (debug) { - std::cout << "BatchFixedLagSmoother::optimize newError: " << result.error - << std::endl; - } - - if (debug) { - std::cout << "BatchFixedLagSmoother::optimize() FINISH" << std::endl; - } - return result; } /* ************************************************************************* */ -void BatchFixedLagSmoother::marginalize(const std::set& marginalizeKeys) { +void BatchFixedLagSmoother::marginalize(const KeyVector& marginalizeKeys) { // In order to marginalize out the selected variables, the factors involved in those variables // must be identified and removed. Also, the effect of those removed factors on the // remaining variables needs to be accounted for. This will be done with linear container factors // from the result of a partial elimination. This function removes the marginalized factors and // adds the linearized factors back in. - const bool debug = ISDEBUG("BatchFixedLagSmoother marginalize"); - - if (debug) - std::cout << "BatchFixedLagSmoother::marginalize Begin" << std::endl; - - if (debug) { - std::cout << "BatchFixedLagSmoother::marginalize Marginalize Keys: "; - for(Key key: marginalizeKeys) { - std::cout << DefaultKeyFormatter(key) << " "; - } - std::cout << std::endl; - } - // Identify all of the factors involving any marginalized variable. These must be removed. 
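   // (A set is used for the slots below because a single factor may involve
   // several marginalized keys and must be collected only once.)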
- std::set removedFactorSlots; - VariableIndex variableIndex(factors_); + set removedFactorSlots; + const VariableIndex variableIndex(factors_); for(Key key: marginalizeKeys) { const FastVector& slots = variableIndex[key]; removedFactorSlots.insert(slots.begin(), slots.end()); } - if (debug) { - std::cout << "BatchFixedLagSmoother::marginalize Removed Factor Slots: "; - for(size_t slot: removedFactorSlots) { - std::cout << slot << " "; - } - std::cout << std::endl; - } - // Add the removed factors to a factor graph NonlinearFactorGraph removedFactors; for(size_t slot: removedFactorSlots) { @@ -427,20 +321,9 @@ void BatchFixedLagSmoother::marginalize(const std::set& marginalizeKeys) { } } - if (debug) { - PrintSymbolicGraph(removedFactors, - "BatchFixedLagSmoother::marginalize Removed Factors: "); - } - // Calculate marginal factors on the remaining keys - NonlinearFactorGraph marginalFactors = calculateMarginalFactors( - removedFactors, theta_, marginalizeKeys, - parameters_.getEliminationFunction()); - - if (debug) { - PrintSymbolicGraph(removedFactors, - "BatchFixedLagSmoother::marginalize Marginal Factors: "); - } + NonlinearFactorGraph marginalFactors = CalculateMarginalFactors( + removedFactors, theta_, marginalizeKeys, parameters_.getEliminationFunction()); // Remove marginalized factors from the factor graph removeFactors(removedFactorSlots); @@ -453,140 +336,96 @@ void BatchFixedLagSmoother::marginalize(const std::set& marginalizeKeys) { } /* ************************************************************************* */ -void BatchFixedLagSmoother::PrintKeySet(const std::set& keys, - const std::string& label) { - std::cout << label; - for(gtsam::Key key: keys) { - std::cout << " " << gtsam::DefaultKeyFormatter(key); +void BatchFixedLagSmoother::PrintKeySet(const set& keys, + const string& label) { + cout << label; + for(Key key: keys) { + cout << " " << DefaultKeyFormatter(key); } - std::cout << std::endl; + cout << endl; } /* ************************************************************************* */ -void BatchFixedLagSmoother::PrintKeySet(const gtsam::KeySet& keys, - const std::string& label) { - std::cout << label; - for(gtsam::Key key: keys) { - std::cout << " " << gtsam::DefaultKeyFormatter(key); +void BatchFixedLagSmoother::PrintKeySet(const KeySet& keys, + const string& label) { + cout << label; + for(Key key: keys) { + cout << " " << DefaultKeyFormatter(key); } - std::cout << std::endl; + cout << endl; } /* ************************************************************************* */ void BatchFixedLagSmoother::PrintSymbolicFactor( const NonlinearFactor::shared_ptr& factor) { - std::cout << "f("; + cout << "f("; if (factor) { for(Key key: factor->keys()) { - std::cout << " " << gtsam::DefaultKeyFormatter(key); + cout << " " << DefaultKeyFormatter(key); } } else { - std::cout << " NULL"; + cout << " NULL"; } - std::cout << " )" << std::endl; + cout << " )" << endl; } /* ************************************************************************* */ void BatchFixedLagSmoother::PrintSymbolicFactor( const GaussianFactor::shared_ptr& factor) { - std::cout << "f("; + cout << "f("; for(Key key: factor->keys()) { - std::cout << " " << gtsam::DefaultKeyFormatter(key); + cout << " " << DefaultKeyFormatter(key); } - std::cout << " )" << std::endl; + cout << " )" << endl; } /* ************************************************************************* */ void BatchFixedLagSmoother::PrintSymbolicGraph( - const NonlinearFactorGraph& graph, const std::string& label) { - std::cout << label << 
std::endl; - for(const NonlinearFactor::shared_ptr& factor: graph) { + const NonlinearFactorGraph& graph, const string& label) { + cout << label << endl; + for(const auto& factor: graph) { PrintSymbolicFactor(factor); } } /* ************************************************************************* */ void BatchFixedLagSmoother::PrintSymbolicGraph(const GaussianFactorGraph& graph, - const std::string& label) { - std::cout << label << std::endl; - for(const GaussianFactor::shared_ptr& factor: graph) { + const string& label) { + cout << label << endl; + for(const auto& factor: graph) { PrintSymbolicFactor(factor); } } /* ************************************************************************* */ -NonlinearFactorGraph BatchFixedLagSmoother::calculateMarginalFactors( - const NonlinearFactorGraph& graph, const Values& theta, - const std::set& marginalizeKeys, +GaussianFactorGraph BatchFixedLagSmoother::CalculateMarginalFactors( + const GaussianFactorGraph& graph, const KeyVector& keys, const GaussianFactorGraph::Eliminate& eliminateFunction) { - - const bool debug = ISDEBUG("BatchFixedLagSmoother calculateMarginalFactors"); - - if (debug) - std::cout << "BatchFixedLagSmoother::calculateMarginalFactors START" - << std::endl; - - if (debug) - PrintKeySet(marginalizeKeys, - "BatchFixedLagSmoother::calculateMarginalFactors Marginalize Keys: "); - - // Get the set of all keys involved in the factor graph - KeySet allKeys(graph.keys()); - if (debug) - PrintKeySet(allKeys, - "BatchFixedLagSmoother::calculateMarginalFactors All Keys: "); - - if (!std::includes(allKeys.begin(), allKeys.end(), marginalizeKeys.begin(), - marginalizeKeys.end())) { - throw std::runtime_error("Some keys found in marginalizeKeys do not" - " occur in the graph."); - } - - // Calculate the set of RemainingKeys = AllKeys \Intersect marginalizeKeys - KeySet remainingKeys; - std::set_difference(allKeys.begin(), allKeys.end(), marginalizeKeys.begin(), - marginalizeKeys.end(), std::inserter(remainingKeys, remainingKeys.end())); - if (debug) - PrintKeySet(remainingKeys, - "BatchFixedLagSmoother::calculateMarginalFactors Remaining Keys: "); - - if (marginalizeKeys.size() == 0) { + if (keys.size() == 0) { // There are no keys to marginalize. Simply return the input factors - if (debug) - std::cout << "BatchFixedLagSmoother::calculateMarginalFactors FINISH" - << std::endl; return graph; } else { - - // Create the linear factor graph - GaussianFactorGraph linearFactorGraph = *graph.linearize(theta); // .first is the eliminated Bayes tree, while .second is the remaining factor graph - GaussianFactorGraph marginalLinearFactors = - *linearFactorGraph.eliminatePartialMultifrontal( - std::vector(marginalizeKeys.begin(), marginalizeKeys.end())).second; + return *graph.eliminatePartialMultifrontal(keys, eliminateFunction).second; + } +} + +/* ************************************************************************* */ +NonlinearFactorGraph BatchFixedLagSmoother::CalculateMarginalFactors( + const NonlinearFactorGraph& graph, const Values& theta, const KeyVector& keys, + const GaussianFactorGraph::Eliminate& eliminateFunction) { + if (keys.size() == 0) { + // There are no keys to marginalize. 
Simply return the input factors + return graph; + } else { + // Create the linear factor graph + const auto linearFactorGraph = graph.linearize(theta); + + const auto marginalLinearFactors = + CalculateMarginalFactors(*linearFactorGraph, keys, eliminateFunction); // Wrap in nonlinear container factors - NonlinearFactorGraph marginalFactors; - marginalFactors.reserve(marginalLinearFactors.size()); - for(const GaussianFactor::shared_ptr& gaussianFactor: marginalLinearFactors) { - marginalFactors += boost::make_shared( - gaussianFactor, theta); - if (debug) { - std::cout - << "BatchFixedLagSmoother::calculateMarginalFactors Marginal Factor: "; - PrintSymbolicFactor(marginalFactors.back()); - } - } - - if (debug) - PrintSymbolicGraph(marginalFactors, - "BatchFixedLagSmoother::calculateMarginalFactors All Marginal Factors: "); - - if (debug) - std::cout << "BatchFixedLagSmoother::calculateMarginalFactors FINISH" - << std::endl; - - return marginalFactors; + return LinearContainerFactor::ConvertLinearGraph(marginalLinearFactors, theta); } } diff --git a/gtsam_unstable/nonlinear/BatchFixedLagSmoother.h b/gtsam_unstable/nonlinear/BatchFixedLagSmoother.h index 605f3a5e8..a6d25d8a1 100644 --- a/gtsam_unstable/nonlinear/BatchFixedLagSmoother.h +++ b/gtsam_unstable/nonlinear/BatchFixedLagSmoother.h @@ -1,6 +1,6 @@ /* ---------------------------------------------------------------------------- - * GTSAM Copyright 2010, Georgia Tech Research Corporation, + * GTSAM Copyright 2010, Georgia Tech Research Corporation, * Atlanta, Georgia 30332-0415 * All Rights Reserved * Authors: Frank Dellaert, et al. (see THANKS for the full author list) @@ -103,8 +103,27 @@ public: /// Calculate marginal covariance on given variable Matrix marginalCovariance(Key key) const; - static NonlinearFactorGraph calculateMarginalFactors(const NonlinearFactorGraph& graph, const Values& theta, - const std::set& marginalizeKeys, const GaussianFactorGraph::Eliminate& eliminateFunction); + /// Marginalize specific keys from a linear graph. + /// Does not check whether keys actually exist in graph. 
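+  /// If a requested key does not actually exist in the graph, elimination
+  /// will fail somewhere deep inside the solver. A minimal usage sketch
+  /// (the graph contents and the key are hypothetical):
+  ///
+  ///   GaussianFactorGraph gfg = ...;  // some linear graph containing 'x0'
+  ///   KeyVector old;
+  ///   old.push_back(Symbol('x', 0));
+  ///   GaussianFactorGraph reduced =
+  ///       BatchFixedLagSmoother::CalculateMarginalFactors(gfg, old);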
+  static GaussianFactorGraph CalculateMarginalFactors(
+      const GaussianFactorGraph& graph, const KeyVector& keys,
+      const GaussianFactorGraph::Eliminate& eliminateFunction = EliminatePreferCholesky);
+
+  /// Marginalize specific keys from a nonlinear graph, wrap in LinearContainers
+  static NonlinearFactorGraph CalculateMarginalFactors(
+      const NonlinearFactorGraph& graph, const Values& theta, const KeyVector& keys,
+      const GaussianFactorGraph::Eliminate& eliminateFunction = EliminatePreferCholesky);
+
+#ifdef GTSAM_ALLOW_DEPRECATED_SINCE_V4
+  static NonlinearFactorGraph calculateMarginalFactors(
+      const NonlinearFactorGraph& graph, const Values& theta, const std::set<Key>& keys,
+      const GaussianFactorGraph::Eliminate& eliminateFunction = EliminatePreferCholesky) {
+    KeyVector keyVector(keys.begin(), keys.end());
+    return CalculateMarginalFactors(graph, theta, keyVector, eliminateFunction);
+  }
+#endif
 
 protected:
 
@@ -147,16 +166,16 @@ protected:
   void removeFactors(const std::set<size_t>& deleteFactors);
 
   /** Erase any keys associated with timestamps before the provided time */
-  void eraseKeys(const std::set<Key>& keys);
+  void eraseKeys(const KeyVector& keys);
 
   /** Use colamd to update into an efficient ordering */
-  void reorder(const std::set<Key>& marginalizeKeys = std::set<Key>());
+  void reorder(const KeyVector& marginalizeKeys = KeyVector());
 
   /** Optimize the current graph using a modified version of L-M */
   Result optimize();
 
   /** Marginalize out selected variables */
-  void marginalize(const std::set<Key>& marginalizableKeys);
+  void marginalize(const KeyVector& marginalizableKeys);
 
 private:
   /** Private methods for printing debug information */
diff --git a/gtsam_unstable/nonlinear/FixedLagSmoother.cpp b/gtsam_unstable/nonlinear/FixedLagSmoother.cpp
index 3eecb58e0..62ee07a16 100644
--- a/gtsam_unstable/nonlinear/FixedLagSmoother.cpp
+++ b/gtsam_unstable/nonlinear/FixedLagSmoother.cpp
@@ -1,6 +1,6 @@
 /* ----------------------------------------------------------------------------
 
- * GTSAM Copyright 2010, Georgia Tech Research Corporation, 
+ * GTSAM Copyright 2010, Georgia Tech Research Corporation,
  * Atlanta, Georgia 30332-0415
  * All Rights Reserved
  * Authors: Frank Dellaert, et al.
(see THANKS for the full author list) @@ -36,7 +36,7 @@ bool FixedLagSmoother::equals(const FixedLagSmoother& rhs, double tol) const { /* ************************************************************************* */ void FixedLagSmoother::updateKeyTimestampMap(const KeyTimestampMap& timestamps) { // Loop through each key and add/update it in the map - for(const KeyTimestampMap::value_type& key_timestamp: timestamps) { + for(const auto& key_timestamp: timestamps) { // Check to see if this key already exists in the database KeyTimestampMap::iterator keyIter = keyTimestampMap_.find(key_timestamp.first); @@ -64,7 +64,7 @@ void FixedLagSmoother::updateKeyTimestampMap(const KeyTimestampMap& timestamps) } /* ************************************************************************* */ -void FixedLagSmoother::eraseKeyTimestampMap(const std::set& keys) { +void FixedLagSmoother::eraseKeyTimestampMap(const KeyVector& keys) { for(Key key: keys) { // Erase the key from the Timestamp->Key map double timestamp = keyTimestampMap_.at(key); @@ -92,21 +92,21 @@ double FixedLagSmoother::getCurrentTimestamp() const { } /* ************************************************************************* */ -std::set FixedLagSmoother::findKeysBefore(double timestamp) const { - std::set keys; +KeyVector FixedLagSmoother::findKeysBefore(double timestamp) const { + KeyVector keys; TimestampKeyMap::const_iterator end = timestampKeyMap_.lower_bound(timestamp); for(TimestampKeyMap::const_iterator iter = timestampKeyMap_.begin(); iter != end; ++iter) { - keys.insert(iter->second); + keys.push_back(iter->second); } return keys; } /* ************************************************************************* */ -std::set FixedLagSmoother::findKeysAfter(double timestamp) const { - std::set keys; +KeyVector FixedLagSmoother::findKeysAfter(double timestamp) const { + KeyVector keys; TimestampKeyMap::const_iterator begin = timestampKeyMap_.upper_bound(timestamp); for(TimestampKeyMap::const_iterator iter = begin; iter != timestampKeyMap_.end(); ++iter) { - keys.insert(iter->second); + keys.push_back(iter->second); } return keys; } diff --git a/gtsam_unstable/nonlinear/FixedLagSmoother.h b/gtsam_unstable/nonlinear/FixedLagSmoother.h index f212b38e7..4868a7cf1 100644 --- a/gtsam_unstable/nonlinear/FixedLagSmoother.h +++ b/gtsam_unstable/nonlinear/FixedLagSmoother.h @@ -1,6 +1,6 @@ /* ---------------------------------------------------------------------------- - * GTSAM Copyright 2010, Georgia Tech Research Corporation, + * GTSAM Copyright 2010, Georgia Tech Research Corporation, * Atlanta, Georgia 30332-0415 * All Rights Reserved * Authors: Frank Dellaert, et al. 
(see THANKS for the full author list) @@ -24,7 +24,9 @@ #include #include #include + #include +#include namespace gtsam { @@ -111,16 +113,16 @@ protected: void updateKeyTimestampMap(const KeyTimestampMap& newTimestamps); /** Erase keys from the Key-Timestamps database */ - void eraseKeyTimestampMap(const std::set& keys); + void eraseKeyTimestampMap(const KeyVector& keys); /** Find the most recent timestamp of the system */ double getCurrentTimestamp() const; /** Find all of the keys associated with timestamps before the provided time */ - std::set findKeysBefore(double timestamp) const; + KeyVector findKeysBefore(double timestamp) const; /** Find all of the keys associated with timestamps before the provided time */ - std::set findKeysAfter(double timestamp) const; + KeyVector findKeysAfter(double timestamp) const; }; // FixedLagSmoother diff --git a/gtsam_unstable/nonlinear/IncrementalFixedLagSmoother.cpp b/gtsam_unstable/nonlinear/IncrementalFixedLagSmoother.cpp index ddfd88cae..d3c34ff5e 100644 --- a/gtsam_unstable/nonlinear/IncrementalFixedLagSmoother.cpp +++ b/gtsam_unstable/nonlinear/IncrementalFixedLagSmoother.cpp @@ -1,6 +1,6 @@ /* ---------------------------------------------------------------------------- - * GTSAM Copyright 2010, Georgia Tech Research Corporation, + * GTSAM Copyright 2010, Georgia Tech Research Corporation, * Atlanta, Georgia 30332-0415 * All Rights Reserved * Authors: Frank Dellaert, et al. (see THANKS for the full author list) @@ -88,7 +88,7 @@ FixedLagSmoother::Result IncrementalFixedLagSmoother::update( std::cout << "Current Timestamp: " << current_timestamp << std::endl; // Find the set of variables to be marginalized out - std::set marginalizableKeys = findKeysBefore( + KeyVector marginalizableKeys = findKeysBefore( current_timestamp - smootherLag_); if (debug) { @@ -174,7 +174,7 @@ void IncrementalFixedLagSmoother::eraseKeysBefore(double timestamp) { /* ************************************************************************* */ void IncrementalFixedLagSmoother::createOrderingConstraints( - const std::set& marginalizableKeys, + const KeyVector& marginalizableKeys, boost::optional >& constrainedKeys) const { if (marginalizableKeys.size() > 0) { constrainedKeys = FastMap(); diff --git a/gtsam_unstable/nonlinear/IncrementalFixedLagSmoother.h b/gtsam_unstable/nonlinear/IncrementalFixedLagSmoother.h index 31ae20c50..77d052a21 100644 --- a/gtsam_unstable/nonlinear/IncrementalFixedLagSmoother.h +++ b/gtsam_unstable/nonlinear/IncrementalFixedLagSmoother.h @@ -1,6 +1,6 @@ /* ---------------------------------------------------------------------------- - * GTSAM Copyright 2010, Georgia Tech Research Corporation, + * GTSAM Copyright 2010, Georgia Tech Research Corporation, * Atlanta, Georgia 30332-0415 * All Rights Reserved * Authors: Frank Dellaert, et al. 
(see THANKS for the full author list) @@ -117,7 +117,7 @@ protected: void eraseKeysBefore(double timestamp); /** Fill in an iSAM2 ConstrainedKeys structure such that the provided keys are eliminated before all others */ - void createOrderingConstraints(const std::set& marginalizableKeys, + void createOrderingConstraints(const KeyVector& marginalizableKeys, boost::optional >& constrainedKeys) const; private: diff --git a/gtsam_unstable/nonlinear/LinearizedFactor.cpp b/gtsam_unstable/nonlinear/LinearizedFactor.cpp index a18c24256..e285955a7 100644 --- a/gtsam_unstable/nonlinear/LinearizedFactor.cpp +++ b/gtsam_unstable/nonlinear/LinearizedFactor.cpp @@ -139,21 +139,7 @@ LinearizedHessianFactor::LinearizedHessianFactor() { /* ************************************************************************* */ LinearizedHessianFactor::LinearizedHessianFactor( const HessianFactor::shared_ptr& hessian, const Values& lin_points) -: Base(hessian, lin_points) { - - // Create the dims array - size_t *dims = (size_t*)alloca(sizeof(size_t)*(hessian->size() + 1)); - size_t index = 0; - for(HessianFactor::const_iterator iter = hessian->begin(); iter != hessian->end(); ++iter) { - dims[index++] = hessian->getDim(iter); - } - dims[index] = 1; - - // Update the BlockInfo accessor - info_ = SymmetricBlockMatrix(dims, dims+hessian->size()+1); - // Copy the augmented matrix holding G, g, and f - info_.full() = hessian->info(); -} + : Base(hessian, lin_points), info_(hessian->info()) {} /* ************************************************************************* */ void LinearizedHessianFactor::print(const std::string& s, const KeyFormatter& keyFormatter) const { @@ -165,7 +151,7 @@ void LinearizedHessianFactor::print(const std::string& s, const KeyFormatter& ke std::cout << keyFormatter(key) << " "; std::cout << std::endl; - gtsam::print(Matrix(info_.full()), "Ab^T * Ab: "); + gtsam::print(Matrix(info_.selfadjointView()), "Ab^T * Ab: "); lin_points_.print("Linearization Point: "); } @@ -176,9 +162,9 @@ bool LinearizedHessianFactor::equals(const NonlinearFactor& expected, double tol const This *e = dynamic_cast (&expected); if (e) { - Matrix thisMatrix = this->info_.full(); + Matrix thisMatrix = this->info_.selfadjointView(); thisMatrix(thisMatrix.rows()-1, thisMatrix.cols()-1) = 0.0; - Matrix rhsMatrix = e->info_.full(); + Matrix rhsMatrix = e->info_.selfadjointView(); rhsMatrix(rhsMatrix.rows()-1, rhsMatrix.cols()-1) = 0.0; return Base::equals(expected, tol) @@ -234,16 +220,20 @@ LinearizedHessianFactor::linearize(const Values& c) const { //newInfo.rangeColumn(0, this->size(), this->size(), 0) -= squaredTerm().selfadjointView() * dx; Vector g = linearTerm() - squaredTerm() * dx; std::vector gs; + std::size_t offset = 0; for(DenseIndex i = 0; i < info_.nBlocks()-1; ++i) { - gs.push_back(g.segment(info_.offset(i), info_.offset(i+1) - info_.offset(i))); + const std::size_t dim = info_.getDim(i); + gs.push_back(g.segment(offset, dim)); + offset += dim; } // G2 = G1 // Do Nothing std::vector Gs; for(DenseIndex i = 0; i < info_.nBlocks()-1; ++i) { - for(DenseIndex j = i; j < info_.nBlocks()-1; ++j) { - Gs.push_back(info_(i,j)); + Gs.push_back(info_.diagonalBlock(i)); + for(DenseIndex j = i + 1; j < info_.nBlocks()-1; ++j) { + Gs.push_back(info_.aboveDiagonalBlock(i, j)); } } diff --git a/gtsam_unstable/nonlinear/LinearizedFactor.h b/gtsam_unstable/nonlinear/LinearizedFactor.h index c6710bd70..f9fa6033f 100644 --- a/gtsam_unstable/nonlinear/LinearizedFactor.h +++ b/gtsam_unstable/nonlinear/LinearizedFactor.h @@ -178,8 
+178,9 @@ public:
 
   /** hessian block data types */
   typedef SymmetricBlockMatrix::Block Block; ///< A block from the Hessian matrix
   typedef SymmetricBlockMatrix::constBlock constBlock; ///< A block from the Hessian matrix (const version)
-  typedef SymmetricBlockMatrix::Block::OffDiagonal::ColXpr Column; ///< A column containing the linear term h
-  typedef SymmetricBlockMatrix::constBlock::OffDiagonal::ColXpr constColumn; ///< A column containing the linear term h (const version)
+
+  typedef SymmetricBlockMatrix::Block::ColXpr Column; ///< A column containing the linear term h
+  typedef SymmetricBlockMatrix::constBlock::ColXpr constColumn; ///< A column containing the linear term h (const version)
 
 protected:
 
@@ -216,19 +217,26 @@ public:
 
   /** Return the constant term \f$ f \f$ as described above
    * @return The constant term \f$ f \f$
    */
-  double constantTerm() const { return info_(this->size(), this->size())(0,0); };
+  double constantTerm() const {
+    const auto block = info_.diagonalBlock(size());
+    return block(0, 0);
+  }
 
   /** Return the part of linear term \f$ g \f$ as described above corresponding to the requested variable.
    * @param j Which block row to get, as an iterator pointing to the slot in this factor. You can
    * use, for example, begin() + 2 to get the 3rd variable in this factor.
    * @return The linear term \f$ g \f$ */
-  constColumn linearTerm(const_iterator j) const { return info_(j - this->begin(), this->size()).knownOffDiagonal().col(0); }
+  constColumn linearTerm(const_iterator j) const {
+    return info_.aboveDiagonalRange(j - begin(), size(), size(), size() + 1).col(0);
+  }
 
   /** Return the complete linear term \f$ g \f$ as described above.
    * @return The linear term \f$ g \f$ */
-  constColumn linearTerm() const { return info_.range(0, this->size(), this->size(), this->size() + 1).knownOffDiagonal().col(0); };
+  constColumn linearTerm() const {
+    return info_.aboveDiagonalRange(0, size(), size(), size() + 1).col(0);
+  }
 
-  /** Return a view of the block at (j1,j2) of the upper-triangular part of the
-   * squared term \f$ H \f$, no data is copied. See HessianFactor class documentation
+  /** Return a copy of the block at (j1,j2) of the upper-triangular part of the
+   * squared term \f$ H \f$. See HessianFactor class documentation
    * above to explain that only the upper-triangular part of the information matrix is stored
    * and returned by this function.
@@ -236,19 +244,24 @@ public:
    * use, for example, begin() + 2 to get the 3rd variable in this factor.
    * @param j2 Which block column to get, as an iterator pointing to the slot in this factor. You can
    * use, for example, begin() + 2 to get the 3rd variable in this factor.
-   * @return A view of the requested block, not a copy.
+   * @return A copy of the requested block.
    */
-  constBlock squaredTerm(const_iterator j1, const_iterator j2) const { return info_(j1-begin(), j2-begin()); }
+  Matrix squaredTerm(const_iterator j1, const_iterator j2) const {
+    const DenseIndex J1 = j1 - begin();
+    const DenseIndex J2 = j2 - begin();
+    return info_.block(J1, J2);
+  }
 
   /** Return the upper-triangular part of the full squared term, as described above.
    * See HessianFactor class documentation above to explain that only the
   * upper-triangular part of the information matrix is stored and returned by this function.
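   * For example, with two variables of dimensions p and q, the stored
   * augmented matrix has the block layout (only the upper triangle is
   * physically stored):
   *
   *   [ G11  G12  g1 ]
   *   [  .   G22  g2 ]
   *   [  .    .    f ]
   *
   * so squaredTerm(begin(), begin() + 1) returns the p-by-q block G12, and
   * linearTerm() stacks g1 and g2.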
*/ - constBlock::SelfAdjointView squaredTerm() const { return info_.range(0, this->size(), 0, this->size()).selfadjointView(); } - + Eigen::SelfAdjointView squaredTerm() const { + return info_.selfadjointView(0, size()); + } /** get the dimension of the factor (number of rows on linearization) */ - size_t dim() const { return info_.rows() - 1; }; + size_t dim() const { return info_.rows() - 1; } /** Calculate the error of the factor */ double error(const Values& c) const; diff --git a/gtsam_unstable/nonlinear/NonlinearClusterTree.h b/gtsam_unstable/nonlinear/NonlinearClusterTree.h new file mode 100644 index 000000000..b1f463c26 --- /dev/null +++ b/gtsam_unstable/nonlinear/NonlinearClusterTree.h @@ -0,0 +1,101 @@ +/** + * @file NonlinearClusterTree.h + * @author Frank Dellaert + * @date March, 2016 + */ + +#pragma once + +#include +#include +#include + +namespace gtsam { +class NonlinearClusterTree : public ClusterTree { + public: + NonlinearClusterTree() {} + + struct NonlinearCluster : Cluster { + // Given graph, index, add factors with specified keys into + // Factors are erased in the graph + // TODO(frank): fairly hacky and inefficient. Think about iterating the graph once instead + NonlinearCluster(const VariableIndex& variableIndex, const std::vector& keys, + NonlinearFactorGraph* graph) { + for (const Key key : keys) { + std::vector factors; + for (auto i : variableIndex[key]) + if (graph->at(i)) { + factors.push_back(graph->at(i)); + graph->remove(i); + } + Cluster::addFactors(key, factors); + } + } + + GaussianFactorGraph::shared_ptr linearize(const Values& values) { + return factors.linearize(values); + } + + static NonlinearCluster* DownCast(const boost::shared_ptr& cluster) { + auto nonlinearCluster = boost::dynamic_pointer_cast(cluster); + if (!nonlinearCluster) + throw std::runtime_error("Expected NonlinearCluster"); + return nonlinearCluster.get(); + } + + // linearize local custer factors straight into hessianFactor, which is returned + // If no ordering given, uses colamd + HessianFactor::shared_ptr linearizeToHessianFactor( + const Values& values, boost::optional ordering = boost::none, + const NonlinearFactorGraph::Dampen& dampen = nullptr) const { + if (!ordering) + ordering.reset(Ordering::ColamdConstrainedFirst(factors, orderedFrontalKeys, true)); + return factors.linearizeToHessianFactor(values, *ordering, dampen); + } + + // Recursively eliminate subtree rooted at this Cluster into a Bayes net and factor on parent + // TODO(frank): Use TBB to support deep trees and parallelism + std::pair linearizeAndEliminate( + const Values& values, boost::optional ordering = boost::none, + const NonlinearFactorGraph::Dampen& dampen = nullptr) const { + // Linearize and create HessianFactor f(front,separator) + HessianFactor::shared_ptr localFactor = linearizeToHessianFactor(values, ordering, dampen); + + // Get contributions f(front) from children, as well as p(children|front) + GaussianBayesNet bayesNet; + for (const auto& child : children) { + auto message = DownCast(child)->linearizeAndEliminate(values, &bayesNet); + message->updateHessian(localFactor.get()); + } + auto gaussianConditional = localFactor->eliminateCholesky(orderedFrontalKeys); + bayesNet.add(gaussianConditional); + return {bayesNet, localFactor}; + } + + // Recursively eliminate subtree rooted at this Cluster + // Version that updates existing Bayes net and returns a new Hessian factor on parent clique + // It is possible to pass in a nullptr for the bayesNet if only interested in the new factor + 
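+  // For example (sketch, names hypothetical), starting from some cluster:
+  //   GaussianBayesNet bn;
+  //   auto messageToParent = cluster->linearizeAndEliminate(values, &bn);
+  // leaves p(frontals | separator) in bn and returns f(separator).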
HessianFactor::shared_ptr linearizeAndEliminate( + const Values& values, GaussianBayesNet* bayesNet, + boost::optional ordering = boost::none, + const NonlinearFactorGraph::Dampen& dampen = nullptr) const { + auto bayesNet_newFactor_pair = linearizeAndEliminate(values, ordering, dampen); + if (bayesNet) { + bayesNet->push_back(bayesNet_newFactor_pair.first); + } + return bayesNet_newFactor_pair.second; + } + }; + + // Linearize and update linearization point with values + Values updateCholesky(const Values& values) { + GaussianBayesNet bayesNet; + for (const auto& root : roots_) { + auto result = NonlinearCluster::DownCast(root)->linearizeAndEliminate(values); + bayesNet.push_back(result.first); + } + VectorValues delta = bayesNet.optimize(); + return values.retract(delta); + } +}; +} // namespace gtsam diff --git a/gtsam_unstable/nonlinear/tests/testNonlinearClusterTree.cpp b/gtsam_unstable/nonlinear/tests/testNonlinearClusterTree.cpp new file mode 100644 index 000000000..9391814d4 --- /dev/null +++ b/gtsam_unstable/nonlinear/tests/testNonlinearClusterTree.cpp @@ -0,0 +1,155 @@ +/** + * @file testNonlinearClusterTree.cpp + * @author Frank Dellaert + * @date March, 2016 + */ + +#include +#include +#include +#include +#include +#include + +#include + +#include + +using namespace gtsam; + +static const Symbol x1('x', 1), x2('x', 2), x3('x', 3); +static const Symbol l1('l', 1), l2('l', 2); + +/* ************************************************************************* */ +NonlinearFactorGraph planarSLAMGraph() { + NonlinearFactorGraph graph; + + // Prior on pose x1 at the origin. + Pose2 prior(0.0, 0.0, 0.0); + auto priorNoise = noiseModel::Diagonal::Sigmas(Vector3(0.3, 0.3, 0.1)); + graph.add(PriorFactor(x1, prior, priorNoise)); + + // Two odometry factors + Pose2 odometry(2.0, 0.0, 0.0); + auto odometryNoise = noiseModel::Diagonal::Sigmas(Vector3(0.2, 0.2, 0.1)); + graph.add(BetweenFactor(x1, x2, odometry, odometryNoise)); + graph.add(BetweenFactor(x2, x3, odometry, odometryNoise)); + + // Add Range-Bearing measurements to two different landmarks + auto measurementNoise = noiseModel::Diagonal::Sigmas(Vector2(0.1, 0.2)); + Rot2 bearing11 = Rot2::fromDegrees(45), bearing21 = Rot2::fromDegrees(90), + bearing32 = Rot2::fromDegrees(90); + double range11 = std::sqrt(4.0 + 4.0), range21 = 2.0, range32 = 2.0; + graph.add(BearingRangeFactor(x1, l1, bearing11, range11, measurementNoise)); + graph.add(BearingRangeFactor(x2, l1, bearing21, range21, measurementNoise)); + graph.add(BearingRangeFactor(x3, l2, bearing32, range32, measurementNoise)); + + return graph; +} + +/* ************************************************************************* */ +// Create initial estimate +Values planarSLAMValues() { + Values initial; + initial.insert(l1, Point2(1.8, 2.1)); + initial.insert(l2, Point2(4.1, 1.8)); + initial.insert(x1, Pose2(0.5, 0.0, 0.2)); + initial.insert(x2, Pose2(2.3, 0.1, -0.2)); + initial.insert(x3, Pose2(4.1, 0.1, 0.1)); + return initial; +} + +/* ************************************************************************* */ +TEST(NonlinearClusterTree, Clusters) { + NonlinearFactorGraph graph = planarSLAMGraph(); + Values initial = planarSLAMValues(); + + // Build the clusters + // NOTE(frank): Order matters here as factors are removed! 
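+  // (Each NonlinearCluster constructor removes the factors for its keys from
+  // `graph`, so each later cluster only sees the factors that remain.)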
+ VariableIndex variableIndex(graph); + typedef NonlinearClusterTree::NonlinearCluster Cluster; + auto marginalCluster = boost::shared_ptr(new Cluster(variableIndex, {x1}, &graph)); + auto landmarkCluster = boost::shared_ptr(new Cluster(variableIndex, {l1, l2}, &graph)); + auto rootCluster = boost::shared_ptr(new Cluster(variableIndex, {x2, x3}, &graph)); + + EXPECT_LONGS_EQUAL(3, marginalCluster->nrFactors()); + EXPECT_LONGS_EQUAL(2, landmarkCluster->nrFactors()); + EXPECT_LONGS_EQUAL(1, rootCluster->nrFactors()); + + EXPECT_LONGS_EQUAL(1, marginalCluster->nrFrontals()); + EXPECT_LONGS_EQUAL(2, landmarkCluster->nrFrontals()); + EXPECT_LONGS_EQUAL(2, rootCluster->nrFrontals()); + + // Test linearize + auto gfg = marginalCluster->linearize(initial); + EXPECT_LONGS_EQUAL(3, gfg->size()); + + // Calculate expected result of only evaluating the marginalCluster + Ordering ordering; + ordering.push_back(x1); + auto expected = gfg->eliminatePartialSequential(ordering); + auto expectedFactor = boost::dynamic_pointer_cast(expected.second->at(0)); + if (!expectedFactor) + throw std::runtime_error("Expected HessianFactor"); + + // Linearize and eliminate the marginalCluster + auto actual = marginalCluster->linearizeAndEliminate(initial); + const GaussianBayesNet& bayesNet = actual.first; + const HessianFactor& factor = *actual.second; + EXPECT(assert_equal(*expected.first->at(0), *bayesNet.at(0), 1e-6)); + EXPECT(assert_equal(*expectedFactor, factor, 1e-6)); +} + +/* ************************************************************************* */ +static NonlinearClusterTree Construct() { + // Build the clusters + // NOTE(frank): Order matters here as factors are removed! + NonlinearFactorGraph graph = planarSLAMGraph(); + VariableIndex variableIndex(graph); + typedef NonlinearClusterTree::NonlinearCluster Cluster; + auto marginalCluster = boost::shared_ptr(new Cluster(variableIndex, {x1}, &graph)); + auto landmarkCluster = boost::shared_ptr(new Cluster(variableIndex, {l1, l2}, &graph)); + auto rootCluster = boost::shared_ptr(new Cluster(variableIndex, {x2, x3}, &graph)); + + // Build the tree + NonlinearClusterTree clusterTree; + clusterTree.addRoot(rootCluster); + rootCluster->addChild(landmarkCluster); + landmarkCluster->addChild(marginalCluster); + + return clusterTree; +} + +/* ************************************************************************* */ +TEST(NonlinearClusterTree, Construct) { + NonlinearClusterTree clusterTree = Construct(); + + EXPECT_LONGS_EQUAL(3, clusterTree[0].problemSize()); + EXPECT_LONGS_EQUAL(3, clusterTree[0][0].problemSize()); + EXPECT_LONGS_EQUAL(3, clusterTree[0][0][0].problemSize()); +} + +/* ************************************************************************* */ +TEST(NonlinearClusterTree, Solve) { + NonlinearClusterTree clusterTree = Construct(); + + Values expected; + expected.insert(l1, Point2(2, 2)); + expected.insert(l2, Point2(4, 2)); + expected.insert(x1, Pose2(0, 0, 0)); + expected.insert(x2, Pose2(2, 0, 0)); + expected.insert(x3, Pose2(4, 0, 0)); + + Values values = planarSLAMValues(); + for (size_t i = 0; i < 4; i++) + values = clusterTree.updateCholesky(values); + + EXPECT(assert_equal(expected, values, 1e-7)); +} + +/* ************************************************************************* */ +int main() { + TestResult tr; + return TestRegistry::runAllTests(tr); +} +/* ************************************************************************* */ diff --git a/python/handwritten/CMakeLists.txt b/python/handwritten/CMakeLists.txt index 
00900206c..fd022dd17 100644 --- a/python/handwritten/CMakeLists.txt +++ b/python/handwritten/CMakeLists.txt @@ -18,6 +18,13 @@ set_target_properties(gtsam_python PROPERTIES SKIP_BUILD_RPATH TRUE CLEAN_DIRECT_OUTPUT 1 ) + +target_include_directories(gtsam_python SYSTEM PUBLIC ${EIGEN3_INCLUDE_DIR}) +target_include_directories(gtsam_python SYSTEM PUBLIC ${NUMPY_INCLUDE_DIRS}) +target_include_directories(gtsam_python SYSTEM PUBLIC ${PYTHON_INCLUDE_DIRS}) +target_include_directories(gtsam_python SYSTEM PUBLIC ${Boost_INCLUDE_DIRS}) +target_include_directories(gtsam_python SYSTEM PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/../include/) + target_link_libraries(gtsam_python ${Boost_PYTHON${BOOST_PYTHON_VERSION_SUFFIX_UPPERCASE}_LIBRARY} ${PYTHON_LIBRARY} gtsam) diff --git a/python/handwritten/geometry/Cal3_S2.cpp b/python/handwritten/geometry/Cal3_S2.cpp index 339cd6a3c..440b21742 100644 --- a/python/handwritten/geometry/Cal3_S2.cpp +++ b/python/handwritten/geometry/Cal3_S2.cpp @@ -19,7 +19,7 @@ #define NO_IMPORT_ARRAY #include -#include "gtsam/geometry/Cal3_S2.h" +#include using namespace boost::python; using namespace gtsam; @@ -36,11 +36,12 @@ Vector3 (Cal3_S2::*calibrate2)(const Vector3 &) const = &Cal3_S2::calibrate; void exportCal3_S2(){ -class_("Cal3_S2", init<>()) +class_ >("Cal3_S2", init<>()) .def(init()) .def(init()) - .def(init()) + .def(init(args("fov","w","h"))) .def(init()) + .def(repr(self)) .def("print", &Cal3_S2::print, print_overloads(args("s"))) .def("equals", &Cal3_S2::equals, equals_overloads(args("q","tol"))) .def("fx",&Cal3_S2::fx) @@ -58,5 +59,6 @@ class_("Cal3_S2", init<>()) .def("calibrate",calibrate2) .def("between",&Cal3_S2::between, between_overloads()) ; +register_ptr_to_python< boost::shared_ptr >(); -} \ No newline at end of file +} diff --git a/python/handwritten/geometry/Rot3.cpp b/python/handwritten/geometry/Rot3.cpp index b840718a3..440559e3e 100644 --- a/python/handwritten/geometry/Rot3.cpp +++ b/python/handwritten/geometry/Rot3.cpp @@ -53,6 +53,8 @@ void exportRot3(){ class_("Rot3") .def(init()) .def(init()) + .def(init()) + .def(init()) .def(init()) .def(init()) .def("Quaternion", Quaternion_0, arg("q"), "Creates a Rot3 from an array [w,x,y,z] representing a quaternion") @@ -81,6 +83,8 @@ void exportRot3(){ .def("RzRyRx", RzRyRx_0, (arg("x"),arg("y"),arg("z")), "Rotations around Z, Y, then X axes as in http://en.wikipedia.org/wiki/Rotation_matrix, counterclockwise when looking from unchanging axis" ) .def("RzRyRx", RzRyRx_1, arg("xyz"), "Rotations around Z, Y, then X axes as in http://en.wikipedia.org/wiki/Rotation_matrix, counterclockwise when looking from unchanging axis" ) .staticmethod("RzRyRx") + .def("Ypr", &Rot3::Ypr) + .staticmethod("Ypr") .def("identity", &Rot3::identity) .staticmethod("identity") .def("AdjointMap", &Rot3::AdjointMap) diff --git a/python/handwritten/linear/NoiseModel.cpp b/python/handwritten/linear/NoiseModel.cpp index 00fa9d74f..e918e509d 100644 --- a/python/handwritten/linear/NoiseModel.cpp +++ b/python/handwritten/linear/NoiseModel.cpp @@ -102,6 +102,7 @@ void exportNoiseModels(){ class_("Base") .def("print", pure_virtual(&Base::print)) ; + register_ptr_to_python< boost::shared_ptr >(); // NOTE: We should use "Base" in "bases<...>", and not "BaseCallback" (it was not clear at the begining) class_, bases >("Gaussian", no_init) diff --git a/python/handwritten/nonlinear/LevenbergMarquardtOptimizer.cpp b/python/handwritten/nonlinear/LevenbergMarquardtOptimizer.cpp index f1c6a0b73..58e1d116a 100644 --- 
a/python/handwritten/nonlinear/LevenbergMarquardtOptimizer.cpp +++ b/python/handwritten/nonlinear/LevenbergMarquardtOptimizer.cpp @@ -15,7 +15,7 @@ void exportLevenbergMarquardtOptimizer() { .def_readwrite("maxIterations", &GaussNewtonParams::maxIterations) .def_readwrite("relativeErrorTol", &GaussNewtonParams::relativeErrorTol); - class_( + class_( "GaussNewtonOptimizer", init()) .def("optimize", &GaussNewtonOptimizer::optimize, @@ -31,7 +31,7 @@ void exportLevenbergMarquardtOptimizer() { .def("setUseFixedLambdaFactor", &LevenbergMarquardtParams::setUseFixedLambdaFactor) .def("setVerbosityLM", &LevenbergMarquardtParams::setVerbosityLM); - class_( + class_( "LevenbergMarquardtOptimizer", init()) .def("optimize", &LevenbergMarquardtOptimizer::optimize, diff --git a/python/handwritten/nonlinear/NonlinearFactor.cpp b/python/handwritten/nonlinear/NonlinearFactor.cpp index 9a130d8e9..8ccc123fc 100644 --- a/python/handwritten/nonlinear/NonlinearFactor.cpp +++ b/python/handwritten/nonlinear/NonlinearFactor.cpp @@ -28,7 +28,7 @@ using namespace gtsam; // All pure virtual methods should be wrapped. Non-pure may be wrapped if we want to mimic the // overloading through inheritance in Python. // See: http://www.boost.org/doc/libs/1_59_0/libs/python/doc/tutorial/doc/html/python/exposing.html#python.class_virtual_functions -struct NonlinearFactorCallback : NonlinearFactor, wrapper +struct NonlinearFactorWrap : NonlinearFactor, wrapper { double error (const Values & values) const { return this->get_override("error")(values); @@ -41,9 +41,35 @@ struct NonlinearFactorCallback : NonlinearFactor, wrapper } }; -void exportNonlinearFactor(){ +// Similarly for NoiseModelFactor: +struct NoiseModelFactorWrap : NoiseModelFactor, wrapper { + // NOTE(frank): Add all these again as I can't figure out how to derive + double error (const Values & values) const { + return this->get_override("error")(values); + } + size_t dim () const { + return this->get_override("dim")(); + } + boost::shared_ptr linearize(const Values & values) const { + return this->get_override("linearize")(values); + } + Vector unwhitenedError(const Values& x, + boost::optional&> H = boost::none) const { + return this->get_override("unwhitenedError")(x, H); + } +}; - class_("NonlinearFactor") - ; +void exportNonlinearFactor() { + class_("NonlinearFactor") + .def("error", pure_virtual(&NonlinearFactor::error)) + .def("dim", pure_virtual(&NonlinearFactor::dim)) + .def("linearize", pure_virtual(&NonlinearFactor::linearize)); + register_ptr_to_python >(); -} \ No newline at end of file + class_("NoiseModelFactor") + .def("error", pure_virtual(&NoiseModelFactor::error)) + .def("dim", pure_virtual(&NoiseModelFactor::dim)) + .def("linearize", pure_virtual(&NoiseModelFactor::linearize)) + .def("unwhitenedError", pure_virtual(&NoiseModelFactor::unwhitenedError)); + register_ptr_to_python >(); +} diff --git a/python/handwritten/nonlinear/NonlinearFactorGraph.cpp b/python/handwritten/nonlinear/NonlinearFactorGraph.cpp index 55929f3f1..6898133de 100644 --- a/python/handwritten/nonlinear/NonlinearFactorGraph.cpp +++ b/python/handwritten/nonlinear/NonlinearFactorGraph.cpp @@ -40,12 +40,15 @@ void exportNonlinearFactorGraph(){ typedef NonlinearFactorGraph::sharedFactor sharedFactor; void (NonlinearFactorGraph::*push_back1)(const sharedFactor&) = &NonlinearFactorGraph::push_back; + void (NonlinearFactorGraph::*push_back2)(const NonlinearFactorGraph&) = &NonlinearFactorGraph::push_back; void (NonlinearFactorGraph::*add1)(const sharedFactor&) = 
&NonlinearFactorGraph::add; class_("NonlinearFactorGraph", init<>()) .def("size",&NonlinearFactorGraph::size) .def("push_back", push_back1) + .def("push_back", push_back2) .def("add", add1) + .def("error", &NonlinearFactorGraph::error) .def("resize", &NonlinearFactorGraph::resize) .def("empty", &NonlinearFactorGraph::empty) .def("print", &NonlinearFactorGraph::print, print_overloads(args("s"))) diff --git a/python/handwritten/nonlinear/Values.cpp b/python/handwritten/nonlinear/Values.cpp index 0d1349972..9e4aad83f 100644 --- a/python/handwritten/nonlinear/Values.cpp +++ b/python/handwritten/nonlinear/Values.cpp @@ -47,6 +47,8 @@ void exportValues(){ void (Values::*insert_pose3) (Key, const gtsam::Pose3&) = &Values::insert; void (Values::*insert_bias) (Key, const Bias&) = &Values::insert; void (Values::*insert_navstate) (Key, const NavState&) = &Values::insert; + void (Values::*insert_vector) (Key, const gtsam::Vector&) = &Values::insert; + void (Values::*insert_vector2) (Key, const gtsam::Vector2&) = &Values::insert; void (Values::*insert_vector3) (Key, const gtsam::Vector3&) = &Values::insert; class_("Values", init<>()) @@ -67,6 +69,8 @@ void exportValues(){ .def("insert", insert_pose3) .def("insert", insert_bias) .def("insert", insert_navstate) + .def("insert", insert_vector) + .def("insert", insert_vector2) .def("insert", insert_vector3) .def("atPoint2", &Values::at) .def("atRot2", &Values::at) @@ -76,6 +80,8 @@ void exportValues(){ .def("atPose3", &Values::at) .def("atConstantBias", &Values::at) .def("atNavState", &Values::at) + .def("atVector", &Values::at) + .def("atVector2", &Values::at) .def("atVector3", &Values::at) .def("exists", exists1) .def("keys", &Values::keys) diff --git a/tests/testDoglegOptimizer.cpp b/tests/testDoglegOptimizer.cpp index a2bb57e1c..954afa220 100644 --- a/tests/testDoglegOptimizer.cpp +++ b/tests/testDoglegOptimizer.cpp @@ -139,7 +139,7 @@ TEST(DoglegOptimizer, Iterate) { VectorValues dx_u = gbn.optimizeGradientSearch(); VectorValues dx_n = gbn.optimize(); DoglegOptimizerImpl::IterationResult result = DoglegOptimizerImpl::Iterate(Delta, DoglegOptimizerImpl::SEARCH_EACH_ITERATION, dx_u, dx_n, gbn, fg, config, fg.error(config)); - Delta = result.Delta; + Delta = result.delta; EXPECT(result.f_error < fg.error(config)); // Check that error decreases Values newConfig(config.retract(result.dx_d)); config = newConfig; diff --git a/tests/testNonlinearFactorGraph.cpp b/tests/testNonlinearFactorGraph.cpp index ee60f9714..9556a8018 100644 --- a/tests/testNonlinearFactorGraph.cpp +++ b/tests/testNonlinearFactorGraph.cpp @@ -1,6 +1,6 @@ /* ---------------------------------------------------------------------------- - * GTSAM Copyright 2010, Georgia Tech Research Corporation, + * GTSAM Copyright 2010, Georgia Tech Research Corporation, * Atlanta, Georgia 30332-0415 * All Rights Reserved * Authors: Frank Dellaert, et al. 
(see THANKS for the full author list) @@ -9,7 +9,7 @@ * -------------------------------------------------------------------------- */ -/** +/** * @file testNonlinearFactorGraph.cpp * @brief Unit tests for Non-Linear Factor NonlinearFactorGraph * @brief testNonlinearFactorGraph @@ -107,9 +107,9 @@ TEST( NonlinearFactorGraph, linearize ) { NonlinearFactorGraph fg = createNonlinearFactorGraph(); Values initial = createNoisyValues(); - GaussianFactorGraph linearized = *fg.linearize(initial); + GaussianFactorGraph linearFG = *fg.linearize(initial); GaussianFactorGraph expected = createGaussianFactorGraph(); - CHECK(assert_equal(expected,linearized)); // Needs correct linearizations + CHECK(assert_equal(expected,linearFG)); // Needs correct linearizations } /* ************************************************************************* */ @@ -165,6 +165,38 @@ TEST( NonlinearFactorGraph, symbolic ) EXPECT(assert_equal(expected, actual)); } +/* ************************************************************************* */ +TEST(NonlinearFactorGraph, UpdateCholesky) { + NonlinearFactorGraph fg = createNonlinearFactorGraph(); + Values initial = createNoisyValues(); + + // solve conventionally + GaussianFactorGraph linearFG = *fg.linearize(initial); + auto delta = linearFG.optimizeDensely(); + auto expected = initial.retract(delta); + + // solve with new method + EXPECT(assert_equal(expected, fg.updateCholesky(initial))); + + // solve with Ordering + Ordering ordering; + ordering += L(1), X(2), X(1); + EXPECT(assert_equal(expected, fg.updateCholesky(initial, ordering))); + + // solve with new method, heavily damped + auto dampen = [](const HessianFactor::shared_ptr& hessianFactor) { + auto iterator = hessianFactor->begin(); + for (; iterator != hessianFactor->end(); iterator++) { + const auto index = std::distance(hessianFactor->begin(), iterator); + auto block = hessianFactor->info().diagonalBlock(index); + for (int j = 0; j < block.rows(); j++) { + block(j, j) += 1e9; + } + } + }; + EXPECT(assert_equal(initial, fg.updateCholesky(initial, boost::none, dampen), 1e-6)); +} + /* ************************************************************************* */ int main() { TestResult tr; return TestRegistry::runAllTests(tr); } /* ************************************************************************* */ diff --git a/tests/testNonlinearOptimizer.cpp b/tests/testNonlinearOptimizer.cpp index 7a22abc67..9ddbbb1b2 100644 --- a/tests/testNonlinearOptimizer.cpp +++ b/tests/testNonlinearOptimizer.cpp @@ -32,9 +32,11 @@ #include +#include #include #include // for operator += using namespace boost::assign; +using boost::adaptors::map_values; #include #include @@ -263,13 +265,6 @@ TEST_UNSAFE(NonlinearOptimizer, MoreOptimization) { // Try LM and Dogleg LevenbergMarquardtParams params = LevenbergMarquardtParams::LegacyDefaults(); - // params.setVerbosityLM("TRYDELTA"); - // params.setVerbosity("TERMINATION"); - params.lambdaUpperBound = 1e9; -// params.relativeErrorTol = 0; -// params.absoluteErrorTol = 0; - //params.lambdaInitial = 10; - { LevenbergMarquardtOptimizer optimizer(fg, init, params); @@ -297,9 +292,11 @@ TEST_UNSAFE(NonlinearOptimizer, MoreOptimization) { // test the diagonal GaussianFactorGraph::shared_ptr linear = optimizer.linearize(); - GaussianFactorGraph damped = *optimizer.buildDampedSystem(*linear); - VectorValues d = linear->hessianDiagonal(), // - expectedDiagonal = d + params.lambdaInitial * d; + VectorValues d = linear->hessianDiagonal(); + VectorValues sqrtHessianDiagonal = d; + for (Vector& v : 
diff --git a/tests/testNonlinearOptimizer.cpp b/tests/testNonlinearOptimizer.cpp
index 7a22abc67..9ddbbb1b2 100644
--- a/tests/testNonlinearOptimizer.cpp
+++ b/tests/testNonlinearOptimizer.cpp
@@ -32,9 +32,11 @@
 #include
+#include <boost/range/adaptor/map.hpp>
 #include
 #include  // for operator +=
 using namespace boost::assign;
+using boost::adaptors::map_values;
 #include
 #include
@@ -263,13 +265,6 @@ TEST_UNSAFE(NonlinearOptimizer, MoreOptimization) {

   // Try LM and Dogleg
   LevenbergMarquardtParams params = LevenbergMarquardtParams::LegacyDefaults();
-  // params.setVerbosityLM("TRYDELTA");
-  // params.setVerbosity("TERMINATION");
-  params.lambdaUpperBound = 1e9;
-//  params.relativeErrorTol = 0;
-//  params.absoluteErrorTol = 0;
-  //params.lambdaInitial = 10;
-
   {
     LevenbergMarquardtOptimizer optimizer(fg, init, params);
@@ -297,9 +292,11 @@ TEST_UNSAFE(NonlinearOptimizer, MoreOptimization) {

     // test the diagonal
     GaussianFactorGraph::shared_ptr linear = optimizer.linearize();
-    GaussianFactorGraph damped = *optimizer.buildDampedSystem(*linear);
-    VectorValues d = linear->hessianDiagonal(), //
-        expectedDiagonal = d + params.lambdaInitial * d;
+    VectorValues d = linear->hessianDiagonal();
+    VectorValues sqrtHessianDiagonal = d;
+    for (Vector& v : sqrtHessianDiagonal | map_values) v = v.cwiseSqrt();
+    GaussianFactorGraph damped = optimizer.buildDampedSystem(*linear, sqrtHessianDiagonal);
+    VectorValues expectedDiagonal = d + params.lambdaInitial * d;
     EXPECT(assert_equal(expectedDiagonal, damped.hessianDiagonal()));

   // test convergence (does not!)
@@ -311,7 +308,7 @@ TEST_UNSAFE(NonlinearOptimizer, MoreOptimization) {
   EXPECT(assert_equal(expectedGradient,linear->gradientAtZero()));

   // Check that the gradient is zero for damped system (it is not!)
-  damped = *optimizer.buildDampedSystem(*linear);
+  damped = optimizer.buildDampedSystem(*linear, sqrtHessianDiagonal);
   VectorValues actualGradient = damped.gradientAtZero();
   EXPECT(assert_equal(expectedGradient,actualGradient));
@@ -364,8 +361,9 @@ TEST(NonlinearOptimizer, MoreOptimizationWithHuber) {
   expected.insert(1, Pose2(1,0,M_PI/2));
   expected.insert(2, Pose2(1,1,M_PI));

+  LevenbergMarquardtParams params;
   EXPECT(assert_equal(expected, GaussNewtonOptimizer(fg, init).optimize()));
-  EXPECT(assert_equal(expected, LevenbergMarquardtOptimizer(fg, init).optimize()));
+  EXPECT(assert_equal(expected, LevenbergMarquardtOptimizer(fg, init, params).optimize()));
   EXPECT(assert_equal(expected, DoglegOptimizer(fg, init).optimize()));
 }
@@ -463,7 +461,7 @@ TEST( NonlinearOptimizer, logfile )
 }

 /* ************************************************************************* */
-// Minimal traits example
+//// Minimal traits example
 struct MyType : public Vector3 {
   using Vector3::Vector3;
 };
@@ -471,11 +469,13 @@ struct MyType : public Vector3 {
 namespace gtsam {
 template <>
 struct traits<MyType> {
-  static bool Equals(const MyType&, const MyType&, double tol) {return true;}
+  static bool Equals(const MyType& a, const MyType& b, double tol) {
+    return (a - b).array().abs().maxCoeff() < tol;
+  }
   static void Print(const MyType&, const string&) {}
-  static int GetDimension(const MyType&) { return 3;}
-  static MyType Retract(const MyType&, const Vector3&) {return MyType();}
-  static Vector3 Local(const MyType&, const MyType&) {return Vector3();}
+  static int GetDimension(const MyType&) { return 3; }
+  static MyType Retract(const MyType& a, const Vector3& b) { return a + b; }
+  static Vector3 Local(const MyType& a, const MyType& b) { return b - a; }
 };
 }
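The cwiseSqrt in the hunk above is the point of the new buildDampedSystem signature: damping is applied as extra Jacobian rows built from the square root of the Hessian diagonal, so after squaring, each damping row contributes lambda*d_i back to the diagonal — exactly the expectedDiagonal = d + lambdaInitial*d check. A plain-Eigen sketch of that identity (toy values, not GTSAM code; here sqrt(lambda) is folded directly into the rows):

    #include <Eigen/Dense>
    #include <iostream>

    int main() {
      Eigen::Matrix2d J;  // a toy Jacobian
      J << 1.0, 2.0, 0.5, 1.5;
      const Eigen::Vector2d d = (J.transpose() * J).diagonal();  // hessianDiagonal
      const double lambda = 0.5;
      const Eigen::Vector2d sqrtRows = (lambda * d).cwiseSqrt();
      Eigen::Matrix<double, 4, 2> Jdamped;  // J with damping rows appended
      Jdamped << J, Eigen::Matrix2d(sqrtRows.asDiagonal());
      // Both lines print the same diagonal: d + lambda * d.
      std::cout << (Jdamped.transpose() * Jdamped).diagonal().transpose() << std::endl;
      std::cout << (d + lambda * d).transpose() << std::endl;
      return 0;
    }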
diff --git a/timing/timeCholesky.cpp b/timing/timeCholesky.cpp
new file mode 100644
index 000000000..4dd349593
--- /dev/null
+++ b/timing/timeCholesky.cpp
@@ -0,0 +1,60 @@
+/* ----------------------------------------------------------------------------
+
+ * GTSAM Copyright 2010, Georgia Tech Research Corporation,
+ * Atlanta, Georgia 30332-0415
+ * All Rights Reserved
+ * Authors: Frank Dellaert, et al. (see THANKS for the full author list)
+
+ * See LICENSE for the license information
+
+ * -------------------------------------------------------------------------- */
+
+/**
+ * @file timeCholesky.cpp
+ * @brief time Cholesky factorization
+ * @author Frank Dellaert
+ * @date March 4, 2016
+ */
+
+#include <gtsam/base/cholesky.h>
+
+#include <iostream>
+#include <ctime>
+#include <iomanip>  // std::setprecision
+
+using namespace std;
+using namespace gtsam;
+
+//#define TERNARY
+
+int main() {
+
+  Matrix top = (Matrix(7,7) <<
+      4.0375, 3.4584, 3.5735, 2.4815, 2.1471, 2.7400, 2.2063,
+          0., 4.7267, 3.8423, 2.3624, 2.8091, 2.9579, 2.5914,
+          0.,     0., 5.1600, 2.0797, 3.4690, 3.2419, 2.9992,
+          0.,     0.,     0., 1.8786, 1.0535, 1.4250, 1.3347,
+          0.,     0.,     0.,     0., 3.0788, 2.6283, 2.3791,
+          0.,     0.,     0.,     0.,     0., 2.9227, 2.4056,
+          0.,     0.,     0.,     0.,     0.,     0., 2.5776).finished();
+
+  Matrix ABC(100,100);
+  ABC.topLeftCorner<7,7>() = top;
+  cout << setprecision(3);
+
+  size_t n = 100000;
+  for (size_t nFrontal = 1; nFrontal <= 7; nFrontal++) {
+    auto timeLog = clock();
+    for (size_t i = 0; i < n; i++) {
+      Matrix RSL(ABC);
+      choleskyPartial(RSL, nFrontal);
+    }
+    auto timeLog2 = clock();
+    auto seconds = (double)(timeLog2 - timeLog) / CLOCKS_PER_SEC;
+    cout << "partialCholesky " << nFrontal << ": ";
+    auto ms = ((double)seconds * 1000000 / n);
+    cout << ms << " ms, " << ms/nFrontal << " ms/dim" << endl;
+  }
+
+  return 0;
+}
diff --git a/timing/timeSFMBAL.h b/timing/timeSFMBAL.h
index 1642af7e8..1ca9f82d2 100644
--- a/timing/timeSFMBAL.h
+++ b/timing/timeSFMBAL.h
@@ -67,6 +67,7 @@ int optimize(const SfM_data& db, const NonlinearFactorGraph& graph,
   // Set parameters to be similar to ceres
   LevenbergMarquardtParams params;
   LevenbergMarquardtParams::SetCeresDefaults(&params);
+//  params.setLinearSolverType("SEQUENTIAL_CHOLESKY");
 //  params.setVerbosityLM("SUMMARY");
   if (gUseSchur) {
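The commented-out line added to timeSFMBAL.h keeps a one-line switch for timing sequential elimination against the default. For reference, enabling it would look like the following sketch — setLinearSolverType is the standard NonlinearOptimizerParams setter, and multifrontal Cholesky is, to our understanding, the stock default:

    #include <gtsam/nonlinear/LevenbergMarquardtOptimizer.h>

    using namespace gtsam;

    int main() {
      LevenbergMarquardtParams params;
      LevenbergMarquardtParams::SetCeresDefaults(&params);
      // Eliminate sequentially instead of with the multifrontal solver.
      params.setLinearSolverType("SEQUENTIAL_CHOLESKY");
      return 0;
    }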