From 8ee16c90180dbad00e6596e35b944cf024167774 Mon Sep 17 00:00:00 2001 From: dellaert Date: Sun, 19 Oct 2014 11:19:09 +0200 Subject: [PATCH] Comments for Paul --- gtsam_unstable/nonlinear/Expression-inl.h | 16 +++++++++++++++- gtsam_unstable/nonlinear/Expression.h | 5 +++++ gtsam_unstable/nonlinear/ExpressionFactor.h | 13 ++++++++++--- 3 files changed, 30 insertions(+), 4 deletions(-) diff --git a/gtsam_unstable/nonlinear/Expression-inl.h b/gtsam_unstable/nonlinear/Expression-inl.h index 0da5727c1..3594ea61f 100644 --- a/gtsam_unstable/nonlinear/Expression-inl.h +++ b/gtsam_unstable/nonlinear/Expression-inl.h @@ -92,11 +92,25 @@ void handleLeafCase( //----------------------------------------------------------------------------- /** - * The ExecutionTrace class records a tree-structured expression's execution + * The ExecutionTrace class records a tree-structured expression's execution. + * + * The class looks a bit complicated but it is so for performance. * It is a tagged union that obviates the need to create * a ExecutionTrace subclass for Constants and Leaf Expressions. Instead * the key for the leaf is stored in the space normally used to store a * CallRecord*. Nothing is stored for a Constant. + * + * A full execution trace of a Binary(Unary(Binary(Leaf,Constant)),Leaf) would be: + * Trace(Function) -> + * BinaryRecord with two traces in it + * trace1(Function) -> + * UnaryRecord with one trace in it + * trace1(Function) -> + * BinaryRecord with two traces in it + * trace1(Leaf) + * trace2(Constant) + * trace2(Leaf) + * Hence, there are three Record structs, written to memory by traceExecution */ template class ExecutionTrace { diff --git a/gtsam_unstable/nonlinear/Expression.h b/gtsam_unstable/nonlinear/Expression.h index 12e101f14..0e9b1034d 100644 --- a/gtsam_unstable/nonlinear/Expression.h +++ b/gtsam_unstable/nonlinear/Expression.h @@ -124,6 +124,11 @@ public: /// Return value and derivatives, reverse AD version T reverse(const Values& values, JacobianMap& jacobians) const { + // The following piece of code is absolutely crucial for performance. + // We allocate a block of memory on the stack, which can be done at runtime + // with modern C++ compilers. The traceExecution then fills this memory + // with an execution trace, made up entirely of "Record" structs, see + // the FunctionalNode class in expression-inl.h size_t size = traceSize(); char raw[size]; ExecutionTrace trace; diff --git a/gtsam_unstable/nonlinear/ExpressionFactor.h b/gtsam_unstable/nonlinear/ExpressionFactor.h index 66ba025d2..cdf2d132e 100644 --- a/gtsam_unstable/nonlinear/ExpressionFactor.h +++ b/gtsam_unstable/nonlinear/ExpressionFactor.h @@ -104,8 +104,14 @@ public: virtual boost::shared_ptr linearize(const Values& x) const { - // Allocate memory on stack and create a view on it (saves a malloc) - double memory[dimension::value * augmentedCols_]; + // This method has been heavily optimized for maximum performance. + // We allocate a VerticalBlockMatrix on the stack first, and then create + // Eigen::Block views on this piece of memory which is then passed + // to [expression_.value] below, which writes directly into Ab_. + + // Another malloc saved by creating a Matrix on the stack + static const int Dim = dimension::value; + double memory[Dim * augmentedCols_]; Eigen::Map::value, Eigen::Dynamic> > // matrix(memory, dimension::value, augmentedCols_); matrix.setZero(); // zero out @@ -117,8 +123,9 @@ public: JacobianMap blocks; for (DenseIndex i = 0; i < size(); i++) blocks.insert(std::make_pair(keys_[i], Ab(i))); + // Evaluate error to get Jacobians and RHS vector b - T value = expression_.value(x, blocks); + T value = expression_.value(x, blocks); // <<< Reverse AD happens here ! Ab(size()).col(0) = -measurement_.localCoordinates(value); // Whiten the corresponding system now