From 73c20c2ae1c20c50b70e4408b531237f9587658d Mon Sep 17 00:00:00 2001
From: Frank Dellaert
Date: Sat, 11 Feb 2023 12:37:39 -0800
Subject: [PATCH] New unified allocAligned scheme

---
NOTE(review): in this copy of the patch, text inside angle brackets (header
names after #include, template arguments, the author's e-mail address) appears
to have been stripped. Restore it from the upstream commit before applying.
Review changes relative to the submitted patch: the per-call debug print in
allocAligned was dropped, and the bad_alloc handler now rethrows with "throw;".

 gtsam/nonlinear/Expression-inl.h | 66 +++++++++++++++++---------------
 1 file changed, 35 insertions(+), 31 deletions(-)

diff --git a/gtsam/nonlinear/Expression-inl.h b/gtsam/nonlinear/Expression-inl.h
index 6cc0d408e..f15ad8575 100644
--- a/gtsam/nonlinear/Expression-inl.h
+++ b/gtsam/nonlinear/Expression-inl.h
@@ -19,15 +19,15 @@
 
 #pragma once
 
-// The MSVC compiler workaround for the unsupported variable length array
-// utilizes the std::unique_ptr<> custom deleter.
-// See Expression::valueAndJacobianMap() below.
-#ifdef _MSC_VER
-#include
-#endif
-
 #include
 
+#include
+#include
+#include
+#include
+#include
+
+
 namespace gtsam {
 
 template
@@ -145,9 +145,10 @@ T Expression::value(const Values& values,
     // Call private version that returns derivatives in H
     const auto [keys, dims] = keysAndDims();
     return valueAndDerivatives(values, keys, dims, *H);
-  } else
+  } else {
     // no derivatives needed, just return value
     return root_->value(values);
+  }
 }
 
 template
@@ -193,33 +194,36 @@ T Expression::traceExecution(const Values& values,
       static_cast(traceStorage));
 }
 
+// Allocate ceil(size / TraceAlignment) ExecutionTraceStorage elements, owned by a unique_ptr.
+inline std::unique_ptr allocAligned(size_t size) {
+  const size_t alignedSize = (size + internal::TraceAlignment - 1) / internal::TraceAlignment;
+  return std::unique_ptr(
+      new internal::ExecutionTraceStorage[alignedSize]);
+}
+
 template
 T Expression::valueAndJacobianMap(const Values& values,
     internal::JacobianMap& jacobians) const {
-  // The following piece of code is absolutely crucial for performance.
-  // We allocate a block of memory on the stack, which can be done at runtime
-  // with modern C++ compilers. The traceExecution then fills this memory
-  // with an execution trace, made up entirely of "Record" structs, see
-  // the FunctionalNode class in expression-inl.h
-  size_t size = traceSize();
+  try {
+    // We allocate a single block of aligned memory using a unique_ptr.
+    const size_t size = traceSize();
+    auto traceStorage = allocAligned(size);
 
-  // Windows does not support variable length arrays, so memory must be dynamically
-  // allocated on Visual Studio. For more information see the issue below
-  // https://bitbucket.org/gtborg/gtsam/issue/178/vlas-unsupported-in-visual-studio
-#ifdef _MSC_VER
-  std::unique_ptr traceStorageDeleter(
-    _aligned_malloc(size, internal::TraceAlignment),
-    [](void *ptr){ _aligned_free(ptr); });
-  auto traceStorage = static_cast(traceStorageDeleter.get());
-#else
-  internal::ExecutionTraceStorage traceStorage[size];
-#endif
+    // The traceExecution call then fills this memory
+    // with an execution trace, made up entirely of "Record" structs, see
+    // the FunctionalNode class in expression-inl.h
+    internal::ExecutionTrace trace;
+    T value(this->traceExecution(values, trace, traceStorage.get()));
 
-  internal::ExecutionTrace trace;
-  T value(this->traceExecution(values, trace, traceStorage));
-  trace.startReverseAD1(jacobians);
-
-  return value;
+    // We then calculate the Jacobians using reverse automatic differentiation (AD).
+    trace.startReverseAD1(jacobians);
+    return value;
+  } catch (const std::bad_alloc &e) {
+    std::cerr << "valueAndJacobianMap exception: " << e.what() << '\n';
+    // Rethrow the in-flight exception; "throw e" would copy (and slice) it.
+    throw;
+  }
+  // Here traceStorage will be de-allocated properly.
 }
 
 template
@@ -261,7 +265,7 @@ struct apply_compose {
   }
 };
 
-}
+}  // namespace internal
 
 // Global methods:
 