New unified allocAligned scheme
parent
dfb79632be
commit
73c20c2ae1
|
@ -19,15 +19,15 @@
|
|||
|
||||
#pragma once
|
||||
|
||||
// The MSVC compiler workaround for the unsupported variable length array
|
||||
// utilizes the std::unique_ptr<> custom deleter.
|
||||
// See Expression<T>::valueAndJacobianMap() below.
|
||||
#ifdef _MSC_VER
|
||||
#include <memory>
|
||||
#endif
|
||||
|
||||
#include <gtsam/nonlinear/internal/ExpressionNode.h>
|
||||
|
||||
#include <map>
|
||||
#include <memory>
|
||||
#include <set>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
|
||||
namespace gtsam {
|
||||
|
||||
template<typename T>
|
||||
|
@ -145,10 +145,11 @@ T Expression<T>::value(const Values& values,
|
|||
// Call private version that returns derivatives in H
|
||||
const auto [keys, dims] = keysAndDims();
|
||||
return valueAndDerivatives(values, keys, dims, *H);
|
||||
} else
|
||||
} else {
|
||||
// no derivatives needed, just return value
|
||||
return root_->value(values);
|
||||
}
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
const std::shared_ptr<internal::ExpressionNode<T> >& Expression<T>::root() const {
|
||||
|
@ -193,33 +194,36 @@ T Expression<T>::traceExecution(const Values& values,
|
|||
static_cast<internal::ExecutionTraceStorage*>(traceStorage));
|
||||
}
|
||||
|
||||
// Allocate a single block of aligned memory using a unique_ptr.
|
||||
inline std::unique_ptr<internal::ExecutionTraceStorage[]> allocAligned(size_t size) {
|
||||
const size_t alignedSize = (size + internal::TraceAlignment - 1) / internal::TraceAlignment;
|
||||
std::cerr << size << " : " << alignedSize << '\n';
|
||||
return std::unique_ptr<internal::ExecutionTraceStorage[]>(
|
||||
new internal::ExecutionTraceStorage[alignedSize]);
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
T Expression<T>::valueAndJacobianMap(const Values& values,
|
||||
internal::JacobianMap& jacobians) const {
|
||||
// The following piece of code is absolutely crucial for performance.
|
||||
// We allocate a block of memory on the stack, which can be done at runtime
|
||||
// with modern C++ compilers. The traceExecution then fills this memory
|
||||
try {
|
||||
// We allocate a single block of aligned memory using a unique_ptr.
|
||||
const size_t size = traceSize();
|
||||
auto traceStorage = allocAligned(size);
|
||||
|
||||
// The traceExecution call then fills this memory
|
||||
// with an execution trace, made up entirely of "Record" structs, see
|
||||
// the FunctionalNode class in expression-inl.h
|
||||
size_t size = traceSize();
|
||||
|
||||
// Windows does not support variable length arrays, so memory must be dynamically
|
||||
// allocated on Visual Studio. For more information see the issue below
|
||||
// https://bitbucket.org/gtborg/gtsam/issue/178/vlas-unsupported-in-visual-studio
|
||||
#ifdef _MSC_VER
|
||||
std::unique_ptr<void, void(*)(void*)> traceStorageDeleter(
|
||||
_aligned_malloc(size, internal::TraceAlignment),
|
||||
[](void *ptr){ _aligned_free(ptr); });
|
||||
auto traceStorage = static_cast<internal::ExecutionTraceStorage*>(traceStorageDeleter.get());
|
||||
#else
|
||||
internal::ExecutionTraceStorage traceStorage[size];
|
||||
#endif
|
||||
|
||||
internal::ExecutionTrace<T> trace;
|
||||
T value(this->traceExecution(values, trace, traceStorage));
|
||||
trace.startReverseAD1(jacobians);
|
||||
T value(this->traceExecution(values, trace, traceStorage.get()));
|
||||
|
||||
// We then calculate the Jacobians using reverse automatic differentiation (AD).
|
||||
trace.startReverseAD1(jacobians);
|
||||
return value;
|
||||
} catch (const std::bad_alloc &e) {
|
||||
std::cerr << "valueAndJacobianMap exception: " << e.what() << '\n';
|
||||
throw e;
|
||||
}
|
||||
// Here traceStorage will be de-allocated properly.
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
|
@ -261,7 +265,7 @@ struct apply_compose<double> {
|
|||
}
|
||||
};
|
||||
|
||||
}
|
||||
} // namespace internal
|
||||
|
||||
// Global methods:
|
||||
|
||||
|
|
Loading…
Reference in New Issue