New unified allocAligned scheme
parent
dfb79632be
commit
73c20c2ae1
|
@ -19,15 +19,15 @@
|
||||||
|
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
// The MSVC compiler workaround for the unsupported variable length array
|
|
||||||
// utilizes the std::unique_ptr<> custom deleter.
|
|
||||||
// See Expression<T>::valueAndJacobianMap() below.
|
|
||||||
#ifdef _MSC_VER
|
|
||||||
#include <memory>
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#include <gtsam/nonlinear/internal/ExpressionNode.h>
|
#include <gtsam/nonlinear/internal/ExpressionNode.h>
|
||||||
|
|
||||||
|
#include <map>
|
||||||
|
#include <memory>
|
||||||
|
#include <set>
|
||||||
|
#include <string>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
|
|
||||||
namespace gtsam {
|
namespace gtsam {
|
||||||
|
|
||||||
template<typename T>
|
template<typename T>
|
||||||
|
@ -145,9 +145,10 @@ T Expression<T>::value(const Values& values,
|
||||||
// Call private version that returns derivatives in H
|
// Call private version that returns derivatives in H
|
||||||
const auto [keys, dims] = keysAndDims();
|
const auto [keys, dims] = keysAndDims();
|
||||||
return valueAndDerivatives(values, keys, dims, *H);
|
return valueAndDerivatives(values, keys, dims, *H);
|
||||||
} else
|
} else {
|
||||||
// no derivatives needed, just return value
|
// no derivatives needed, just return value
|
||||||
return root_->value(values);
|
return root_->value(values);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
template<typename T>
|
template<typename T>
|
||||||
|
@ -193,33 +194,36 @@ T Expression<T>::traceExecution(const Values& values,
|
||||||
static_cast<internal::ExecutionTraceStorage*>(traceStorage));
|
static_cast<internal::ExecutionTraceStorage*>(traceStorage));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Allocate a single block of aligned memory using a unique_ptr.
|
||||||
|
inline std::unique_ptr<internal::ExecutionTraceStorage[]> allocAligned(size_t size) {
|
||||||
|
const size_t alignedSize = (size + internal::TraceAlignment - 1) / internal::TraceAlignment;
|
||||||
|
std::cerr << size << " : " << alignedSize << '\n';
|
||||||
|
return std::unique_ptr<internal::ExecutionTraceStorage[]>(
|
||||||
|
new internal::ExecutionTraceStorage[alignedSize]);
|
||||||
|
}
|
||||||
|
|
||||||
template<typename T>
|
template<typename T>
|
||||||
T Expression<T>::valueAndJacobianMap(const Values& values,
|
T Expression<T>::valueAndJacobianMap(const Values& values,
|
||||||
internal::JacobianMap& jacobians) const {
|
internal::JacobianMap& jacobians) const {
|
||||||
// The following piece of code is absolutely crucial for performance.
|
try {
|
||||||
// We allocate a block of memory on the stack, which can be done at runtime
|
// We allocate a single block of aligned memory using a unique_ptr.
|
||||||
// with modern C++ compilers. The traceExecution then fills this memory
|
const size_t size = traceSize();
|
||||||
// with an execution trace, made up entirely of "Record" structs, see
|
auto traceStorage = allocAligned(size);
|
||||||
// the FunctionalNode class in expression-inl.h
|
|
||||||
size_t size = traceSize();
|
|
||||||
|
|
||||||
// Windows does not support variable length arrays, so memory must be dynamically
|
// The traceExecution call then fills this memory
|
||||||
// allocated on Visual Studio. For more information see the issue below
|
// with an execution trace, made up entirely of "Record" structs, see
|
||||||
// https://bitbucket.org/gtborg/gtsam/issue/178/vlas-unsupported-in-visual-studio
|
// the FunctionalNode class in expression-inl.h
|
||||||
#ifdef _MSC_VER
|
internal::ExecutionTrace<T> trace;
|
||||||
std::unique_ptr<void, void(*)(void*)> traceStorageDeleter(
|
T value(this->traceExecution(values, trace, traceStorage.get()));
|
||||||
_aligned_malloc(size, internal::TraceAlignment),
|
|
||||||
[](void *ptr){ _aligned_free(ptr); });
|
|
||||||
auto traceStorage = static_cast<internal::ExecutionTraceStorage*>(traceStorageDeleter.get());
|
|
||||||
#else
|
|
||||||
internal::ExecutionTraceStorage traceStorage[size];
|
|
||||||
#endif
|
|
||||||
|
|
||||||
internal::ExecutionTrace<T> trace;
|
// We then calculate the Jacobians using reverse automatic differentiation (AD).
|
||||||
T value(this->traceExecution(values, trace, traceStorage));
|
trace.startReverseAD1(jacobians);
|
||||||
trace.startReverseAD1(jacobians);
|
return value;
|
||||||
|
} catch (const std::bad_alloc &e) {
|
||||||
return value;
|
std::cerr << "valueAndJacobianMap exception: " << e.what() << '\n';
|
||||||
|
throw e;
|
||||||
|
}
|
||||||
|
// Here traceStorage will be de-allocated properly.
|
||||||
}
|
}
|
||||||
|
|
||||||
template<typename T>
|
template<typename T>
|
||||||
|
@ -261,7 +265,7 @@ struct apply_compose<double> {
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
}
|
} // namespace internal
|
||||||
|
|
||||||
// Global methods:
|
// Global methods:
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue