Implement recovery behavior for LocalTrajectoryUploader (#1287)

master
Christoph Schütte 2018-07-20 00:09:44 +02:00 committed by Wally B. Feed
parent 5911181849
commit 7fa11dcde6
10 changed files with 201 additions and 29 deletions

View File

@ -185,10 +185,10 @@ class ClientServerTest : public ::testing::Test {
} }
void WaitForLocalSlamResultUploads(size_t size) { void WaitForLocalSlamResultUploads(size_t size) {
std::unique_lock<std::mutex> lock(local_slam_result_upload_mutex_); while (stub_->pose_graph()->GetTrajectoryNodePoses().size() < size) {
local_slam_result_upload_condition_.wait(lock, [&] { LOG(INFO) << stub_->pose_graph()->GetTrajectoryNodePoses().size();
return stub_->pose_graph()->GetTrajectoryNodePoses().size() >= size; std::this_thread::sleep_for(std::chrono::milliseconds(100));
}); }
} }
proto::MapBuilderServerOptions map_builder_server_options_; proto::MapBuilderServerOptions map_builder_server_options_;
@ -483,6 +483,52 @@ TEST_F(ClientServerTest, LocalSlam2DWithUploadingServer) {
server_->Shutdown(); server_->Shutdown();
} }
// Exercises the uploader's recovery path: the uplink server ('server_') is
// shut down and re-created while the uploading server keeps receiving sensor
// data, and the test then waits for results to show up in the restarted
// uplink again.
TEST_F(ClientServerTest, LocalSlam2DUplinkServerRestarting) {
// Bring up the uplink server and the uploading server, each with its stub.
InitializeRealServer();
server_->Start();
InitializeStub();
InitializeRealUploadingServer();
uploading_server_->Start();
InitializeStubForUploadingServer();
// The trajectory is created through the uploading server's stub.
int trajectory_id = stub_for_uploading_server_->AddTrajectoryBuilder(
{kRangeSensorId}, trajectory_builder_options_,
local_slam_result_callback_);
TrajectoryBuilderInterface* trajectory_stub =
stub_for_uploading_server_->GetTrajectoryBuilder(trajectory_id);
const auto measurements = mapping::testing::GenerateFakeRangeMeasurements(
kTravelDistance, kDuration, kTimeStep);
// Insert half of the measurements.
for (unsigned int i = 0; i < measurements.size() / 2; ++i) {
trajectory_stub->AddSensorData(kRangeSensorId.id, measurements.at(i));
}
// Block until the first half has been processed and its results have been
// uploaded to the uplink server.
WaitForLocalSlamResults(measurements.size() / 2);
WaitForLocalSlamResultUploads(number_of_insertion_results_);
// Simulate a cloud server restart.
LOG(INFO) << "Simulating server restart.";
// NOTE(review): presumably 0 is the id the uplink assigned to the single
// uploaded trajectory - confirm.
constexpr int kUplinkTrajectoryId = 0;
stub_->FinishTrajectory(kUplinkTrajectoryId);
server_->Shutdown();
server_->WaitForShutdown();
// Replace the uplink server and its stub with fresh instances.
InitializeRealServer();
server_->Start();
InitializeStub();
// Insert the second half of the measurements.
for (unsigned int i = measurements.size() / 2; i < measurements.size(); ++i) {
trajectory_stub->AddSensorData(kRangeSensorId.id, measurements.at(i));
}
WaitForLocalSlamResults(measurements.size() / 2);
// Only two uploaded nodes are required in the restarted uplink.
// NOTE(review): presumably fewer nodes arrive than were inserted because
// recovery discards queued data - confirm.
WaitForLocalSlamResultUploads(2);
// Tear down: finish the local trajectory, then stop both servers.
stub_for_uploading_server_->FinishTrajectory(trajectory_id);
uploading_server_->Shutdown();
uploading_server_->WaitForShutdown();
server_->Shutdown();
server_->WaitForShutdown();
}
TEST_F(ClientServerTest, LoadState) { TEST_F(ClientServerTest, LoadState) {
InitializeRealServer(); InitializeRealServer();
server_->Start(); server_->Start();

View File

@ -60,10 +60,15 @@ void AddTrajectoryHandler::OnRequest(
// Ignore initial poses in trajectory_builder_options. // Ignore initial poses in trajectory_builder_options.
trajectory_builder_options.clear_initial_trajectory_pose(); trajectory_builder_options.clear_initial_trajectory_pose();
GetContext<MapBuilderContextInterface>() if (!GetContext<MapBuilderContextInterface>()
->local_trajectory_uploader() ->local_trajectory_uploader()
->AddTrajectory(request.client_id(), trajectory_id, expected_sensor_ids, ->AddTrajectory(request.client_id(), trajectory_id,
trajectory_builder_options); expected_sensor_ids, trajectory_builder_options)) {
LOG(ERROR) << "Failed to create trajectory in uplink: " << trajectory_id;
Finish(::grpc::Status(::grpc::INTERNAL,
"Failed to create trajectory in uplink"));
return;
}
} }
auto response = common::make_unique<proto::AddTrajectoryResponse>(); auto response = common::make_unique<proto::AddTrajectoryResponse>();

View File

@ -135,7 +135,8 @@ TEST_F(AddTrajectoryHandlerTest, WithLocalSlamUploader) {
EXPECT_CALL(*mock_local_trajectory_uploader_, EXPECT_CALL(*mock_local_trajectory_uploader_,
AddTrajectory(Eq("CLIENT_ID"), Eq(13), ParseSensorIds(request), AddTrajectory(Eq("CLIENT_ID"), Eq(13), ParseSensorIds(request),
Truly(testing::BuildProtoPredicateEquals( Truly(testing::BuildProtoPredicateEquals(
&upstream_trajectory_builder_options)))); &upstream_trajectory_builder_options))))
.WillOnce(Return(13));
test_server_->SendWrite(request); test_server_->SendWrite(request);
EXPECT_EQ(test_server_->response().trajectory_id(), 13); EXPECT_EQ(test_server_->response().trajectory_id(), 13);
} }

View File

@ -40,7 +40,35 @@ constexpr int kConnectionTimeoutInSeconds = 10;
constexpr int kTokenRefreshIntervalInSeconds = 60; constexpr int kTokenRefreshIntervalInSeconds = 60;
const common::Duration kPopTimeout = common::FromMilliseconds(100); const common::Duration kPopTimeout = common::FromMilliseconds(100);
// The '::grpc::StatusCode's that are considered unrecoverable errors: the
// client does not retry a request that fails with one of them. Currently
// this is only '::grpc::NOT_FOUND'.
const std::set<::grpc::StatusCode> kUnrecoverableStatusCodes = {
::grpc::NOT_FOUND};
// Returns true if 'submap' carries a 2D or a 3D submap whose
// 'num_range_data' is exactly 1, i.e. a submap that has just been started.
bool IsNewSubmap(const mapping::proto::Submap& submap) {
  if (submap.has_submap_2d() && submap.submap_2d().num_range_data() == 1) {
    return true;
  }
  return submap.has_submap_3d() && submap.submap_3d().num_range_data() == 1;
}
class LocalTrajectoryUploader : public LocalTrajectoryUploaderInterface { class LocalTrajectoryUploader : public LocalTrajectoryUploaderInterface {
public:
// Bundles the trajectory id assigned by the uplink with the arguments that
// were used to create the trajectory there ('client_id',
// 'expected_sensor_ids', 'trajectory_options'), so that the same trajectory
// can be created again later.
struct TrajectoryInfo {
  // Not default-constructible: the const members below have no default
  // initializers, so a defaulted default constructor would be implicitly
  // deleted anyway. Spelling it out avoids suggesting otherwise.
  TrajectoryInfo() = delete;
  TrajectoryInfo(
      const int uplink_trajectory_id,
      const std::set<SensorId>& expected_sensor_ids,
      const mapping::proto::TrajectoryBuilderOptions& trajectory_options,
      const std::string& client_id)
      : uplink_trajectory_id(uplink_trajectory_id),
        expected_sensor_ids(expected_sensor_ids),
        trajectory_options(trajectory_options),
        client_id(client_id) {}

  // Id under which the uplink knows this trajectory.
  const int uplink_trajectory_id;
  // Sensor ids the trajectory was created with.
  const std::set<SensorId> expected_sensor_ids;
  // Builder options the trajectory was created with.
  const mapping::proto::TrajectoryBuilderOptions trajectory_options;
  // Id of the client that created the trajectory.
  const std::string client_id;
};
public: public:
LocalTrajectoryUploader(const std::string& uplink_server_address, LocalTrajectoryUploader(const std::string& uplink_server_address,
int batch_size, bool enable_ssl_encryption, int batch_size, bool enable_ssl_encryption,
@ -54,13 +82,14 @@ class LocalTrajectoryUploader : public LocalTrajectoryUploaderInterface {
// complete. // complete.
void Shutdown() final; void Shutdown() final;
void AddTrajectory( bool AddTrajectory(
const std::string& client_id, int local_trajectory_id, const std::string& client_id, int local_trajectory_id,
const std::set<SensorId>& expected_sensor_ids, const std::set<SensorId>& expected_sensor_ids,
const mapping::proto::TrajectoryBuilderOptions& trajectory_options) final; const mapping::proto::TrajectoryBuilderOptions& trajectory_options) final;
void FinishTrajectory(const std::string& client_id, void FinishTrajectory(const std::string& client_id,
int local_trajectory_id) final; int local_trajectory_id) final;
void EnqueueSensorData(std::unique_ptr<proto::SensorData> sensor_data) final; void EnqueueSensorData(std::unique_ptr<proto::SensorData> sensor_data) final;
void TryRecovery();
SensorId GetLocalSlamResultSensorId(int local_trajectory_id) const final { SensorId GetLocalSlamResultSensorId(int local_trajectory_id) const final {
return SensorId{SensorId::SensorType::LOCAL_SLAM_RESULT, return SensorId{SensorId::SensorType::LOCAL_SLAM_RESULT,
@ -73,7 +102,7 @@ class LocalTrajectoryUploader : public LocalTrajectoryUploaderInterface {
std::shared_ptr<::grpc::Channel> client_channel_; std::shared_ptr<::grpc::Channel> client_channel_;
int batch_size_; int batch_size_;
std::map<int, int> local_to_cloud_trajectory_id_map_; std::map<int, TrajectoryInfo> local_trajectory_id_to_trajectory_info_;
common::BlockingQueue<std::unique_ptr<proto::SensorData>> send_queue_; common::BlockingQueue<std::unique_ptr<proto::SensorData>> send_queue_;
bool shutting_down_ = false; bool shutting_down_ = false;
std::unique_ptr<std::thread> upload_thread_; std::unique_ptr<std::thread> upload_thread_;
@ -120,6 +149,39 @@ void LocalTrajectoryUploader::Shutdown() {
upload_thread_->join(); upload_thread_->join();
} }
void LocalTrajectoryUploader::TryRecovery() {
// Wind the sensor_data_queue forward to the next new submap.
while (true) {
proto::SensorData* sensor_data =
send_queue_.PeekWithTimeout<proto::SensorData>(kPopTimeout);
if (sensor_data) {
CHECK_GE(sensor_data->local_slam_result_data().submaps_size(), 0);
if (sensor_data->sensor_data_case() ==
proto::SensorData::kLocalSlamResultData &&
sensor_data->local_slam_result_data().submaps_size() > 0 &&
IsNewSubmap(sensor_data->local_slam_result_data().submaps(
sensor_data->local_slam_result_data().submaps_size() - 1))) {
break;
} else {
send_queue_.Pop();
}
}
}
// Attempt to recreate the trajectories.
const auto local_trajectory_id_to_trajectory_info =
local_trajectory_id_to_trajectory_info_;
local_trajectory_id_to_trajectory_info_.clear();
for (const auto& entry : local_trajectory_id_to_trajectory_info) {
if (!AddTrajectory(entry.second.client_id, entry.first,
entry.second.expected_sensor_ids,
entry.second.trajectory_options)) {
LOG(ERROR) << "Failed to create trajectory. Aborting recovery attempt.";
return;
}
}
}
void LocalTrajectoryUploader::ProcessSendQueue() { void LocalTrajectoryUploader::ProcessSendQueue() {
LOG(INFO) << "Starting uploader thread."; LOG(INFO) << "Starting uploader thread.";
proto::AddSensorDataBatchRequest batch_request; proto::AddSensorDataBatchRequest batch_request;
@ -143,10 +205,18 @@ void LocalTrajectoryUploader::ProcessSendQueue() {
if (batch_request.sensor_data_size() == batch_size_) { if (batch_request.sensor_data_size() == batch_size_) {
async_grpc::Client<handlers::AddSensorDataBatchSignature> client( async_grpc::Client<handlers::AddSensorDataBatchSignature> client(
client_channel_, async_grpc::CreateUnlimitedConstantDelayStrategy( client_channel_,
common::FromSeconds(1))); async_grpc::CreateUnlimitedConstantDelayStrategy(
CHECK(client.Write(batch_request)); common::FromSeconds(1), kUnrecoverableStatusCodes));
if (client.Write(batch_request)) {
LOG(INFO) << "Uploaded " << batch_request.ByteSize()
<< " bytes of sensor data.";
batch_request.clear_sensor_data();
continue;
}
// Unrecoverable error occurred. Attempt recovery.
batch_request.clear_sensor_data(); batch_request.clear_sensor_data();
TryRecovery();
} }
} }
} }
@ -154,12 +224,13 @@ void LocalTrajectoryUploader::ProcessSendQueue() {
void LocalTrajectoryUploader::TranslateTrajectoryId( void LocalTrajectoryUploader::TranslateTrajectoryId(
proto::SensorMetadata* sensor_metadata) { proto::SensorMetadata* sensor_metadata) {
int cloud_trajectory_id = int cloud_trajectory_id = local_trajectory_id_to_trajectory_info_
local_to_cloud_trajectory_id_map_.at(sensor_metadata->trajectory_id()); .at(sensor_metadata->trajectory_id())
.uplink_trajectory_id;
sensor_metadata->set_trajectory_id(cloud_trajectory_id); sensor_metadata->set_trajectory_id(cloud_trajectory_id);
} }
void LocalTrajectoryUploader::AddTrajectory( bool LocalTrajectoryUploader::AddTrajectory(
const std::string& client_id, int local_trajectory_id, const std::string& client_id, int local_trajectory_id,
const std::set<SensorId>& expected_sensor_ids, const std::set<SensorId>& expected_sensor_ids,
const mapping::proto::TrajectoryBuilderOptions& trajectory_options) { const mapping::proto::TrajectoryBuilderOptions& trajectory_options) {
@ -176,17 +247,30 @@ void LocalTrajectoryUploader::AddTrajectory(
cloud::ToProto(GetLocalSlamResultSensorId(local_trajectory_id)); cloud::ToProto(GetLocalSlamResultSensorId(local_trajectory_id));
async_grpc::Client<handlers::AddTrajectorySignature> client(client_channel_); async_grpc::Client<handlers::AddTrajectorySignature> client(client_channel_);
::grpc::Status status; ::grpc::Status status;
CHECK(client.Write(request, &status)) << status.error_message(); if (!client.Write(request, &status)) {
CHECK_EQ(local_to_cloud_trajectory_id_map_.count(local_trajectory_id), 0); LOG(ERROR) << status.error_message();
local_to_cloud_trajectory_id_map_[local_trajectory_id] = return false;
client.response().trajectory_id(); }
LOG(INFO) << "Created trajectory for client_id: " << client_id
<< " local trajectory_id: " << local_trajectory_id
<< " uplink trajectory_id: " << client.response().trajectory_id();
CHECK_EQ(local_trajectory_id_to_trajectory_info_.count(local_trajectory_id),
0);
local_trajectory_id_to_trajectory_info_.emplace(
std::piecewise_construct, std::forward_as_tuple(local_trajectory_id),
std::forward_as_tuple(client.response().trajectory_id(),
expected_sensor_ids, trajectory_options,
client_id));
return true;
} }
void LocalTrajectoryUploader::FinishTrajectory(const std::string& client_id, void LocalTrajectoryUploader::FinishTrajectory(const std::string& client_id,
int local_trajectory_id) { int local_trajectory_id) {
CHECK_EQ(local_to_cloud_trajectory_id_map_.count(local_trajectory_id), 1); CHECK_EQ(local_trajectory_id_to_trajectory_info_.count(local_trajectory_id),
1);
int cloud_trajectory_id = int cloud_trajectory_id =
local_to_cloud_trajectory_id_map_[local_trajectory_id]; local_trajectory_id_to_trajectory_info_.at(local_trajectory_id)
.uplink_trajectory_id;
proto::FinishTrajectoryRequest request; proto::FinishTrajectoryRequest request;
request.set_client_id(client_id); request.set_client_id(client_id);
request.set_trajectory_id(cloud_trajectory_id); request.set_trajectory_id(cloud_trajectory_id);

View File

@ -44,13 +44,16 @@ class LocalTrajectoryUploaderInterface {
// Enqueue an Add*DataRequest message to be uploaded. // Enqueue an Add*DataRequest message to be uploaded.
virtual void EnqueueSensorData( virtual void EnqueueSensorData(
std::unique_ptr<proto::SensorData> sensor_data) = 0; std::unique_ptr<proto::SensorData> sensor_data) = 0;
virtual void AddTrajectory(
// Creates a new trajectory with the specified settings in the uplink. A
// return value of 'false' indicates that the creation failed.
virtual bool AddTrajectory(
const std::string& client_id, int local_trajectory_id, const std::string& client_id, int local_trajectory_id,
const std::set<SensorId>& expected_sensor_ids, const std::set<SensorId>& expected_sensor_ids,
const mapping::proto::TrajectoryBuilderOptions& trajectory_options) = 0; const mapping::proto::TrajectoryBuilderOptions& trajectory_options) = 0;
virtual void FinishTrajectory(const std::string& client_id, virtual void FinishTrajectory(const std::string& client_id,
int local_trajectory_id) = 0; int local_trajectory_id) = 0;
virtual SensorId GetLocalSlamResultSensorId( virtual SensorId GetLocalSlamResultSensorId(
int local_trajectory_id) const = 0; int local_trajectory_id) const = 0;
}; };

View File

@ -36,7 +36,7 @@ class MockLocalTrajectoryUploader : public LocalTrajectoryUploaderInterface {
MOCK_METHOD0(Start, void()); MOCK_METHOD0(Start, void());
MOCK_METHOD0(Shutdown, void()); MOCK_METHOD0(Shutdown, void());
MOCK_METHOD4(AddTrajectory, MOCK_METHOD4(AddTrajectory,
void(const std::string &, int, const std::set<SensorId> &, bool(const std::string &, int, const std::set<SensorId> &,
const mapping::proto::TrajectoryBuilderOptions &)); const mapping::proto::TrajectoryBuilderOptions &));
MOCK_METHOD2(FinishTrajectory, void(const std::string &, int)); MOCK_METHOD2(FinishTrajectory, void(const std::string &, int));
MOCK_CONST_METHOD1(GetLocalSlamResultSensorId, SensorId(int)); MOCK_CONST_METHOD1(GetLocalSlamResultSensorId, SensorId(int));

View File

@ -87,6 +87,18 @@ class BlockingQueue {
return t; return t;
} }
// Waits up to 'timeout' for the queue to become non-empty. Returns a pointer
// to the front element obtained through its get() member (the element is not
// removed from the queue), or nullptr if the wait timed out.
template <typename R>
R* PeekWithTimeout(const common::Duration timeout) {
  MutexLocker lock(&mutex_);
  const bool has_element = lock.AwaitWithTimeout(
      [this]() REQUIRES(mutex_) { return !QueueEmptyCondition(); }, timeout);
  if (has_element) {
    return deque_.front().get();
  }
  return nullptr;
}
// Returns the next value in the queue or nullptr if the queue is empty. // Returns the next value in the queue or nullptr if the queue is empty.
// Maintains ownership. This assumes a member function get() that returns // Maintains ownership. This assumes a member function get() that returns
// a pointer to the given type R. // a pointer to the given type R.

View File

@ -33,7 +33,16 @@ void LocalSlamResult2D::AddToPoseGraph(int trajectory_id,
CHECK(local_slam_result_data_.submaps(0).has_submap_2d()); CHECK(local_slam_result_data_.submaps(0).has_submap_2d());
std::vector<std::shared_ptr<const mapping::Submap2D>> submaps; std::vector<std::shared_ptr<const mapping::Submap2D>> submaps;
for (const auto& submap_proto : local_slam_result_data_.submaps()) { for (const auto& submap_proto : local_slam_result_data_.submaps()) {
submaps.push_back(submap_controller_->UpdateSubmap(submap_proto)); auto submap_ptr = submap_controller_->UpdateSubmap(submap_proto);
if (submap_ptr) {
submaps.push_back(submap_ptr);
} else {
LOG(INFO) << "Ignoring submap";
}
}
if (submaps.size() == 0) {
LOG(INFO) << "Ignoring node";
return;
} }
static_cast<PoseGraph2D*>(pose_graph) static_cast<PoseGraph2D*>(pose_graph)
->AddNode(std::make_shared<const mapping::TrajectoryNode::Data>( ->AddNode(std::make_shared<const mapping::TrajectoryNode::Data>(

View File

@ -23,6 +23,11 @@ template <>
std::shared_ptr<mapping::Submap2D> std::shared_ptr<mapping::Submap2D>
SubmapController<mapping::Submap2D>::CreateSubmap( SubmapController<mapping::Submap2D>::CreateSubmap(
const mapping::proto::Submap& proto) { const mapping::proto::Submap& proto) {
if (proto.submap_2d().num_range_data() != 1) {
LOG(WARNING) << "Refusing to create partially filled submap: "
<< proto.submap_2d().num_range_data();
return nullptr;
}
return std::make_shared<mapping::Submap2D>(proto.submap_2d(), return std::make_shared<mapping::Submap2D>(proto.submap_2d(),
&conversion_tables_); &conversion_tables_);
} }
@ -31,6 +36,11 @@ template <>
std::shared_ptr<mapping::Submap3D> std::shared_ptr<mapping::Submap3D>
SubmapController<mapping::Submap3D>::CreateSubmap( SubmapController<mapping::Submap3D>::CreateSubmap(
const mapping::proto::Submap& proto) { const mapping::proto::Submap& proto) {
if (proto.submap_3d().num_range_data() != 1) {
LOG(INFO) << "Refusing to create partially filled submap: "
<< proto.submap_3d().num_range_data();
return nullptr;
}
return std::make_shared<mapping::Submap3D>(proto.submap_3d()); return std::make_shared<mapping::Submap3D>(proto.submap_3d());
} }

View File

@ -36,7 +36,9 @@ class SubmapController {
auto submap_it = unfinished_submaps_.find(submap_id); auto submap_it = unfinished_submaps_.find(submap_id);
if (submap_it == unfinished_submaps_.end()) { if (submap_it == unfinished_submaps_.end()) {
submap_ptr = CreateSubmap(proto); submap_ptr = CreateSubmap(proto);
unfinished_submaps_.Insert(submap_id, submap_ptr); if (submap_ptr) {
unfinished_submaps_.Insert(submap_id, submap_ptr);
}
return submap_ptr; return submap_ptr;
} }
submap_ptr = submap_it->data; submap_ptr = submap_it->data;