Use the timestamp delivery time to reconstruct time Don't waste the data! This exposes an issue in sim (and real life). It takes time between when a message was sent and when it was forwarded. Timestamp messages don't have this delay, so they end up dominating the estimate. This causes time estimation to not be absolutely perfect when replaying simulation, but it is close enough nobody would notice. Steady state, I think this is actually benificial behavior. Change-Id: I85a1e69d47ea09cc61abf82789bcc3b55981f1db

commit: 816e5d627b88be5abf23e65770086e68e047fb2d [log] [tgz]
author: Austin Schuh <austin.linux@gmail.com> Tue Jan 05 23:42:20 2021 -0800
committer: Austin Schuh <austin.linux@gmail.com> Wed Jan 20 10:45:19 2021 -0800
tree: ae1288841ddb86439bec7c47fbcf2bebac51acef
parent: 4d92f4a6e476436cd32a07b139385691c9b6a035 [diff]
diff --git a/aos/events/logging/logger.cc b/aos/events/logging/logger.cc
index 2dd4701..7e822de 100644
--- a/aos/events/logging/logger.cc
+++ b/aos/events/logging/logger.cc

@@ -333,8 +333,7 @@
   for (const Node *node : log_namer_->nodes()) {
     const int node_index = configuration::GetNodeIndex(configuration_, node);
 
-    node_state_[node_index].log_file_header =
-        MakeHeader(node, config_sha256);
+    node_state_[node_index].log_file_header = MakeHeader(node, config_sha256);
   }
 
   // Grab data from each channel right before we declare the log file started
@@ -2027,8 +2026,7 @@
 
   if (scheduled_time_ != remote_timestamps_.front().monotonic_timestamp_time) {
     CHECK_NOTNULL(timer_);
-    timer_->Setup(
-        remote_timestamps_.front().monotonic_timestamp_time);
+    timer_->Setup(remote_timestamps_.front().monotonic_timestamp_time);
     scheduled_time_ = remote_timestamps_.front().monotonic_timestamp_time;
   }
 }
@@ -2098,8 +2096,8 @@
   SeedSortedMessages();
 
   if (std::get<1>(result) != nullptr) {
-    std::get<1>(result)->Pop(
-        event_loop_->node(), std::get<0>(result).monotonic_event_time);
+    std::get<1>(result)->Pop(event_loop_->node(),
+                             std::get<0>(result).monotonic_event_time);
   }
   return std::move(std::get<0>(result));
 }
@@ -2161,6 +2159,16 @@
       filter->Sample(event_loop_->node(),
                      timestamped_message.monotonic_event_time,
                      timestamped_message.monotonic_remote_time);
+
+      if (timestamped_message.monotonic_timestamp_time !=
+          monotonic_clock::min_time) {
+        // TODO(austin): This assumes that this timestamp is only logged on the
+        // node which sent the data.  That is correct for now, but should be
+        // explicitly checked somewhere.
+        filter->ReverseSample(event_loop_->node(),
+                              timestamped_message.monotonic_event_time,
+                              timestamped_message.monotonic_timestamp_time);
+      }
     }
     sorted_messages_.emplace_back(std::move(timestamped_message), filter);
   }

diff --git a/aos/events/logging/logger_test.cc b/aos/events/logging/logger_test.cc
index f57f129..a28891e 100644
--- a/aos/events/logging/logger_test.cc
+++ b/aos/events/logging/logger_test.cc

@@ -1634,13 +1634,15 @@
       pi1_event_loop->configuration(), pong_on_pi1_fetcher.channel());
 
   const chrono::nanoseconds network_delay = event_loop_factory_.network_delay();
+  const chrono::nanoseconds send_delay = event_loop_factory_.send_delay();
 
   pi1_event_loop->MakeWatcher(
       "/aos/remote_timestamps/pi2",
       [&pi1_event_loop, &pi2_event_loop, pi1_timestamp_channel,
        ping_timestamp_channel, &pi1_timestamp_on_pi1_fetcher,
        &pi1_timestamp_on_pi2_fetcher, &ping_on_pi1_fetcher,
-       &ping_on_pi2_fetcher, network_delay](const RemoteMessage &header) {
+       &ping_on_pi2_fetcher, network_delay,
+       send_delay](const RemoteMessage &header) {
         const aos::monotonic_clock::time_point header_monotonic_sent_time(
             chrono::nanoseconds(header.monotonic_sent_time()));
         const aos::realtime_clock::time_point header_realtime_sent_time(
@@ -1691,18 +1693,20 @@
         EXPECT_EQ(pi1_context->monotonic_event_time,
                   header_monotonic_remote_time);
 
-        EXPECT_EQ(pi1_event_loop->context().monotonic_event_time,
-                  pi2_context->monotonic_event_time +
-                      (pi1_event_loop->monotonic_now() -
-                       pi2_event_loop->monotonic_now()) +
-                      network_delay);
+        // Time estimation isn't perfect, but we know the clocks were identical
+        // when logged, so we know when this should have come back.  Confirm we
+        // got it when we expected.
+        EXPECT_EQ(
+            pi1_event_loop->context().monotonic_event_time,
+            pi1_context->monotonic_event_time + 2 * network_delay + send_delay);
       });
   pi2_event_loop->MakeWatcher(
       "/aos/remote_timestamps/pi1",
       [&pi2_event_loop, &pi1_event_loop, pi2_timestamp_channel,
        pong_timestamp_channel, &pi2_timestamp_on_pi2_fetcher,
        &pi2_timestamp_on_pi1_fetcher, &pong_on_pi2_fetcher,
-       &pong_on_pi1_fetcher, network_delay](const RemoteMessage &header) {
+       &pong_on_pi1_fetcher, network_delay,
+       send_delay](const RemoteMessage &header) {
         const aos::monotonic_clock::time_point header_monotonic_sent_time(
             chrono::nanoseconds(header.monotonic_sent_time()));
         const aos::realtime_clock::time_point header_realtime_sent_time(
@@ -1753,11 +1757,12 @@
         EXPECT_EQ(pi2_context->monotonic_event_time,
                   header_monotonic_remote_time);
 
-        EXPECT_EQ(pi2_event_loop->context().monotonic_event_time,
-                  pi1_context->monotonic_event_time +
-                      (pi2_event_loop->monotonic_now() -
-                       pi1_event_loop->monotonic_now()) +
-                      network_delay);
+        // Time estimation isn't perfect, but we know the clocks were identical
+        // when logged, so we know when this should have come back.  Confirm we
+        // got it when we expected.
+        EXPECT_EQ(
+            pi2_event_loop->context().monotonic_event_time,
+            pi2_context->monotonic_event_time + 2 * network_delay + send_delay);
       });
 
   // And confirm we can re-create a log again, while checking the contents.

diff --git a/aos/network/multinode_timestamp_filter.cc b/aos/network/multinode_timestamp_filter.cc
index 8d39b9d..e9834c1 100644
--- a/aos/network/multinode_timestamp_filter.cc
+++ b/aos/network/multinode_timestamp_filter.cc

@@ -50,7 +50,7 @@
 
   // Ask for really good.  This is very quadratic, so it should be pretty
   // precise.
-  nlopt_set_xtol_rel(opt, 1e-5);
+  nlopt_set_xtol_rel(opt, 1e-9);
 
   cost_call_count_ = 0;
 
@@ -112,7 +112,7 @@
   for (size_t i = 0; i < result.size(); ++i) {
     if (live(i)) {
       result[i] = base_clock(i) + std::chrono::nanoseconds(static_cast<int64_t>(
-                                      std::floor(get_t(r, i))));
+                                      std::round(get_t(r, i))));
     } else {
       result[i] = monotonic_clock::min_time;
     }
@@ -805,9 +805,11 @@
   // Ok, now solve for the minimum time on each channel.
   std::vector<aos::monotonic_clock::time_point> times;
   NoncausalTimestampFilter *next_filter = nullptr;
+  size_t solution_index = 0;
   {
     size_t node_a_index = 0;
     for (const auto &filters : filters_per_node_) {
+      VLOG(1) << "Investigating filter for node " << node_a_index;
       monotonic_clock::time_point next_node_time = monotonic_clock::max_time;
       NoncausalTimestampFilter *next_node_filter = nullptr;
       // Find the oldest time for each node in each filter, and solve for that
@@ -839,9 +841,8 @@
         // on the node we are solving for.  The rate that time elapses should be
         // ~1.
         problem->set_base_clock(
-            node_index,
-            base_times[node_a_index] +
-                (next_node_time - base_times[problem->solution_node()]));
+            node_index, base_times[node_index] +
+                            (next_node_time - base_times[node_a_index]));
       }
 
       problem->set_solution_node(node_a_index);
@@ -852,9 +853,16 @@
       // TODO(austin): Can we cache?  Solving is expensive.
       std::vector<monotonic_clock::time_point> solution = problem->Solve();
 
+      if (VLOG_IS_ON(1)) {
+        VLOG(1) << "Candidate solution for node " << node_a_index << " is";
+        for (size_t i = 0; i < solution.size(); ++i) {
+          VLOG(1) << "  " << solution[i];
+        }
+      }
       if (times.empty()) {
         times = std::move(solution);
         next_filter = next_node_filter;
+        solution_index = node_a_index;
         ++node_a_index;
         continue;
       }
@@ -869,6 +877,7 @@
           // The new solution is better!  Save it.
           times = std::move(solution);
           next_filter = next_node_filter;
+          solution_index = node_a_index;
           break;
         case TimeComparison::kInvalid:
           // Somehow the new solution is better *and* worse than the old
@@ -886,6 +895,12 @@
       ++node_a_index;
     }
   }
+  if (VLOG_IS_ON(1)) {
+    VLOG(1) << "Best solution is for node " << solution_index;
+    for (size_t i = 0; i < times.size(); ++i) {
+      VLOG(1) << "  " << times[i];
+    }
+  }
   return std::make_tuple(next_filter, std::move(times));
 }
 
@@ -906,6 +921,7 @@
   // All done.
   if (next_filter == nullptr) {
     if (first_solution_) {
+      VLOG(1) << "No more timestamps and the first solution.";
       // If this is our first time, there is no solution.  Instead of giving up
       // completely, (and providing no estimate of time at all), just say that
       // everything is on the distributed clock.  This will then get used as a
@@ -945,6 +961,7 @@
     std::vector<aos::monotonic_clock::time_point> resolved_times;
     NoncausalTimestampFilter *resolved_next_filter = nullptr;
 
+    VLOG(1) << "Resolving with updated base times for accuracy.";
     std::tie(resolved_next_filter, resolved_times) =
         NextSolution(&problem, times);
 

diff --git a/aos/network/timestamp_filter.cc b/aos/network/timestamp_filter.cc
index 32f796c..da9e3fe 100644
--- a/aos/network/timestamp_filter.cc
+++ b/aos/network/timestamp_filter.cc

@@ -16,7 +16,7 @@
 namespace chrono = std::chrono;
 
 std::string TimeString(const aos::monotonic_clock::time_point t,
-                                        std::chrono::nanoseconds o) {
+                       std::chrono::nanoseconds o) {
   std::stringstream ss;
   ss << "O(" << t << ") = " << o.count() << ", remote " << t + o;
   return ss.str();
@@ -962,7 +962,7 @@
              x,
          monotonic_clock::time_point t) { return std::get<0>(x) < t; });
 
-  CHECK (it != timestamps_.end());
+  CHECK(it != timestamps_.end());
 
   CHECK(!std::get<2>(*(it)))
       << ": Tried to insert " << monotonic_now << " before " << std::get<0>(*it)
@@ -1249,6 +1249,23 @@
   }
 }
 
+void NoncausalOffsetEstimator::ReverseSample(
+    const Node *node, aos::monotonic_clock::time_point node_sent_time,
+    aos::monotonic_clock::time_point other_node_delivered_time) {
+  VLOG(1) << "Reverse sample delivered " << other_node_delivered_time
+          << " sent " << node_sent_time << " from "
+          << node->name()->string_view();
+  if (node == node_a_) {
+    b_.Sample(other_node_delivered_time,
+              node_sent_time - other_node_delivered_time);
+  } else if (node == node_b_) {
+    a_.Sample(other_node_delivered_time,
+              node_sent_time - other_node_delivered_time);
+  } else {
+    LOG(FATAL) << "Unknown node " << node->name()->string_view();
+  }
+}
+
 bool NoncausalOffsetEstimator::Pop(
     const Node *node, aos::monotonic_clock::time_point node_monotonic_now) {
   if (node == node_a_) {

diff --git a/aos/network/timestamp_filter.h b/aos/network/timestamp_filter.h
index 7227fc7..9c931a2 100644
--- a/aos/network/timestamp_filter.h
+++ b/aos/network/timestamp_filter.h

@@ -457,11 +457,16 @@
     return nullptr;
   }
 
-  // Updates the filter based on a sample from the provided node to the other
-  // node.
+  // Updates the filter for the provided node based on a sample from the
+  // provided node to the other node.
   void Sample(const Node *node,
               aos::monotonic_clock::time_point node_delivered_time,
               aos::monotonic_clock::time_point other_node_sent_time);
+  // Updates the filter for the provided node based on a sample going to the
+  // provided node from the other node.
+  void ReverseSample(
+      const Node *node, aos::monotonic_clock::time_point node_sent_time,
+      aos::monotonic_clock::time_point other_node_delivered_time);
 
   // Removes old data points from a node before the provided time.
   // Returns true if any points were popped.

diff --git a/aos/network/timestamp_filter_test.cc b/aos/network/timestamp_filter_test.cc
index 4379dcb..796965a 100644
--- a/aos/network/timestamp_filter_test.cc
+++ b/aos/network/timestamp_filter_test.cc

@@ -1073,8 +1073,8 @@
   EXPECT_EQ(estimator.a_timestamps_size(), 2u);
   EXPECT_EQ(estimator.b_timestamps_size(), 2u);
 
-  estimator.Sample(node_a, ta3, tb3);
-  estimator.Sample(node_b, tb3, ta3);
+  estimator.ReverseSample(node_b, tb3, ta3);
+  estimator.ReverseSample(node_a, ta3, tb3);
   EXPECT_EQ(estimator.a_timestamps_size(), 3u);
   EXPECT_EQ(estimator.b_timestamps_size(), 3u);
commit	816e5d627b88be5abf23e65770086e68e047fb2d	[log] [tgz]
author	Austin Schuh <austin.linux@gmail.com>	Tue Jan 05 23:42:20 2021 -0800
committer	Austin Schuh <austin.linux@gmail.com>	Wed Jan 20 10:45:19 2021 -0800
tree	ae1288841ddb86439bec7c47fbcf2bebac51acef
parent	4d92f4a6e476436cd32a07b139385691c9b6a035 [diff]