Track matching forwarded times for log file sorting
Previously, the only thing we knew was a loose ordering based on part
index. That isn't very conclusive. This now provides us with
corresponding timestamps for when things cross the network boundary in a
way that we think should tell us definitively what came first.
There are 2 main problem cases. Let's say we have 2 channels. /a which
is reliable, and /b which isn't, both sent from the same remote node.
case 1: /a -> boot 0 received on boot 0.
/b -> boot 1 received on boot 0.
We start logging after both messages arrive.
case 2: /a -> boot 0 received on boot 0.
/b -> boot 0 received on boot 0.
We log for a bit, then reboot. More messages show up when we reconnect.
/a -> boot 0 received on boot 1.
/b -> boot 0 received on boot 1.
In case 1: we only have a reliable timestamp from boot 0, but that
reliable timestamp makes it clear that /a was before /b, so boot 0 was
before boot 1.
In case 2: we have the same reliable timestamp, so that tells us nothing.
The unreliable timestamps though tell a different story. /b will be after
/a, since any messages on /b generated before the reboot won't get
delivered. So, we get an ordering constraint saying that any sent /b's
on the second boot were after /b on the first boot.
We believe that any other cases are covered by the same mechanism.
Without fully implementing and debugging the sorting code, we won't know
for certain. Sanjay and I have been unable to break the logic so far.
Change-Id: I990bf249b18bf43072997cdb099ac66c2fa8fc57
Signed-off-by: Austin Schuh <austin.schuh@bluerivertech.com>
diff --git a/aos/events/logging/logger_test.cc b/aos/events/logging/logger_test.cc
index a439f84..8cad486 100644
--- a/aos/events/logging/logger_test.cc
+++ b/aos/events/logging/logger_test.cc
@@ -558,6 +558,7 @@
result.emplace_back(logfile_base1_ + "_pi1_data.part0.bfbs");
result.emplace_back(logfile_base1_ + "_pi1_data.part1.bfbs");
result.emplace_back(logfile_base1_ + "_pi1_data.part2.bfbs");
+ result.emplace_back(logfile_base1_ + "_pi1_data.part3.bfbs");
result.emplace_back(logfile_base1_ +
"_pi2_data/test/aos.examples.Pong.part0.bfbs");
result.emplace_back(logfile_base1_ +
@@ -1527,7 +1528,8 @@
event_loop_factory_.RunFor(logger_run3);
}
- LogReader reader(SortParts(logfiles_));
+ LogReader reader(
+ SortParts(MakeLogFiles(logfile_base1_, logfile_base2_, 3, 2)));
SimulatedEventLoopFactory log_reader_factory(reader.configuration());
log_reader_factory.set_send_delay(chrono::microseconds(0));
@@ -2268,6 +2270,119 @@
event_loop_factory_.RunFor(chrono::milliseconds(20000));
}
+ // Confirm that our new oldest timestamps properly update as we reboot and
+ // rotate.
+ for (const std::string &file : pi1_reboot_logfiles_) {
+ std::optional<SizePrefixedFlatbufferVector<LogFileHeader>> log_header =
+ ReadHeader(file);
+ CHECK(log_header);
+ if (log_header->message().has_configuration()) {
+ continue;
+ }
+
+ if (log_header->message().node()->name()->string_view() != "pi1") {
+ continue;
+ }
+ SCOPED_TRACE(file);
+ SCOPED_TRACE(aos::FlatbufferToJson(
+ *log_header, {.multi_line = true, .max_vector_size = 100}));
+ ASSERT_TRUE(log_header->message().has_oldest_remote_monotonic_timestamps());
+ ASSERT_EQ(
+ log_header->message().oldest_remote_monotonic_timestamps()->size(), 2u);
+ EXPECT_EQ(
+ log_header->message().oldest_remote_monotonic_timestamps()->Get(0),
+ monotonic_clock::max_time.time_since_epoch().count());
+ ASSERT_TRUE(log_header->message().has_oldest_local_monotonic_timestamps());
+ ASSERT_EQ(log_header->message().oldest_local_monotonic_timestamps()->size(),
+ 2u);
+ EXPECT_EQ(log_header->message().oldest_local_monotonic_timestamps()->Get(0),
+ monotonic_clock::max_time.time_since_epoch().count());
+ ASSERT_TRUE(log_header->message()
+ .has_oldest_remote_unreliable_monotonic_timestamps());
+ ASSERT_EQ(log_header->message()
+ .oldest_remote_unreliable_monotonic_timestamps()
+ ->size(),
+ 2u);
+ EXPECT_EQ(log_header->message()
+ .oldest_remote_unreliable_monotonic_timestamps()
+ ->Get(0),
+ monotonic_clock::max_time.time_since_epoch().count());
+ ASSERT_TRUE(log_header->message()
+ .has_oldest_local_unreliable_monotonic_timestamps());
+ ASSERT_EQ(log_header->message()
+ .oldest_local_unreliable_monotonic_timestamps()
+ ->size(),
+ 2u);
+ EXPECT_EQ(log_header->message()
+ .oldest_local_unreliable_monotonic_timestamps()
+ ->Get(0),
+ monotonic_clock::max_time.time_since_epoch().count());
+
+ const monotonic_clock::time_point oldest_remote_monotonic_timestamps =
+ monotonic_clock::time_point(chrono::nanoseconds(
+ log_header->message().oldest_remote_monotonic_timestamps()->Get(
+ 1)));
+ const monotonic_clock::time_point oldest_local_monotonic_timestamps =
+ monotonic_clock::time_point(chrono::nanoseconds(
+ log_header->message().oldest_local_monotonic_timestamps()->Get(1)));
+ const monotonic_clock::time_point
+ oldest_remote_unreliable_monotonic_timestamps =
+ monotonic_clock::time_point(chrono::nanoseconds(
+ log_header->message()
+ .oldest_remote_unreliable_monotonic_timestamps()
+ ->Get(1)));
+ const monotonic_clock::time_point
+ oldest_local_unreliable_monotonic_timestamps =
+ monotonic_clock::time_point(chrono::nanoseconds(
+ log_header->message()
+ .oldest_local_unreliable_monotonic_timestamps()
+ ->Get(1)));
+ switch (log_header->message().parts_index()) {
+ case 0:
+ EXPECT_EQ(oldest_remote_monotonic_timestamps,
+ monotonic_clock::max_time);
+ EXPECT_EQ(oldest_local_monotonic_timestamps, monotonic_clock::max_time);
+ EXPECT_EQ(oldest_remote_unreliable_monotonic_timestamps,
+ monotonic_clock::max_time);
+ EXPECT_EQ(oldest_local_unreliable_monotonic_timestamps,
+ monotonic_clock::max_time);
+ break;
+ case 1:
+ EXPECT_EQ(oldest_remote_monotonic_timestamps,
+ monotonic_clock::time_point(chrono::microseconds(90200)));
+ EXPECT_EQ(oldest_local_monotonic_timestamps,
+ monotonic_clock::time_point(chrono::microseconds(90350)));
+ EXPECT_EQ(oldest_remote_unreliable_monotonic_timestamps,
+ monotonic_clock::time_point(chrono::microseconds(90200)));
+ EXPECT_EQ(oldest_local_unreliable_monotonic_timestamps,
+ monotonic_clock::time_point(chrono::microseconds(90350)));
+ break;
+ case 2:
+ EXPECT_EQ(oldest_remote_monotonic_timestamps,
+ monotonic_clock::time_point(chrono::microseconds(10100000)));
+ EXPECT_EQ(oldest_local_monotonic_timestamps,
+ monotonic_clock::time_point(chrono::microseconds(10100150)));
+ EXPECT_EQ(oldest_remote_unreliable_monotonic_timestamps,
+ monotonic_clock::max_time);
+ EXPECT_EQ(oldest_local_unreliable_monotonic_timestamps,
+ monotonic_clock::max_time);
+ break;
+ case 3:
+ EXPECT_EQ(oldest_remote_monotonic_timestamps,
+ monotonic_clock::time_point(chrono::microseconds(10100000)));
+ EXPECT_EQ(oldest_local_monotonic_timestamps,
+ monotonic_clock::time_point(chrono::microseconds(10100150)));
+ EXPECT_EQ(oldest_remote_unreliable_monotonic_timestamps,
+ monotonic_clock::time_point(chrono::microseconds(10100200)));
+ EXPECT_EQ(oldest_local_unreliable_monotonic_timestamps,
+ monotonic_clock::time_point(chrono::microseconds(10100350)));
+ break;
+ default:
+ FAIL();
+ break;
+ }
+ }
+
// Confirm that we refuse to replay logs with missing boot uuids.
EXPECT_DEATH(
{