Add a test reproducing a log reading failure

We saw a case where we only had a reliable message for the second boot.

on pi2 from pi1 { "name": "/atest1", "type": "aos.examples.Ping" }
    .monotonic_event_time={.boot=0, .time=99.000150000sec},
    .monotonic_remote_time={.boot=0, .time=99.000000000sec},
    .monotonic_timestamp_time={.boot=0, .time=99.000250000sec}

on pi2 from pi1 { "name": "/atest2", "type": "aos.examples.Ping" }
    .monotonic_event_time={.boot=1, .time=0.000100000sec},
    .monotonic_remote_time={.boot=0, .time=1.000000000sec},
    .monotonic_timestamp_time={.boot=0, .time=100.000200000sec},

This ends up resulting in it looking like the second boot happened 50
seconds before the end of the first boot, and time going backwards.

Add a test to recreate this situation so we can fix it.

Change-Id: I4615e9c85ba57dcd7e656af1f94d3ca90f2c2126
Signed-off-by: James Kuszmaul <james.kuszmaul@bluerivertech.com>
diff --git a/aos/events/logging/multinode_logger_test.cc b/aos/events/logging/multinode_logger_test.cc
index 427347d..a3cda43 100644
--- a/aos/events/logging/multinode_logger_test.cc
+++ b/aos/events/logging/multinode_logger_test.cc
@@ -79,6 +79,44 @@
                          ForceTimestampBuffering::kAutoBuffer}),
         ::testing::ValuesIn(SupportedCompressionAlgorithms())));
 
+// Class to spam Pong messages blindly.
+class PongSender {
+ public:
+  PongSender(EventLoop *loop, std::string_view channel_name)
+      : sender_(loop->MakeSender<examples::Pong>(channel_name)) {
+    loop->AddPhasedLoop(
+        [this](int) {
+          aos::Sender<examples::Pong>::Builder builder = sender_.MakeBuilder();
+          examples::Pong::Builder pong_builder =
+              builder.MakeBuilder<examples::Pong>();
+          CHECK_EQ(builder.Send(pong_builder.Finish()), RawSender::Error::kOk);
+        },
+        chrono::milliseconds(10));
+  }
+
+ private:
+  aos::Sender<examples::Pong> sender_;
+};
+
+// Class to spam Ping messages blindly.
+class PingSender {
+ public:
+  PingSender(EventLoop *loop, std::string_view channel_name)
+      : sender_(loop->MakeSender<examples::Ping>(channel_name)) {
+    loop->AddPhasedLoop(
+        [this](int) {
+          aos::Sender<examples::Ping>::Builder builder = sender_.MakeBuilder();
+          examples::Ping::Builder ping_builder =
+              builder.MakeBuilder<examples::Ping>();
+          CHECK_EQ(builder.Send(ping_builder.Finish()), RawSender::Error::kOk);
+        },
+        chrono::milliseconds(10));
+  }
+
+ private:
+  aos::Sender<examples::Ping> sender_;
+};
+
 // Tests that we can write and read simple multi-node log files.
 TEST_P(MultinodeLoggerTest, SimpleMultiNode) {
   if (file_strategy() == FileStrategy::kCombine) {
@@ -4686,24 +4724,71 @@
   auto result = ConfirmReadable(filenames);
 }
 
-// Class to spam Pong messages blindly.
-class PongSender {
- public:
-  PongSender(EventLoop *loop, std::string_view channel_name)
-      : sender_(loop->MakeSender<examples::Pong>(channel_name)) {
-    loop->AddPhasedLoop(
-        [this](int) {
-          aos::Sender<examples::Pong>::Builder builder = sender_.MakeBuilder();
-          examples::Pong::Builder pong_builder =
-              builder.MakeBuilder<examples::Pong>();
-          CHECK_EQ(builder.Send(pong_builder.Finish()), RawSender::Error::kOk);
-        },
-        chrono::milliseconds(10));
+// Tests that only having a delayed, reliable message from a boot results in a
+// readable log.
+//
+// Note: this is disabled since it doesn't work yet.  Un-disable this when the
+// code is fixed!
+TEST(MultinodeLoggerLoopTest, DISABLED_ReliableOnlyTimestamps) {
+  util::UnlinkRecursive(aos::testing::TestTmpDir() + "/logs");
+  std::filesystem::create_directory(aos::testing::TestTmpDir() + "/logs");
+
+  aos::FlatbufferDetachedBuffer<aos::Configuration> config =
+      aos::configuration::ReadConfig(
+          ArtifactPath("aos/events/logging/"
+                       "multinode_pingpong_reboot_reliable_only_config.json"));
+  message_bridge::TestingTimeConverter time_converter(
+      configuration::NodesCount(&config.message()));
+  SimulatedEventLoopFactory event_loop_factory(&config.message());
+  event_loop_factory.SetTimeConverter(&time_converter);
+
+  constexpr chrono::nanoseconds kRebootTime = chrono::seconds(100);
+  {
+    time_converter.AddNextTimestamp(
+        distributed_clock::epoch(),
+        {BootTimestamp::epoch(), BootTimestamp::epoch()});
+    time_converter.AddNextTimestamp(
+        distributed_clock::epoch() + kRebootTime,
+        {BootTimestamp::epoch() + kRebootTime,
+         BootTimestamp{.boot = 1, .time = monotonic_clock::epoch()}});
   }
 
- private:
-  aos::Sender<examples::Pong> sender_;
-};
+  const std::string kLogfile1_1 =
+      aos::testing::TestTmpDir() + "/logs/multi_logfile1/";
+
+  NodeEventLoopFactory *const pi1 =
+      event_loop_factory.GetNodeEventLoopFactory("pi1");
+
+  // We want unreliable timestamps from one boot, a reliable timestamp from the
+  // same boot, and then a long delayed reliable timestamp from the second boot.
+  // This produces conflicting information about when the second boot happened.
+  std::vector<std::string> filenames;
+  PingSender *app1 = pi1->AlwaysStart<PingSender>("pingsender", "/atest1");
+  PingSender *app2 = pi1->AlwaysStart<PingSender>("pingsender", "/atest2");
+  event_loop_factory.RunFor(chrono::seconds(1));
+  pi1->Stop(app2);
+  event_loop_factory.RunFor(kRebootTime - chrono::seconds(2));
+  pi1->Stop(app1);
+
+  event_loop_factory.RunFor(chrono::seconds(1) + kRebootTime * 2);
+
+  {
+    // Collect a small log after reboot.
+    LoggerState pi1_logger = MakeLoggerState(
+        pi1, &event_loop_factory, SupportedCompressionAlgorithms()[0],
+        FileStrategy::kKeepSeparate);
+    pi1_logger.StartLogger(kLogfile1_1);
+
+    event_loop_factory.RunFor(chrono::seconds(1));
+
+    pi1_logger.AppendAllFilenames(&filenames);
+  }
+
+  // Make sure we can read this.
+  const std::vector<LogFile> sorted_parts = SortParts(filenames);
+  EXPECT_TRUE(AllPartsMatchOutOfOrderDuration(sorted_parts));
+  auto result = ConfirmReadable(filenames);
+}
 
 // Tests that we log correctly as nodes connect slowly.
 TEST(MultinodeLoggerLoopTest, StaggeredConnect) {