Handle local messages sent before remote messages after reboot
We only generate points in the time interpolation function (and
therefore only discover reboots, since a reboot can only happen at a
point). When there is data in a log before remote data, we don't
generate these points, and therefore can't replay the log.
F1118 00:05:39.808653 14 log_reader.cc:690] Check failed: monotonic_now == timestamped_message.monotonic_event_time.time : { "name": "pi2", "hostname": "raspberrypi2", "port": 9971 } Now 107.005000000sec trying to send {.boot=1, .time=107.000000000sec} failure node 1 (pi2) [
] queued_until {.boot=1, .time=109.210150000sec}
node 0 remote_data [
channel 0 [
{.channel_index=3, .queue_index={.boot=0, .index=91}, .timestamp={.boot=0, .time=9.200000000sec}, .data=0x555c65f23f80}
]
channel 1 [
{.channel_index=20, .queue_index={.boot=0, .index=622}, .timestamp={.boot=0, .time=9.210000000sec}, .data=0x555c66126a90}
]
] queued_until {.boot=0, .time=9.210350000sec}
*** Check failure stack trace: ***
@ 0x7fd804015aef google::LogMessageFatal::~LogMessageFatal()
@ 0x7fd8045a7069 aos::logger::LogReader::RegisterDuringStartup()::$_3::operator()()
@ 0x7fd804258119 aos::TimerHandler::Call()
@ 0x7fd804257ead aos::SimulatedTimerHandler::HandleEvent()
@ 0x7fd804263fb1 std::_Function_handler<>::_M_invoke()
@ 0x7fd804252c18 aos::EventScheduler::CallOldestEvent()
@ 0x7fd804253f54 aos::EventSchedulerScheduler::Run()
@ 0x7fd80425b472 aos::SimulatedEventLoopFactory::Run()
@ 0x555c641e780f aos::logger::testing::ConfirmReadable()
@ 0x555c6421507f aos::logger::testing::MultinodeRebootLoggerTest_LocalMessageBeforeRemoteBeforeStartAfterReboot_Test::TestBody()
@ 0x7fd803fa0ce4 testing::internal::HandleExceptionsInMethodIfSupported<>()
@ 0x7fd803fa0c21 testing::Test::Run()
@ 0x7fd803fa1fdf testing::TestInfo::Run()
@ 0x7fd803fa2b07 testing::TestSuite::Run()
@ 0x7fd803fb4d17 testing::internal::UnitTestImpl::RunAllTests()
@ 0x7fd803fb4634 testing::internal::HandleExceptionsInMethodIfSupported<>()
@ 0x7fd803fb44bb testing::UnitTest::Run()
@ 0x7fd804065010 main
@ 0x7fd803aa4d0a __libc_start_main
@ 0x555c641e132a _start
The fix is to generate a point for the first local timestamp too.
Change-Id: Ic1090d35375fce295d91bccc53f0d3ca8e472770
Signed-off-by: Austin Schuh <austin.linux@gmail.com>
diff --git a/aos/events/logging/logger_test.cc b/aos/events/logging/logger_test.cc
index b58933e..7bf9478 100644
--- a/aos/events/logging/logger_test.cc
+++ b/aos/events/logging/logger_test.cc
@@ -2860,6 +2860,147 @@
ConfirmReadable(filenames);
}
+// Tests that local data before remote data after reboot is properly replayed.
+// We only trigger a reboot in the timestamp interpolation function when solving
+// the timestamp problem when we actually have a point in the function. This
+// originally only happened when a point passes the noncausal filter. At the
+// start of time for the second boot, if we aren't careful, we will have
+// messages which need to be published at times before the boot. This happens
+// when a local message is in the log before a forwarded message, so there is no
+// point in the interpolation function. This delays the reboot. So, we need to
+// recreate that situation and make sure it doesn't come back.
+TEST(MultinodeRebootLoggerTest, LocalMessageBeforeRemoteBeforeStartAfterReboot) {
+ aos::FlatbufferDetachedBuffer<aos::Configuration> config =
+ aos::configuration::ReadConfig(ArtifactPath(
+ "aos/events/logging/multinode_pingpong_split3_config.json"));
+ message_bridge::TestingTimeConverter time_converter(
+ configuration::NodesCount(&config.message()));
+ SimulatedEventLoopFactory event_loop_factory(&config.message());
+ event_loop_factory.SetTimeConverter(&time_converter);
+ NodeEventLoopFactory *const pi1 =
+ event_loop_factory.GetNodeEventLoopFactory("pi1");
+ const size_t pi1_index = configuration::GetNodeIndex(
+ event_loop_factory.configuration(), pi1->node());
+ NodeEventLoopFactory *const pi2 =
+ event_loop_factory.GetNodeEventLoopFactory("pi2");
+ const size_t pi2_index = configuration::GetNodeIndex(
+ event_loop_factory.configuration(), pi2->node());
+ NodeEventLoopFactory *const pi3 =
+ event_loop_factory.GetNodeEventLoopFactory("pi3");
+ const size_t pi3_index = configuration::GetNodeIndex(
+ event_loop_factory.configuration(), pi3->node());
+
+ const std::string kLogfile1_1 =
+ aos::testing::TestTmpDir() + "/multi_logfile1/";
+ const std::string kLogfile2_1 =
+ aos::testing::TestTmpDir() + "/multi_logfile2.1/";
+ const std::string kLogfile2_2 =
+ aos::testing::TestTmpDir() + "/multi_logfile2.2/";
+ const std::string kLogfile3_1 =
+ aos::testing::TestTmpDir() + "/multi_logfile3/";
+ util::UnlinkRecursive(kLogfile1_1);
+ util::UnlinkRecursive(kLogfile2_1);
+ util::UnlinkRecursive(kLogfile2_2);
+ util::UnlinkRecursive(kLogfile3_1);
+ const UUID pi1_boot0 = UUID::Random();
+ const UUID pi2_boot0 = UUID::Random();
+ const UUID pi2_boot1 = UUID::Random();
+ const UUID pi3_boot0 = UUID::Random();
+ {
+ CHECK_EQ(pi1_index, 0u);
+ CHECK_EQ(pi2_index, 1u);
+ CHECK_EQ(pi3_index, 2u);
+
+ time_converter.set_boot_uuid(pi1_index, 0, pi1_boot0);
+ time_converter.set_boot_uuid(pi2_index, 0, pi2_boot0);
+ time_converter.set_boot_uuid(pi2_index, 1, pi2_boot1);
+ time_converter.set_boot_uuid(pi3_index, 0, pi3_boot0);
+
+ time_converter.AddNextTimestamp(
+ distributed_clock::epoch(),
+ {BootTimestamp::epoch(), BootTimestamp::epoch(),
+ BootTimestamp::epoch()});
+ const chrono::nanoseconds reboot_time = chrono::milliseconds(5000);
+ time_converter.AddNextTimestamp(
+ distributed_clock::epoch() + reboot_time,
+ {BootTimestamp::epoch() + reboot_time,
+ BootTimestamp{
+ .boot = 1,
+ .time = monotonic_clock::epoch() + reboot_time + chrono::seconds(100)},
+ BootTimestamp::epoch() + reboot_time});
+ }
+
+ std::vector<std::string> filenames;
+ {
+ LoggerState pi1_logger = LoggerState::MakeLogger(
+ pi1, &event_loop_factory, SupportedCompressionAlgorithms()[0]);
+ LoggerState pi3_logger = LoggerState::MakeLogger(
+ pi3, &event_loop_factory, SupportedCompressionAlgorithms()[0]);
+ {
+ // And now start the logger.
+ LoggerState pi2_logger = LoggerState::MakeLogger(
+ pi2, &event_loop_factory, SupportedCompressionAlgorithms()[0]);
+
+ pi1_logger.StartLogger(kLogfile1_1);
+ pi3_logger.StartLogger(kLogfile3_1);
+ pi2_logger.StartLogger(kLogfile2_1);
+
+ event_loop_factory.RunFor(chrono::milliseconds(1005));
+
+ // Now that we've got a start time in the past, turn on data.
+ std::unique_ptr<aos::EventLoop> ping_event_loop =
+ pi1->MakeEventLoop("ping");
+ Ping ping(ping_event_loop.get());
+
+ pi2->AlwaysStart<Pong>("pong");
+
+ event_loop_factory.RunFor(chrono::milliseconds(3000));
+
+ pi2_logger.AppendAllFilenames(&filenames);
+
+ // Disable any remote messages on pi2.
+ pi1->Disconnect(pi2->node());
+ pi2->Disconnect(pi1->node());
+ }
+ event_loop_factory.RunFor(chrono::milliseconds(995));
+ // pi2 now reboots at 5 seconds.
+ {
+ event_loop_factory.RunFor(chrono::milliseconds(1000));
+
+ // Make local stuff happen before we start logging and connect the remote.
+ pi2->AlwaysStart<Pong>("pong");
+ std::unique_ptr<aos::EventLoop> ping_event_loop =
+ pi1->MakeEventLoop("ping");
+ Ping ping(ping_event_loop.get());
+ event_loop_factory.RunFor(chrono::milliseconds(1005));
+
+ // Start logging again on pi2 after it is up.
+ LoggerState pi2_logger = LoggerState::MakeLogger(
+ pi2, &event_loop_factory, SupportedCompressionAlgorithms()[0]);
+ pi2_logger.StartLogger(kLogfile2_2);
+
+ // And allow remote messages now that we have some local ones.
+ pi1->Connect(pi2->node());
+ pi2->Connect(pi1->node());
+
+ event_loop_factory.RunFor(chrono::milliseconds(1000));
+
+ event_loop_factory.RunFor(chrono::milliseconds(3000));
+
+ pi2_logger.AppendAllFilenames(&filenames);
+ }
+
+ pi1_logger.AppendAllFilenames(&filenames);
+ pi3_logger.AppendAllFilenames(&filenames);
+ }
+
+ // Confirm that we can parse the result. LogReader has enough internal CHECKs
+ // to confirm the right thing happened.
+ const std::vector<LogFile> sorted_parts = SortParts(filenames);
+ ConfirmReadable(filenames);
+}
+
+
} // namespace testing
} // namespace logger
} // namespace aos