Add tests for max out of order duration.
Add test cases to check max out of order duration doesn't explode when
we're logging messages from a previous boot and when we don't have a
start time because the remote node is disconnected when we start
logging.
Change-Id: Ie0a3e1758503d128d0ef98d85e5b6313fabc2a9e
Signed-off-by: James Kuszmaul <james.kuszmaul@bluerivertech.com>
diff --git a/aos/events/logging/BUILD b/aos/events/logging/BUILD
index 1125a0e..3bed5c4 100644
--- a/aos/events/logging/BUILD
+++ b/aos/events/logging/BUILD
@@ -710,6 +710,21 @@
deps = ["//aos/events:aos_config"],
)
+aos_config(
+ name = "multinode_pingpong_reboot_ooo_config",
+ src = "multinode_pingpong_reboot_ooo.json",
+ flatbuffers = [
+ "//aos/events:ping_fbs",
+ "//aos/events:pong_fbs",
+ "//aos/network:message_bridge_client_fbs",
+ "//aos/network:message_bridge_server_fbs",
+ "//aos/network:remote_message_fbs",
+ "//aos/network:timestamp_fbs",
+ ],
+ target_compatible_with = ["@platforms//os:linux"],
+ deps = ["//aos/events:aos_config"],
+)
+
cc_test(
name = "realtime_replay_test",
srcs = ["realtime_replay_test.cc"],
@@ -754,6 +769,7 @@
}),
data = [
":multinode_pingpong_combined_config",
+ ":multinode_pingpong_reboot_ooo_config",
":multinode_pingpong_split3_config",
":multinode_pingpong_split4_config",
":multinode_pingpong_split4_mixed1_config",
diff --git a/aos/events/logging/multinode_logger_test.cc b/aos/events/logging/multinode_logger_test.cc
index 648f430..48f1b8b 100644
--- a/aos/events/logging/multinode_logger_test.cc
+++ b/aos/events/logging/multinode_logger_test.cc
@@ -4156,6 +4156,224 @@
// up in both logs correctly.
}
+// Tests that when we have evidence of 2 boots, and then start logging, the
+// max_out_of_order_duration ends up reasonable on the boot with the start time.
+TEST(MultinodeLoggerLoopTest, PreviousBootData) {
+ aos::FlatbufferDetachedBuffer<aos::Configuration> config =
+ aos::configuration::ReadConfig(ArtifactPath(
+ "aos/events/logging/multinode_pingpong_reboot_ooo_config.json"));
+ message_bridge::TestingTimeConverter time_converter(
+ configuration::NodesCount(&config.message()));
+ SimulatedEventLoopFactory event_loop_factory(&config.message());
+ event_loop_factory.SetTimeConverter(&time_converter);
+
+ const UUID pi1_boot0 = UUID::Random();
+ const UUID pi2_boot0 = UUID::Random();
+ const UUID pi2_boot1 = UUID::Random();
+
+ const std::string kLogfile1_1 =
+ aos::testing::TestTmpDir() + "/multi_logfile1/";
+ util::UnlinkRecursive(kLogfile1_1);
+
+ {
+ constexpr size_t kPi1Index = 0;
+ constexpr size_t kPi2Index = 1;
+ time_converter.set_boot_uuid(kPi1Index, 0, pi1_boot0);
+ time_converter.set_boot_uuid(kPi2Index, 0, pi2_boot0);
+ time_converter.set_boot_uuid(kPi2Index, 1, pi2_boot1);
+
+ // Make pi1 boot before everything else.
+ time_converter.AddNextTimestamp(
+ distributed_clock::epoch(),
+ {BootTimestamp::epoch(),
+ BootTimestamp::epoch() - chrono::milliseconds(100)});
+
+ const chrono::nanoseconds reboot_time = chrono::seconds(1005);
+ time_converter.AddNextTimestamp(
+ distributed_clock::epoch() + reboot_time,
+ {BootTimestamp::epoch() + reboot_time,
+ BootTimestamp{.boot = 1, .time = monotonic_clock::epoch()}});
+ }
+
+ NodeEventLoopFactory *const pi1 =
+ event_loop_factory.GetNodeEventLoopFactory("pi1");
+ NodeEventLoopFactory *const pi2 =
+ event_loop_factory.GetNodeEventLoopFactory("pi2");
+
+ // What we want is for pi2 to send a message at t=1000 on the first channel
+ // (/atest1 pong), and t=1 on the second channel (/atest3 pong). That'll make
+ // the max out of order duration be large.
+ //
+ // Then, we reboot, and only send messages on a third channel (/atest2 pong).
+ // The order is key, they need to sort in this order in the config.
+
+ std::vector<std::string> filenames;
+ {
+ {
+ std::unique_ptr<EventLoop> pi2_event_loop = pi2->MakeEventLoop("pong");
+ aos::Sender<examples::Pong> pong_sender =
+ pi2_event_loop->MakeSender<examples::Pong>("/atest3");
+
+ pi2_event_loop->OnRun([&]() {
+ aos::Sender<examples::Pong>::Builder builder =
+ pong_sender.MakeBuilder();
+ examples::Pong::Builder pong_builder =
+ builder.MakeBuilder<examples::Pong>();
+ CHECK_EQ(builder.Send(pong_builder.Finish()), RawSender::Error::kOk);
+ });
+
+ event_loop_factory.RunFor(chrono::seconds(1000));
+ }
+
+ {
+ std::unique_ptr<EventLoop> pi2_event_loop = pi2->MakeEventLoop("pong");
+ aos::Sender<examples::Pong> pong_sender =
+ pi2_event_loop->MakeSender<examples::Pong>("/atest1");
+
+ aos::Sender<examples::Pong>::Builder builder = pong_sender.MakeBuilder();
+ examples::Pong::Builder pong_builder =
+ builder.MakeBuilder<examples::Pong>();
+ CHECK_EQ(builder.Send(pong_builder.Finish()), RawSender::Error::kOk);
+ }
+
+ event_loop_factory.RunFor(chrono::seconds(10));
+
+ // Now start a receiving node first. This sets up 2 tight bounds between
+ // 2 of the nodes.
+ LoggerState pi1_logger = MakeLoggerState(
+ pi1, &event_loop_factory, SupportedCompressionAlgorithms()[0]);
+ pi1_logger.StartLogger(kLogfile1_1);
+
+ std::unique_ptr<EventLoop> pi2_event_loop = pi2->MakeEventLoop("pong");
+ aos::Sender<examples::Pong> pong_sender =
+ pi2_event_loop->MakeSender<examples::Pong>("/atest2");
+
+ pi2_event_loop->AddPhasedLoop(
+ [&pong_sender](int) {
+ aos::Sender<examples::Pong>::Builder builder =
+ pong_sender.MakeBuilder();
+ examples::Pong::Builder pong_builder =
+ builder.MakeBuilder<examples::Pong>();
+ CHECK_EQ(builder.Send(pong_builder.Finish()), RawSender::Error::kOk);
+ },
+ chrono::milliseconds(10));
+
+ event_loop_factory.RunFor(chrono::seconds(100));
+
+ pi1_logger.AppendAllFilenames(&filenames);
+ }
+
+ // Make sure we can read this.
+ const std::vector<LogFile> sorted_parts = SortParts(filenames);
+ EXPECT_TRUE(AllRebootPartsMatchOutOfOrderDuration(sorted_parts, "pi2"));
+ auto result = ConfirmReadable(filenames);
+}
+
+// Tests that when we start without a connection, and then start logging, the
+// max_out_of_order_duration ends up reasonable.
+TEST(MultinodeLoggerLoopTest, StartDisconnected) {
+ aos::FlatbufferDetachedBuffer<aos::Configuration> config =
+ aos::configuration::ReadConfig(ArtifactPath(
+ "aos/events/logging/multinode_pingpong_reboot_ooo_config.json"));
+ message_bridge::TestingTimeConverter time_converter(
+ configuration::NodesCount(&config.message()));
+ SimulatedEventLoopFactory event_loop_factory(&config.message());
+ event_loop_factory.SetTimeConverter(&time_converter);
+
+ time_converter.StartEqual();
+
+ const std::string kLogfile1_1 =
+ aos::testing::TestTmpDir() + "/multi_logfile1/";
+ util::UnlinkRecursive(kLogfile1_1);
+
+ NodeEventLoopFactory *const pi1 =
+ event_loop_factory.GetNodeEventLoopFactory("pi1");
+ NodeEventLoopFactory *const pi2 =
+ event_loop_factory.GetNodeEventLoopFactory("pi2");
+
+ // What we want is for pi2 to send a message at t=1000 on the first channel
+ // (/atest1 pong), and t=1 on the second channel (/atest3 pong). That'll make
+ // the max out of order duration be large.
+ //
+ // Then, we disconnect, and only send messages on a third channel
+ // (/atest2 pong). The order is key, they need to sort in this order in the
+ // config so we observe them in the order which grows the
+ // max_out_of_order_duration.
+
+ std::vector<std::string> filenames;
+ {
+ {
+ std::unique_ptr<EventLoop> pi2_event_loop = pi2->MakeEventLoop("pong");
+ aos::Sender<examples::Pong> pong_sender =
+ pi2_event_loop->MakeSender<examples::Pong>("/atest3");
+
+ pi2_event_loop->OnRun([&]() {
+ aos::Sender<examples::Pong>::Builder builder =
+ pong_sender.MakeBuilder();
+ examples::Pong::Builder pong_builder =
+ builder.MakeBuilder<examples::Pong>();
+ CHECK_EQ(builder.Send(pong_builder.Finish()), RawSender::Error::kOk);
+ });
+
+ event_loop_factory.RunFor(chrono::seconds(1000));
+ }
+
+ {
+ std::unique_ptr<EventLoop> pi2_event_loop = pi2->MakeEventLoop("pong");
+ aos::Sender<examples::Pong> pong_sender =
+ pi2_event_loop->MakeSender<examples::Pong>("/atest1");
+
+ aos::Sender<examples::Pong>::Builder builder = pong_sender.MakeBuilder();
+ examples::Pong::Builder pong_builder =
+ builder.MakeBuilder<examples::Pong>();
+ CHECK_EQ(builder.Send(pong_builder.Finish()), RawSender::Error::kOk);
+ }
+
+ event_loop_factory.RunFor(chrono::seconds(10));
+
+ pi1->Disconnect(pi2->node());
+ pi2->Disconnect(pi1->node());
+
+ // Make data flow.
+ std::unique_ptr<EventLoop> pi2_event_loop = pi2->MakeEventLoop("pong");
+ aos::Sender<examples::Pong> pong_sender =
+ pi2_event_loop->MakeSender<examples::Pong>("/atest2");
+
+ pi2_event_loop->AddPhasedLoop(
+ [&pong_sender](int) {
+ aos::Sender<examples::Pong>::Builder builder =
+ pong_sender.MakeBuilder();
+ examples::Pong::Builder pong_builder =
+ builder.MakeBuilder<examples::Pong>();
+ CHECK_EQ(builder.Send(pong_builder.Finish()), RawSender::Error::kOk);
+ },
+ chrono::milliseconds(10));
+
+ event_loop_factory.RunFor(chrono::seconds(10));
+
+ // Now start a receiving node first. This sets up 2 tight bounds between
+ // 2 of the nodes.
+ LoggerState pi1_logger = MakeLoggerState(
+ pi1, &event_loop_factory, SupportedCompressionAlgorithms()[0]);
+ pi1_logger.StartLogger(kLogfile1_1);
+
+ event_loop_factory.RunFor(chrono::seconds(10));
+
+ // Now, reconnect, and everything should recover.
+ pi1->Connect(pi2->node());
+ pi2->Connect(pi1->node());
+
+ event_loop_factory.RunFor(chrono::seconds(10));
+
+ pi1_logger.AppendAllFilenames(&filenames);
+ }
+
+ // Make sure we can read this.
+ const std::vector<LogFile> sorted_parts = SortParts(filenames);
+ EXPECT_TRUE(AllPartsMatchOutOfOrderDuration(sorted_parts));
+ auto result = ConfirmReadable(filenames);
+}
+
} // namespace testing
} // namespace logger
} // namespace aos
diff --git a/aos/events/logging/multinode_logger_test_lib.cc b/aos/events/logging/multinode_logger_test_lib.cc
index f383778..3569399 100644
--- a/aos/events/logging/multinode_logger_test_lib.cc
+++ b/aos/events/logging/multinode_logger_test_lib.cc
@@ -546,6 +546,7 @@
bool AllPartsMatchOutOfOrderDuration(
const std::vector<LogFile> &files,
std::chrono::nanoseconds max_out_of_order_duration) {
+ bool result = true;
for (const LogFile &file : files) {
for (const LogParts &parts : file.parts) {
if (parts.max_out_of_order_duration != max_out_of_order_duration) {
@@ -553,11 +554,32 @@
<< parts.max_out_of_order_duration.count()
<< "ns instead of " << max_out_of_order_duration.count()
<< "ns for " << parts;
- return false;
+ result = false;
}
}
}
- return true;
+ return result;
+}
+
+bool AllRebootPartsMatchOutOfOrderDuration(
+ const std::vector<LogFile> &files, const std::string node,
+ std::chrono::nanoseconds max_out_of_order_duration) {
+ bool result = true;
+ for (const LogFile &file : files) {
+ for (const LogParts &parts : file.parts) {
+ if (parts.node == node && parts.boot_count == 0) {
+ continue;
+ }
+ if (parts.max_out_of_order_duration != max_out_of_order_duration) {
+ LOG(ERROR) << "Found an out of order duration of "
+ << parts.max_out_of_order_duration.count()
+ << "ns instead of " << max_out_of_order_duration.count()
+ << "ns for " << parts;
+ result = false;
+ }
+ }
+ }
+ return result;
}
} // namespace testing
diff --git a/aos/events/logging/multinode_logger_test_lib.h b/aos/events/logging/multinode_logger_test_lib.h
index 1bea704..d51a17f 100644
--- a/aos/events/logging/multinode_logger_test_lib.h
+++ b/aos/events/logging/multinode_logger_test_lib.h
@@ -106,6 +106,12 @@
std::chrono::nanoseconds max_out_of_order_duration =
std::chrono::milliseconds(300));
+// Skips checking the part file with boot_count 0 for 'node'.
+bool AllRebootPartsMatchOutOfOrderDuration(
+ const std::vector<LogFile> &files, const std::string node,
+ std::chrono::nanoseconds max_out_of_order_duration =
+ std::chrono::milliseconds(300));
+
class MultinodeLoggerTest : public ::testing::TestWithParam<
std::tuple<ConfigParams, CompressionParams>> {
public:
diff --git a/aos/events/logging/multinode_pingpong_reboot_ooo.json b/aos/events/logging/multinode_pingpong_reboot_ooo.json
new file mode 100644
index 0000000..80d2972
--- /dev/null
+++ b/aos/events/logging/multinode_pingpong_reboot_ooo.json
@@ -0,0 +1,240 @@
+{
+ "channels": [
+ {
+ "name": "/pi1/aos",
+ "type": "aos.logging.LogMessageFbs",
+ "source_node": "pi1",
+ "frequency": 200,
+ "num_senders": 20,
+ "max_size": 2048
+ },
+ {
+ "name": "/pi2/aos",
+ "type": "aos.logging.LogMessageFbs",
+ "source_node": "pi2",
+ "frequency": 200,
+ "num_senders": 20,
+ "max_size": 2048
+ },
+ /* Logged on pi1 locally */
+ {
+ "name": "/pi1/aos",
+ "type": "aos.timing.Report",
+ "source_node": "pi1",
+ "frequency": 50,
+ "num_senders": 20,
+ "max_size": 2048
+ },
+ {
+ "name": "/pi2/aos",
+ "type": "aos.timing.Report",
+ "source_node": "pi2",
+ "frequency": 50,
+ "num_senders": 20,
+ "max_size": 2048
+ },
+ {
+ "name": "/pi1/aos",
+ "type": "aos.message_bridge.ServerStatistics",
+ "logger": "LOCAL_LOGGER",
+ "source_node": "pi1"
+ },
+ {
+ "name": "/pi2/aos",
+ "type": "aos.message_bridge.ServerStatistics",
+ "logger": "LOCAL_LOGGER",
+ "source_node": "pi2"
+ },
+ {
+ "name": "/pi1/aos",
+ "type": "aos.logging.DynamicLogCommand",
+ "logger": "LOCAL_LOGGER",
+ "source_node": "pi1"
+ },
+ {
+ "name": "/pi2/aos",
+ "type": "aos.logging.DynamicLogCommand",
+ "logger": "LOCAL_LOGGER",
+ "source_node": "pi2"
+ },
+ {
+ "name": "/pi1/aos",
+ "type": "aos.message_bridge.ClientStatistics",
+ "logger": "LOCAL_LOGGER",
+ "source_node": "pi1"
+ },
+ {
+ "name": "/pi2/aos",
+ "type": "aos.message_bridge.ClientStatistics",
+ "logger": "LOCAL_LOGGER",
+ "source_node": "pi2"
+ },
+ {
+ "name": "/pi1/aos",
+ "type": "aos.message_bridge.Timestamp",
+ "logger": "LOCAL_AND_REMOTE_LOGGER",
+ "logger_nodes": ["pi2"],
+ "source_node": "pi1",
+ "destination_nodes": [
+ {
+ "name": "pi2",
+ "timestamp_logger": "LOCAL_AND_REMOTE_LOGGER",
+ "timestamp_logger_nodes": ["pi1"]
+ }
+ ]
+ },
+ {
+ "name": "/pi2/aos",
+ "type": "aos.message_bridge.Timestamp",
+ "logger": "LOCAL_AND_REMOTE_LOGGER",
+ "logger_nodes": ["pi1"],
+ "source_node": "pi2",
+ "destination_nodes": [
+ {
+ "name": "pi1",
+ "timestamp_logger": "LOCAL_AND_REMOTE_LOGGER",
+ "timestamp_logger_nodes": ["pi2"]
+ }
+ ]
+ },
+ {
+ "name": "/pi1/aos/remote_timestamps/pi2/pi1/aos/aos-message_bridge-Timestamp",
+ "type": "aos.message_bridge.RemoteMessage",
+ /* Log one remote timestamp channel to ensure everythings still works. */
+ "logger": "LOCAL_LOGGER",
+ "num_senders": 2,
+ "source_node": "pi1"
+ },
+ {
+ "name": "/pi2/aos/remote_timestamps/pi1/pi2/aos/aos-message_bridge-Timestamp",
+ "type": "aos.message_bridge.RemoteMessage",
+ "logger": "NOT_LOGGED",
+ "num_senders": 2,
+ "source_node": "pi2"
+ },
+
+
+ {
+ "name": "/atest1",
+ "type": "aos.examples.Ping",
+ "source_node": "pi1",
+ "destination_nodes": [
+ {
+ "name": "pi2",
+ "priority": 1,
+ "timestamp_logger": "LOCAL_LOGGER",
+ "time_to_live": 5000000
+ }
+ ],
+ "frequency": 150
+ },
+ {
+ "name": "/atest1",
+ "type": "aos.examples.Pong",
+ "source_node": "pi2",
+ "logger": "LOCAL_AND_REMOTE_LOGGER",
+ "logger_nodes": ["pi1"],
+ "destination_nodes": [
+ {
+ "name": "pi1",
+ "priority": 1,
+ "timestamp_logger": "LOCAL_LOGGER",
+ "time_to_live": 5000000
+ }
+ ],
+ "frequency": 150
+ },
+ {
+ "name": "/atest2",
+ "type": "aos.examples.Ping",
+ "source_node": "pi1",
+ "destination_nodes": [
+ {
+ "name": "pi2",
+ "priority": 1,
+ "timestamp_logger": "LOCAL_LOGGER",
+ "time_to_live": 5000000
+ }
+ ],
+ "frequency": 150
+ },
+ {
+ "name": "/atest2",
+ "type": "aos.examples.Pong",
+ "source_node": "pi2",
+ "logger": "LOCAL_AND_REMOTE_LOGGER",
+ "logger_nodes": ["pi1"],
+ "destination_nodes": [
+ {
+ "name": "pi1",
+ "priority": 1,
+ "timestamp_logger": "LOCAL_LOGGER",
+ "time_to_live": 5000000
+ }
+ ],
+ "frequency": 150
+ },
+ {
+ "name": "/atest3",
+ "type": "aos.examples.Ping",
+ "source_node": "pi1",
+ "destination_nodes": [
+ {
+ "name": "pi2",
+ "priority": 1,
+ "timestamp_logger": "LOCAL_LOGGER",
+ "time_to_live": 5000000
+ }
+ ],
+ "frequency": 150
+ },
+ {
+ "name": "/atest3",
+ "type": "aos.examples.Pong",
+ "source_node": "pi2",
+ "logger": "LOCAL_AND_REMOTE_LOGGER",
+ "logger_nodes": ["pi1"],
+ "destination_nodes": [
+ {
+ "name": "pi1",
+ "priority": 1,
+ "timestamp_logger": "LOCAL_LOGGER",
+ "time_to_live": 5000000
+ }
+ ],
+ "frequency": 150
+ }
+ ],
+ "maps": [
+ {
+ "match": {
+ "name": "/aos*",
+ "source_node": "pi1"
+ },
+ "rename": {
+ "name": "/pi1/aos"
+ }
+ },
+ {
+ "match": {
+ "name": "/aos*",
+ "source_node": "pi2"
+ },
+ "rename": {
+ "name": "/pi2/aos"
+ }
+ }
+ ],
+ "nodes": [
+ {
+ "name": "pi1",
+ "hostname": "raspberrypi",
+ "port": 9971
+ },
+ {
+ "name": "pi2",
+ "hostname": "raspberrypi2",
+ "port": 9971
+ }
+ ]
+}