Add logic for mixed reliable/unreliable boot order
Tests are present that exercise the Unreliable vs Reliable added if
statement. Both possible time orders are tested
Unreliable < Reliable and Unreliable > Reliable.
I am unable to produce a test that exercises the
Reliable vs Unreliable added if without introducing artificial test
conditions into logfile_sorting.cc. Manual testing shows that it
works as expected, but there is no automated test present.
Change-Id: Ia0e6e3ccc1e42763465e8506bfeb7bec0838782b
Signed-off-by: Austin Schuh <austin.schuh@bluerivertech.com>
diff --git a/aos/events/logging/BUILD b/aos/events/logging/BUILD
index 6cb3740..d421fae 100644
--- a/aos/events/logging/BUILD
+++ b/aos/events/logging/BUILD
@@ -606,6 +606,36 @@
)
aos_config(
+ name = "multinode_pingpong_split4_mixed1_config",
+ src = "multinode_pingpong_split4_mixed1.json",
+ flatbuffers = [
+ "//aos/events:ping_fbs",
+ "//aos/events:pong_fbs",
+ "//aos/network:message_bridge_client_fbs",
+ "//aos/network:message_bridge_server_fbs",
+ "//aos/network:remote_message_fbs",
+ "//aos/network:timestamp_fbs",
+ ],
+ target_compatible_with = ["@platforms//os:linux"],
+ deps = ["//aos/events:aos_config"],
+)
+
+aos_config(
+ name = "multinode_pingpong_split4_mixed2_config",
+ src = "multinode_pingpong_split4_mixed2.json",
+ flatbuffers = [
+ "//aos/events:ping_fbs",
+ "//aos/events:pong_fbs",
+ "//aos/network:message_bridge_client_fbs",
+ "//aos/network:message_bridge_server_fbs",
+ "//aos/network:remote_message_fbs",
+ "//aos/network:timestamp_fbs",
+ ],
+ target_compatible_with = ["@platforms//os:linux"],
+ deps = ["//aos/events:aos_config"],
+)
+
+aos_config(
name = "multinode_pingpong_split4_reliable_config",
src = "multinode_pingpong_split4_reliable.json",
flatbuffers = [
@@ -696,6 +726,8 @@
":multinode_pingpong_combined_config",
":multinode_pingpong_split3_config",
":multinode_pingpong_split4_config",
+ ":multinode_pingpong_split4_mixed1_config",
+ ":multinode_pingpong_split4_mixed2_config",
":multinode_pingpong_split4_reliable_config",
":multinode_pingpong_split_config",
":multinode_pingpong_triangle_split_config",
diff --git a/aos/events/logging/logfile_sorting.cc b/aos/events/logging/logfile_sorting.cc
index 89801b7..cf53552 100644
--- a/aos/events/logging/logfile_sorting.cc
+++ b/aos/events/logging/logfile_sorting.cc
@@ -1276,6 +1276,21 @@
// .oldest_remote_unreliable_monotonic_timestamp=9223372036.854775807sec,
// .oldest_local_unreliable_monotonic_timestamp=9223372036.854775807sec
// }
+ // 4) One reliable, one unreliable, local times don't match. 1 < 2
+ // same message got sent, and with reliable timestamps, we don't
+ // know how long it took to cross the network.
+ // {
+ // .oldest_remote_reliable_monotonic_timestamp=9223372036.854775807sec,
+ // .oldest_local_reliable_monotonic_timestamp=9223372036.854775807sec
+ // .oldest_remote_unreliable_monotonic_timestamp=10.122999611sec,
+ // .oldest_local_unreliable_monotonic_timestamp=9.400951024sec,
+ // }
+ // {
+ // .oldest_remote_reliable_monotonic_timestamp=11.798054208sec,
+ // .oldest_local_reliable_monotonic_timestamp=23457.772660691sec,
+ // .oldest_remote_unreliable_monotonic_timestamp=9223372036.854775807sec,
+ // .oldest_local_unreliable_monotonic_timestamp=9223372036.854775807sec
+ // }
//
// Writing all this out for which timestamps we have out of all 32
// combinations, and which cases each of the correspond to:
@@ -1286,10 +1301,10 @@
// { }, {ru} no match -> fail, won't be in the list
// { u}, { } no match -> fail, won't be in the list
// { u}, { u} no match -> 1
- // { u}, {r } no match -> fail
+ // { u}, {r } no match -> 4
// { u}, {ru} no match -> 1
// {r }, { } no match -> fail, won't be in the list
- // {r }, { u} no match -> fail
+ // {r }, { u} no match -> 4
// {r }, {r } no match -> 2
// {r }, {ru} no match -> 2
// {ru}, { } no match -> fail, won't be in the list
@@ -1353,11 +1368,13 @@
aos::monotonic_clock::max_time;
if (both_unreliable) {
+ VLOG(1) << "Both Unreliable";
return std::get<1>(a)
.oldest_local_unreliable_monotonic_timestamp <
std::get<1>(b)
.oldest_local_unreliable_monotonic_timestamp;
} else if (both_reliable) {
+ VLOG(1) << "Both Reliable";
CHECK_NE(
std::get<1>(a).oldest_local_reliable_monotonic_timestamp,
std::get<1>(b).oldest_local_reliable_monotonic_timestamp)
@@ -1367,6 +1384,42 @@
return std::get<1>(a)
.oldest_local_reliable_monotonic_timestamp <
std::get<1>(b).oldest_local_reliable_monotonic_timestamp;
+
+ } else if (std::get<1>(a)
+ .oldest_local_reliable_monotonic_timestamp !=
+ aos::monotonic_clock::max_time &&
+ std::get<1>(b)
+ .oldest_local_unreliable_monotonic_timestamp !=
+ aos::monotonic_clock::max_time) {
+ VLOG(1)
+ << " Comparing Reliable "
+ << std::get<1>(a).oldest_local_reliable_monotonic_timestamp;
+ VLOG(1) << " Versus Uneliable "
+ << std::get<1>(b)
+ .oldest_local_unreliable_monotonic_timestamp;
+
+ return std::get<1>(a)
+ .oldest_local_reliable_monotonic_timestamp <
+ std::get<1>(b)
+ .oldest_local_unreliable_monotonic_timestamp;
+
+ } else if (std::get<1>(a)
+ .oldest_local_unreliable_monotonic_timestamp !=
+ aos::monotonic_clock::max_time &&
+ std::get<1>(b)
+ .oldest_local_reliable_monotonic_timestamp !=
+ aos::monotonic_clock::max_time) {
+ VLOG(1) << " Comparing Unreliable "
+ << std::get<1>(a)
+ .oldest_local_unreliable_monotonic_timestamp;
+ VLOG(1)
+ << " Versus Reliable "
+ << std::get<1>(b).oldest_local_reliable_monotonic_timestamp;
+
+ return std::get<1>(a)
+ .oldest_local_unreliable_monotonic_timestamp <
+ std::get<1>(b).oldest_local_reliable_monotonic_timestamp;
+
} else {
LOG(FATAL) << "Broken logic, unable to compare timestamps "
<< std::get<1>(a) << ", " << std::get<1>(b);
diff --git a/aos/events/logging/multinode_logger_test.cc b/aos/events/logging/multinode_logger_test.cc
index 5d18d3d..d9d04b2 100644
--- a/aos/events/logging/multinode_logger_test.cc
+++ b/aos/events/logging/multinode_logger_test.cc
@@ -1,3 +1,5 @@
+#include <algorithm>
+
#include "aos/events/logging/log_reader.h"
#include "aos/events/logging/multinode_logger_test_lib.h"
#include "aos/events/message_counter.h"
@@ -3593,6 +3595,146 @@
ConfirmReadable(filenames);
}
+// Tests that we properly handle only one direction ever existing after a reboot
+// with mixed unreliable vs reliable, where reliable has an earlier timestamp
+// than unreliable.
+TEST(MissingDirectionTest, OneDirectionAfterRebootMixedCase1) {
+ aos::FlatbufferDetachedBuffer<aos::Configuration> config =
+ aos::configuration::ReadConfig(ArtifactPath(
+ "aos/events/logging/multinode_pingpong_split4_mixed1_config.json"));
+ message_bridge::TestingTimeConverter time_converter(
+ configuration::NodesCount(&config.message()));
+ SimulatedEventLoopFactory event_loop_factory(&config.message());
+ event_loop_factory.SetTimeConverter(&time_converter);
+
+ NodeEventLoopFactory *const pi1 =
+ event_loop_factory.GetNodeEventLoopFactory("pi1");
+ const size_t pi1_index = configuration::GetNodeIndex(
+ event_loop_factory.configuration(), pi1->node());
+ NodeEventLoopFactory *const pi2 =
+ event_loop_factory.GetNodeEventLoopFactory("pi2");
+ const size_t pi2_index = configuration::GetNodeIndex(
+ event_loop_factory.configuration(), pi2->node());
+ std::vector<std::string> filenames;
+
+ {
+ CHECK_EQ(pi1_index, 0u);
+ CHECK_EQ(pi2_index, 1u);
+
+ time_converter.AddNextTimestamp(
+ distributed_clock::epoch(),
+ {BootTimestamp::epoch(), BootTimestamp::epoch()});
+
+ const chrono::nanoseconds reboot_time = chrono::milliseconds(5000);
+ time_converter.AddNextTimestamp(
+ distributed_clock::epoch() + reboot_time,
+ {BootTimestamp{.boot = 1, .time = monotonic_clock::epoch()},
+ BootTimestamp::epoch() + reboot_time});
+ }
+
+ const std::string kLogfile2_1 =
+ aos::testing::TestTmpDir() + "/multi_logfile2.1/";
+ util::UnlinkRecursive(kLogfile2_1);
+
+ // The following sequence using the above reference config creates
+ // a reliable message timestamp < unreliable message timestamp.
+ {
+ pi1->DisableStatistics();
+ pi2->DisableStatistics();
+
+ event_loop_factory.RunFor(chrono::milliseconds(95));
+
+ pi1->AlwaysStart<Ping>("ping");
+
+ event_loop_factory.RunFor(chrono::milliseconds(5250));
+
+ pi1->EnableStatistics();
+
+ event_loop_factory.RunFor(chrono::milliseconds(1000));
+
+ LoggerState pi2_logger = MakeLoggerState(
+ pi2, &event_loop_factory, SupportedCompressionAlgorithms()[0]);
+
+ pi2_logger.StartLogger(kLogfile2_1);
+
+ event_loop_factory.RunFor(chrono::milliseconds(5000));
+ pi2_logger.AppendAllFilenames(&filenames);
+ }
+
+ const std::vector<LogFile> sorted_parts = SortParts(filenames);
+ ConfirmReadable(filenames);
+}
+
+// Tests that we properly handle only one direction ever existing after a reboot
+// with mixed unreliable vs reliable, where unreliable has an earlier timestamp
+// than reliable.
+TEST(MissingDirectionTest, OneDirectionAfterRebootMixedCase2) {
+ aos::FlatbufferDetachedBuffer<aos::Configuration> config =
+ aos::configuration::ReadConfig(ArtifactPath(
+ "aos/events/logging/multinode_pingpong_split4_mixed2_config.json"));
+ message_bridge::TestingTimeConverter time_converter(
+ configuration::NodesCount(&config.message()));
+ SimulatedEventLoopFactory event_loop_factory(&config.message());
+ event_loop_factory.SetTimeConverter(&time_converter);
+
+ NodeEventLoopFactory *const pi1 =
+ event_loop_factory.GetNodeEventLoopFactory("pi1");
+ const size_t pi1_index = configuration::GetNodeIndex(
+ event_loop_factory.configuration(), pi1->node());
+ NodeEventLoopFactory *const pi2 =
+ event_loop_factory.GetNodeEventLoopFactory("pi2");
+ const size_t pi2_index = configuration::GetNodeIndex(
+ event_loop_factory.configuration(), pi2->node());
+ std::vector<std::string> filenames;
+
+ {
+ CHECK_EQ(pi1_index, 0u);
+ CHECK_EQ(pi2_index, 1u);
+
+ time_converter.AddNextTimestamp(
+ distributed_clock::epoch(),
+ {BootTimestamp::epoch(), BootTimestamp::epoch()});
+
+ const chrono::nanoseconds reboot_time = chrono::milliseconds(5000);
+ time_converter.AddNextTimestamp(
+ distributed_clock::epoch() + reboot_time,
+ {BootTimestamp{.boot = 1, .time = monotonic_clock::epoch()},
+ BootTimestamp::epoch() + reboot_time});
+ }
+
+ const std::string kLogfile2_1 =
+ aos::testing::TestTmpDir() + "/multi_logfile2.1/";
+ util::UnlinkRecursive(kLogfile2_1);
+
+ // The following sequence using the above reference config creates
+ // an unreliable message timestamp < reliable message timestamp.
+ {
+ pi1->DisableStatistics();
+ pi2->DisableStatistics();
+
+ event_loop_factory.RunFor(chrono::milliseconds(95));
+
+ pi1->AlwaysStart<Ping>("ping");
+
+ event_loop_factory.RunFor(chrono::milliseconds(5250));
+
+ pi1->EnableStatistics();
+
+ event_loop_factory.RunFor(chrono::milliseconds(1000));
+
+ LoggerState pi2_logger = MakeLoggerState(
+ pi2, &event_loop_factory, SupportedCompressionAlgorithms()[0]);
+
+ pi2_logger.StartLogger(kLogfile2_1);
+
+ event_loop_factory.RunFor(chrono::milliseconds(5000));
+ pi2_logger.AppendAllFilenames(&filenames);
+ }
+
+ const std::vector<LogFile> sorted_parts = SortParts(filenames);
+ ConfirmReadable(filenames);
+}
+
// Tests that we properly handle what used to be a time violation in one
// direction. This can occur when one direction goes down after sending some
// data, but the other keeps working. The down direction ends up resolving to a
diff --git a/aos/events/logging/multinode_pingpong_split4_mixed1.json b/aos/events/logging/multinode_pingpong_split4_mixed1.json
new file mode 100644
index 0000000..d376b00
--- /dev/null
+++ b/aos/events/logging/multinode_pingpong_split4_mixed1.json
@@ -0,0 +1,176 @@
+{
+ "channels": [
+ {
+ "name": "/pi1/aos",
+ "type": "aos.logging.LogMessageFbs",
+ "source_node": "pi1",
+ "frequency": 200,
+ "num_senders": 20,
+ "max_size": 2048
+ },
+ {
+ "name": "/pi2/aos",
+ "type": "aos.logging.LogMessageFbs",
+ "source_node": "pi2",
+ "frequency": 200,
+ "num_senders": 20,
+ "max_size": 2048
+ },
+ /* Logged on pi1 locally */
+ {
+ "name": "/pi1/aos",
+ "type": "aos.timing.Report",
+ "source_node": "pi1",
+ "frequency": 50,
+ "num_senders": 20,
+ "max_size": 2048
+ },
+ {
+ "name": "/pi2/aos",
+ "type": "aos.timing.Report",
+ "source_node": "pi2",
+ "frequency": 50,
+ "num_senders": 20,
+ "max_size": 2048
+ },
+ {
+ "name": "/pi1/aos",
+ "type": "aos.message_bridge.ServerStatistics",
+ "logger": "LOCAL_LOGGER",
+ "source_node": "pi1"
+ },
+ {
+ "name": "/pi2/aos",
+ "type": "aos.message_bridge.ServerStatistics",
+ "logger": "LOCAL_LOGGER",
+ "source_node": "pi2"
+ },
+ {
+ "name": "/pi1/aos",
+ "type": "aos.message_bridge.ClientStatistics",
+ "logger": "LOCAL_LOGGER",
+ "source_node": "pi1"
+ },
+ {
+ "name": "/pi2/aos",
+ "type": "aos.message_bridge.ClientStatistics",
+ "logger": "LOCAL_LOGGER",
+ "source_node": "pi2"
+ },
+ {
+ "name": "/pi1/aos",
+ "type": "aos.message_bridge.Timestamp",
+ "logger": "LOCAL_AND_REMOTE_LOGGER",
+ "logger_nodes": ["pi2"],
+ "source_node": "pi1",
+ "destination_nodes": [
+ {
+ "name": "pi2",
+ "time_to_live": 5000000
+ }
+ ]
+ },
+ {
+ "name": "/pi2/aos",
+ "type": "aos.message_bridge.Timestamp",
+ "logger": "LOCAL_AND_REMOTE_LOGGER",
+ "logger_nodes": ["pi1"],
+ "source_node": "pi2",
+ "destination_nodes": [
+ {
+ "name": "pi1"
+ }
+ ]
+ },
+ {
+ "name": "/pi1/aos/remote_timestamps/pi2/pi1/aos/aos-message_bridge-Timestamp",
+ "type": "aos.message_bridge.RemoteMessage",
+ "logger": "NOT_LOGGED",
+ "num_senders": 2,
+ "source_node": "pi1"
+ },
+ {
+ "name": "/pi1/aos/remote_timestamps/pi2/test/aos-examples-Ping",
+ "type": "aos.message_bridge.RemoteMessage",
+ "logger": "NOT_LOGGED",
+ "num_senders": 2,
+ "source_node": "pi1",
+ "frequency": 150
+ },
+ {
+ "name": "/pi2/aos/remote_timestamps/pi1/test/aos-examples-Pong",
+ "type": "aos.message_bridge.RemoteMessage",
+ "logger": "NOT_LOGGED",
+ "num_senders": 2,
+ "source_node": "pi2",
+ "frequency": 150
+ },
+ /* Forwarded to pi2 */
+ {
+ "name": "/test",
+ "type": "aos.examples.Ping",
+ "source_node": "pi1",
+ "logger": "LOCAL_AND_REMOTE_LOGGER",
+ "logger_nodes": ["pi2"],
+ "destination_nodes": [
+ {
+ "name": "pi2",
+ "priority": 1
+ }
+ ],
+ "frequency": 150
+ },
+ /* Forwarded back to pi1.
+ * The message is logged both on the sending node and the receiving node
+ * (to make it easier to look at the results for now).
+ *
+ * The timestamps are logged on the receiving node.
+ */
+ {
+ "name": "/test",
+ "type": "aos.examples.Pong",
+ "source_node": "pi2",
+ "logger": "LOCAL_AND_REMOTE_LOGGER",
+ "logger_nodes": ["pi1"],
+ "destination_nodes": [
+ {
+ "name": "pi1",
+ "priority": 1
+ }
+ ],
+ "frequency": 150
+ }
+ ],
+ "maps": [
+ {
+ "match": {
+ "name": "/aos*",
+ "source_node": "pi1"
+ },
+ "rename": {
+ "name": "/pi1/aos"
+ }
+ },
+ {
+ "match": {
+ "name": "/aos*",
+ "source_node": "pi2"
+ },
+ "rename": {
+ "name": "/pi2/aos"
+ }
+ }
+ ],
+ "nodes": [
+ {
+ "name": "pi1",
+ "hostname": "raspberrypi",
+ "port": 9971
+ },
+ {
+ "name": "pi2",
+ "hostname": "raspberrypi2",
+ "port": 9971
+ }
+ ]
+}
diff --git a/aos/events/logging/multinode_pingpong_split4_mixed2.json b/aos/events/logging/multinode_pingpong_split4_mixed2.json
new file mode 100644
index 0000000..57235e0
--- /dev/null
+++ b/aos/events/logging/multinode_pingpong_split4_mixed2.json
@@ -0,0 +1,176 @@
+{
+ "channels": [
+ {
+ "name": "/pi1/aos",
+ "type": "aos.logging.LogMessageFbs",
+ "source_node": "pi1",
+ "frequency": 200,
+ "num_senders": 20,
+ "max_size": 2048
+ },
+ {
+ "name": "/pi2/aos",
+ "type": "aos.logging.LogMessageFbs",
+ "source_node": "pi2",
+ "frequency": 200,
+ "num_senders": 20,
+ "max_size": 2048
+ },
+ /* Logged on pi1 locally */
+ {
+ "name": "/pi1/aos",
+ "type": "aos.timing.Report",
+ "source_node": "pi1",
+ "frequency": 50,
+ "num_senders": 20,
+ "max_size": 2048
+ },
+ {
+ "name": "/pi2/aos",
+ "type": "aos.timing.Report",
+ "source_node": "pi2",
+ "frequency": 50,
+ "num_senders": 20,
+ "max_size": 2048
+ },
+ {
+ "name": "/pi1/aos",
+ "type": "aos.message_bridge.ServerStatistics",
+ "logger": "LOCAL_LOGGER",
+ "source_node": "pi1"
+ },
+ {
+ "name": "/pi2/aos",
+ "type": "aos.message_bridge.ServerStatistics",
+ "logger": "LOCAL_LOGGER",
+ "source_node": "pi2"
+ },
+ {
+ "name": "/pi1/aos",
+ "type": "aos.message_bridge.ClientStatistics",
+ "logger": "LOCAL_LOGGER",
+ "source_node": "pi1"
+ },
+ {
+ "name": "/pi2/aos",
+ "type": "aos.message_bridge.ClientStatistics",
+ "logger": "LOCAL_LOGGER",
+ "source_node": "pi2"
+ },
+ {
+ "name": "/pi1/aos",
+ "type": "aos.message_bridge.Timestamp",
+ "logger": "LOCAL_AND_REMOTE_LOGGER",
+ "logger_nodes": ["pi2"],
+ "source_node": "pi1",
+ "destination_nodes": [
+ {
+ "name": "pi2"
+ }
+ ]
+ },
+ {
+ "name": "/pi2/aos",
+ "type": "aos.message_bridge.Timestamp",
+ "logger": "LOCAL_AND_REMOTE_LOGGER",
+ "logger_nodes": ["pi1"],
+ "source_node": "pi2",
+ "destination_nodes": [
+ {
+ "name": "pi1"
+ }
+ ]
+ },
+ {
+ "name": "/pi1/aos/remote_timestamps/pi2/pi1/aos/aos-message_bridge-Timestamp",
+ "type": "aos.message_bridge.RemoteMessage",
+ "logger": "NOT_LOGGED",
+ "num_senders": 2,
+ "source_node": "pi1"
+ },
+ {
+ "name": "/pi1/aos/remote_timestamps/pi2/test/aos-examples-Ping",
+ "type": "aos.message_bridge.RemoteMessage",
+ "logger": "NOT_LOGGED",
+ "num_senders": 2,
+ "source_node": "pi1",
+ "frequency": 150
+ },
+ {
+ "name": "/pi2/aos/remote_timestamps/pi1/test/aos-examples-Pong",
+ "type": "aos.message_bridge.RemoteMessage",
+ "logger": "NOT_LOGGED",
+ "num_senders": 2,
+ "source_node": "pi2",
+ "frequency": 150
+ },
+ /* Forwarded to pi2 */
+ {
+ "name": "/test",
+ "type": "aos.examples.Ping",
+ "source_node": "pi1",
+ "logger": "LOCAL_AND_REMOTE_LOGGER",
+ "logger_nodes": ["pi2"],
+ "destination_nodes": [
+ {
+ "name": "pi2",
+ "priority": 1,
+ "time_to_live": 5000000
+ }
+ ],
+ "frequency": 150
+ },
+ /* Forwarded back to pi1.
+ * The message is logged both on the sending node and the receiving node
+ * (to make it easier to look at the results for now).
+ *
+ * The timestamps are logged on the receiving node.
+ */
+ {
+ "name": "/test",
+ "type": "aos.examples.Pong",
+ "source_node": "pi2",
+ "logger": "LOCAL_AND_REMOTE_LOGGER",
+ "logger_nodes": ["pi1"],
+ "destination_nodes": [
+ {
+ "name": "pi1",
+ "priority": 1
+ }
+ ],
+ "frequency": 150
+ }
+ ],
+ "maps": [
+ {
+ "match": {
+ "name": "/aos*",
+ "source_node": "pi1"
+ },
+ "rename": {
+ "name": "/pi1/aos"
+ }
+ },
+ {
+ "match": {
+ "name": "/aos*",
+ "source_node": "pi2"
+ },
+ "rename": {
+ "name": "/pi2/aos"
+ }
+ }
+ ],
+ "nodes": [
+ {
+ "name": "pi1",
+ "hostname": "raspberrypi",
+ "port": 9971
+ },
+ {
+ "name": "pi2",
+ "hostname": "raspberrypi2",
+ "port": 9971
+ }
+ ]
+}