Actually use the logger local and remote timestamps for sorting

This should make a large chunk of new logs sortable!  Since we don't
trust the combined reliable/unreliable timestamps anymore for helping us
sort, stop using them and switch to these new timestamps instead when
available.

Change-Id: Ic9c9c45c580cdf9e7d4fe84375eceab4b831b161
Signed-off-by: Austin Schuh <austin.schuh@bluerivertech.com>
diff --git a/aos/events/logging/logfile_sorting.cc b/aos/events/logging/logfile_sorting.cc
index f8d33c3..1e7c817 100644
--- a/aos/events/logging/logfile_sorting.cc
+++ b/aos/events/logging/logfile_sorting.cc
@@ -263,6 +263,13 @@
 
 // For a pair of nodes, this holds the oldest times that messages were
 // transfered.
+//
+// TODO(austin): We are finding that the timestamps in
+// oldest_remote_monotonic_timestamp are not very useful because there may not
+// be reliable messages on both boots in the log.  Figure out how to use that
+// data better (or really, use the new reliable remote timestamps field) and
+// update this code.  I think we can delay a bit until someone figures out how
+// to get here without other code paths sorting us first.
 struct BootPairTimes {
   // Pair of local and remote timestamps for the oldest message forwarded to
   // this node.
@@ -620,6 +627,21 @@
                log_header->message()
                    .oldest_remote_unreliable_monotonic_timestamps()
                    ->size());
+      CHECK_EQ(log_header->message()
+                   .has_oldest_logger_local_unreliable_monotonic_timestamps(),
+               log_header->message()
+                   .has_oldest_logger_remote_unreliable_monotonic_timestamps());
+      if (log_header->message()
+              .has_oldest_logger_local_unreliable_monotonic_timestamps()) {
+        CHECK_EQ(boot_uuids_size,
+                 log_header->message()
+                     .oldest_logger_local_unreliable_monotonic_timestamps()
+                     ->size());
+        CHECK_EQ(boot_uuids_size,
+                 log_header->message()
+                     .oldest_logger_remote_unreliable_monotonic_timestamps()
+                     ->size());
+      }
       CHECK(!logger_boot_uuid.empty());
       CHECK(!source_boot_uuid.empty());
       for (size_t node_index = 0; node_index < boot_uuids_size; ++node_index) {
@@ -644,7 +666,26 @@
                 log_header->message()
                     .oldest_remote_unreliable_monotonic_timestamps()
                     ->Get(node_index)));
-        if (boot_uuid.empty() || boot_uuid == source_boot_uuid) {
+
+        const monotonic_clock::time_point
+            oldest_logger_local_unreliable_monotonic_timestamp =
+                log_header->message()
+                        .has_oldest_logger_local_unreliable_monotonic_timestamps()
+                    ? monotonic_clock::time_point(chrono::nanoseconds(
+                          log_header->message()
+                              .oldest_logger_local_unreliable_monotonic_timestamps()
+                              ->Get(node_index)))
+                    : monotonic_clock::max_time;
+        const monotonic_clock::time_point
+            oldest_logger_remote_unreliable_monotonic_timestamp =
+                log_header->message()
+                        .has_oldest_logger_remote_unreliable_monotonic_timestamps()
+                    ? monotonic_clock::time_point(chrono::nanoseconds(
+                          log_header->message()
+                              .oldest_logger_remote_unreliable_monotonic_timestamps()
+                              ->Get(node_index)))
+                    : monotonic_clock::max_time;
+        if (boot_uuid.empty()) {
           CHECK_EQ(oldest_local_monotonic_timestamp, monotonic_clock::max_time);
           CHECK_EQ(oldest_remote_monotonic_timestamp,
                    monotonic_clock::max_time);
@@ -652,9 +693,113 @@
                    monotonic_clock::max_time);
           CHECK_EQ(oldest_remote_unreliable_monotonic_timestamp,
                    monotonic_clock::max_time);
+          CHECK_EQ(oldest_logger_local_unreliable_monotonic_timestamp,
+                   monotonic_clock::max_time);
+          CHECK_EQ(oldest_logger_remote_unreliable_monotonic_timestamp,
+                   monotonic_clock::max_time);
           continue;
         }
 
+        if (boot_uuid == source_boot_uuid) {
+          CHECK_EQ(oldest_local_monotonic_timestamp, monotonic_clock::max_time);
+          CHECK_EQ(oldest_remote_monotonic_timestamp,
+                   monotonic_clock::max_time);
+          CHECK_EQ(oldest_local_unreliable_monotonic_timestamp,
+                   monotonic_clock::max_time);
+          CHECK_EQ(oldest_remote_unreliable_monotonic_timestamp,
+                   monotonic_clock::max_time);
+          if (oldest_logger_local_unreliable_monotonic_timestamp !=
+                     monotonic_clock::max_time) {
+            CHECK_NE(oldest_logger_remote_unreliable_monotonic_timestamp,
+                     monotonic_clock::max_time);
+            // Now, we found a timestamp going the other way.  Add it in!
+            auto logger_node_boot_times_it = boot_times.find(logger_node);
+            if (logger_node_boot_times_it == boot_times.end()) {
+              logger_node_boot_times_it =
+                  boot_times
+                      .emplace(
+                          logger_node,
+                          absl::btree_map<
+                              std::string,
+                              absl::btree_map<
+                                  size_t, absl::btree_map<
+                                              std::string,
+                                              std::vector<BootPairTimes>>>>())
+                      .first;
+            }
+
+            auto logger_source_boot_times_it =
+                logger_node_boot_times_it->second.find(
+                    std::string(logger_boot_uuid));
+
+            if (logger_source_boot_times_it ==
+                logger_node_boot_times_it->second.end()) {
+              logger_source_boot_times_it =
+                  logger_node_boot_times_it->second
+                      .emplace(
+                          logger_boot_uuid,
+                          absl::btree_map<
+                              size_t,
+                              absl::btree_map<std::string,
+                                              std::vector<BootPairTimes>>>())
+                      .first;
+            }
+
+            // We need the index of the source node.  Luckily, since we are at
+            // the index in the boot UUID list which matches the source node
+            // boot uuid, we know it's index!
+            auto logger_destination_boot_times_it =
+                logger_source_boot_times_it->second.find(node_index);
+            if (logger_destination_boot_times_it ==
+                logger_source_boot_times_it->second.end()) {
+              logger_destination_boot_times_it =
+                  logger_source_boot_times_it->second
+                      .emplace(node_index,
+                               absl::btree_map<std::string,
+                                               std::vector<BootPairTimes>>())
+                      .first;
+            }
+
+            auto logger_boot_times_it =
+                logger_destination_boot_times_it->second.find(
+                    std::string(source_boot_uuid));
+
+            if (logger_boot_times_it ==
+                logger_destination_boot_times_it->second.end()) {
+              // We have a new boot UUID pairing.  Copy over the data we have.
+              logger_destination_boot_times_it->second.emplace(
+                  source_boot_uuid,
+                  std::vector<BootPairTimes>{BootPairTimes{
+                      .oldest_remote_monotonic_timestamp =
+                          monotonic_clock::max_time,
+                      .oldest_local_monotonic_timestamp =
+                          monotonic_clock::max_time,
+                      .oldest_remote_unreliable_monotonic_timestamp =
+                          oldest_logger_remote_unreliable_monotonic_timestamp,
+                      .oldest_local_unreliable_monotonic_timestamp =
+                          oldest_logger_local_unreliable_monotonic_timestamp}});
+            } else {
+              logger_boot_times_it->second.emplace_back(BootPairTimes{
+                  .oldest_remote_monotonic_timestamp =
+                      monotonic_clock::max_time,
+                  .oldest_local_monotonic_timestamp = monotonic_clock::max_time,
+                  .oldest_remote_unreliable_monotonic_timestamp =
+                      oldest_logger_remote_unreliable_monotonic_timestamp,
+                  .oldest_local_unreliable_monotonic_timestamp =
+                      oldest_logger_local_unreliable_monotonic_timestamp});
+            }
+          }
+          continue;
+        }
+
+        // There is no supported way to get logger timestamps from anything but
+        // the source node.  Since we've already handled that above, we should
+        // always expect max_time here.
+        CHECK_EQ(oldest_logger_local_unreliable_monotonic_timestamp,
+                 monotonic_clock::max_time);
+        CHECK_EQ(oldest_logger_remote_unreliable_monotonic_timestamp,
+                 monotonic_clock::max_time);
+
         // Now, we have a valid pairing.
         auto destination_boot_times_it =
             source_boot_times_it->second.find(node_index);
@@ -950,24 +1095,9 @@
             source_boot_times.begin(), source_boot_times.end(),
             [](const std::tuple<std::string, BootPairTimes, BootPairTimes> &a,
                const std::tuple<std::string, BootPairTimes, BootPairTimes> &b) {
-              // There are cases where we will only have a reliable timestamp.
-              // In that case, we need to use oldest_local_monotonic_timestamp.
-              // But, that may result in collisions if the same message gets
-              // forwarded to both boots, so it will have the same timestamp.
-              // Solve that by breaking the tie with the unreliable messages.
-              if (std::get<1>(a).oldest_local_monotonic_timestamp ==
-                  std::get<1>(b).oldest_local_monotonic_timestamp) {
-                CHECK_NE(
-                    std::get<1>(a).oldest_local_unreliable_monotonic_timestamp,
-                    std::get<1>(b).oldest_local_unreliable_monotonic_timestamp);
-                return std::get<1>(a)
-                           .oldest_local_unreliable_monotonic_timestamp <
-                       std::get<1>(b)
-                           .oldest_local_unreliable_monotonic_timestamp;
-              } else {
-                return std::get<1>(a).oldest_local_monotonic_timestamp <
-                       std::get<1>(b).oldest_local_monotonic_timestamp;
-              }
+              return std::get<1>(a)
+                         .oldest_local_unreliable_monotonic_timestamp <
+                     std::get<1>(b).oldest_local_unreliable_monotonic_timestamp;
             });
 
         // The last time from the source node on the logger node.
@@ -1091,21 +1221,8 @@
             destination_boot_times.begin(), destination_boot_times.end(),
             [](const std::pair<std::string, BootPairTimes> &a,
                const std::pair<std::string, BootPairTimes> &b) {
-              // There are cases where we will only have a reliable timestamp.
-              // In that case, we need to use oldest_remote_monotonic_timestamp.
-              // But, that may result in collisions if the same message gets
-              // forwarded to both boots, so it will have the same timestamp.
-              // Solve that by breaking the tie with the unreliable messages.
-              if (a.second.oldest_remote_monotonic_timestamp ==
-                  b.second.oldest_remote_monotonic_timestamp) {
-                CHECK_NE(a.second.oldest_remote_unreliable_monotonic_timestamp,
-                         b.second.oldest_remote_unreliable_monotonic_timestamp);
-                return a.second.oldest_remote_unreliable_monotonic_timestamp <
-                       b.second.oldest_remote_unreliable_monotonic_timestamp;
-              } else {
-                return a.second.oldest_remote_monotonic_timestamp <
-                       b.second.oldest_remote_monotonic_timestamp;
-              }
+              return a.second.oldest_remote_unreliable_monotonic_timestamp <
+                     b.second.oldest_remote_unreliable_monotonic_timestamp;
             });
 
         for (size_t boot_id = 0; boot_id < destination_boot_times.size();