Detect part files missing from the middle of a log

There are very few use cases where we want to delete part files out of
the middle.  Most of the rest of the code will complain bitterly.  Until
someone comes up with a use case, detect it when sorting and complain
then.

Change-Id: I8ddb0b6f2bde6c5eeb020a87783114f41df49137
Signed-off-by: Austin Schuh <austin.schuh@bluerivertech.com>
diff --git a/aos/events/logging/logfile_sorting.cc b/aos/events/logging/logfile_sorting.cc
index 82b4684..1a16456 100644
--- a/aos/events/logging/logfile_sorting.cc
+++ b/aos/events/logging/logfile_sorting.cc
@@ -7,6 +7,7 @@
 #include <vector>
 
 #include "absl/container/btree_map.h"
+#include "absl/strings/str_join.h"
 #include "aos/events/logging/logfile_utils.h"
 #include "aos/flatbuffer_merge.h"
 #include "aos/flatbuffers.h"
@@ -143,6 +144,16 @@
   }
 }
 
+std::string ConcatenateParts(
+    const std::vector<std::pair<std::string, int>> &parts) {
+  std::vector<std::string_view> stringview_parts;
+  stringview_parts.reserve(parts.size());
+  for (const std::pair<std::string, int> &p : parts) {
+    stringview_parts.emplace_back(p.first);
+  }
+  return absl::StrCat("[\"", absl::StrJoin(stringview_parts, "\", \""), "\"]");
+}
+
 }  // namespace
 
 void FindLogs(std::vector<std::string> *files, std::string filename) {
@@ -1318,14 +1329,15 @@
                 CHECK_NE(
                     std::get<1>(a).oldest_local_reliable_monotonic_timestamp,
                     std::get<1>(b).oldest_local_reliable_monotonic_timestamp)
-                    << ": The same reliable message has been forwarded to both "
-                       "boots.  This is ambiguous, please investigate.";
+                    << ": Broken logic, the same reliable message has been "
+                       "forwarded to both boots.  This is ambiguous, please "
+                       "investigate.";
                 return std::get<1>(a)
                            .oldest_local_reliable_monotonic_timestamp <
                        std::get<1>(b).oldest_local_reliable_monotonic_timestamp;
               } else {
-                LOG(FATAL) << "Unable to compare timestamps " << std::get<1>(a)
-                           << ", " << std::get<1>(b);
+                LOG(FATAL) << "Broken logic, unable to compare timestamps "
+                           << std::get<1>(a) << ", " << std::get<1>(b);
               }
             });
 
@@ -1362,8 +1374,9 @@
                          << MaxLocalBootTime(std::get<2>(fatal_boot_time))
                          << "] on remote " << remote_node_name;
             }
-            LOG(FATAL) << "Found overlapping boots on " << remote_node_name
-                       << " remote node " << remote_node_name << ", "
+            LOG(FATAL) << "Broken log, found overlapping boots on "
+                       << remote_node_name << " remote node "
+                       << remote_node_name << ", "
                        << MinLocalBootTime(std::get<1>(boot_time)) << " < "
                        << last_boot_time;
           }
@@ -1882,11 +1895,25 @@
       new_parts.parts.reserve(parts.second.parts.size());
       {
         int last_parts_index = -1;
+        std::string_view last_part_name;
         for (std::pair<std::string, int> &p : parts.second.parts) {
           CHECK_LT(last_parts_index, p.second)
-              << ": Found duplicate parts in '" << new_parts.parts.back()
+              << ": Broken log, Found duplicate parts in '" << last_part_name
               << "' and '" << p.first << "'";
+          if (last_parts_index != -1) {
+            if (p.second != last_parts_index + 1) {
+              LOG(FATAL) << "Broken log, missing part files between \""
+                         << last_part_name << "\" and \"" << p.first
+                         << "\", found "
+                         << ConcatenateParts(parts.second.parts);
+            }
+          }
           last_parts_index = p.second;
+          last_part_name = p.first;
+        }
+
+        // Now that we are happy that it works, move them all.
+        for (std::pair<std::string, int> &p : parts.second.parts) {
           new_parts.parts.emplace_back(std::move(p.first));
         }
       }
diff --git a/aos/events/logging/logfile_utils.cc b/aos/events/logging/logfile_utils.cc
index 01f3002..9768584 100644
--- a/aos/events/logging/logfile_utils.cc
+++ b/aos/events/logging/logfile_utils.cc
@@ -846,6 +846,7 @@
   if (m.has_monotonic_timestamp_time) {
     os << ", .monotonic_timestamp_time=" << m.monotonic_timestamp_time;
   }
+  os << "}";
   return os;
 }
 
@@ -884,6 +885,8 @@
   }
   if (m.data != nullptr) {
     os << ", .data=" << *m.data;
+  } else {
+    os << ", .data=nullptr";
   }
   os << "}";
   return os;
diff --git a/aos/events/logging/logfile_utils_test.cc b/aos/events/logging/logfile_utils_test.cc
index d024ae0..663302c 100644
--- a/aos/events/logging/logfile_utils_test.cc
+++ b/aos/events/logging/logfile_utils_test.cc
@@ -2718,7 +2718,7 @@
         const std::vector<LogFile> parts =
             SortParts({logfile0_, logfile1_, logfile2_, logfile3_});
       },
-      "Found overlapping boots on");
+      "found overlapping boots on");
 }
 
 // Tests that we MessageReader blows up on a bad message.
diff --git a/aos/events/logging/logger_test.cc b/aos/events/logging/logger_test.cc
index ed7d150..9160891 100644
--- a/aos/events/logging/logger_test.cc
+++ b/aos/events/logging/logger_test.cc
@@ -3643,6 +3643,41 @@
   EXPECT_DEATH({ SortParts(duplicates); }, "Found duplicate parts in");
 }
 
+// Tests that we explode if someone loses a part out of the middle of a log.
+TEST_P(MultinodeLoggerTest, MissingPartsFromMiddle) {
+  time_converter_.AddMonotonic(
+      {BootTimestamp::epoch(), BootTimestamp::epoch() + chrono::seconds(1000)});
+  {
+    LoggerState pi1_logger = MakeLogger(pi1_);
+
+    event_loop_factory_.RunFor(chrono::milliseconds(95));
+
+
+    StartLogger(&pi1_logger);
+    aos::monotonic_clock::time_point last_rotation_time =
+        pi1_logger.event_loop->monotonic_now();
+    pi1_logger.logger->set_on_logged_period([&] {
+      const auto now = pi1_logger.event_loop->monotonic_now();
+      if (now > last_rotation_time + std::chrono::seconds(5)) {
+        pi1_logger.logger->Rotate();
+        last_rotation_time = now;
+      }
+    });
+
+    event_loop_factory_.RunFor(chrono::milliseconds(10000));
+  }
+
+  std::vector<std::string> missing_parts;
+
+  missing_parts.emplace_back(logfile_base1_ + "_pi1_data.part0" + Extension());
+  missing_parts.emplace_back(logfile_base1_ + "_pi1_data.part2" + Extension());
+  missing_parts.emplace_back(absl::StrCat(
+      logfile_base1_, "_", std::get<0>(GetParam()).sha256, Extension()));
+
+  EXPECT_DEATH({ SortParts(missing_parts); },
+               "Broken log, missing part files between");
+}
+
 // Tests that we properly handle a dead node.  Do this by just disconnecting it
 // and only using one nodes of logs.
 TEST_P(MultinodeLoggerTest, DeadNode) {