Refactor the logic from SortParts into a class

This doesn't change the interface exposed to the user, but makes the
code easier to follow and adds a good spot for adding interfaces and
other things needed to make the code more robust as sorting gets more
and more complicated.

Change-Id: I179c13b4b4c8193089b00ed63ad1ab38c1937ecf
Signed-off-by: Austin Schuh <austin.schuh@bluerivertech.com>
diff --git a/aos/events/logging/logfile_sorting.cc b/aos/events/logging/logfile_sorting.cc
index bdb7dd5..acb2219 100644
--- a/aos/events/logging/logfile_sorting.cc
+++ b/aos/events/logging/logfile_sorting.cc
@@ -100,77 +100,94 @@
   return found_logfiles;
 }
 
-std::vector<LogFile> SortParts(const std::vector<std::string> &parts) {
+// Start by grouping all parts by UUID, and extracting the part index.
+// Datastructure to hold all the info extracted from a set of parts which go
+// together so we can sort them afterwords.
+struct UnsortedLogParts {
+  // Start times.
+  aos::monotonic_clock::time_point monotonic_start_time;
+  aos::realtime_clock::time_point realtime_start_time;
+
+  aos::monotonic_clock::time_point logger_monotonic_start_time =
+      aos::monotonic_clock::min_time;
+  aos::realtime_clock::time_point logger_realtime_start_time =
+      aos::realtime_clock::min_time;
+
+  // Node to save.
+  std::string node;
+
+  // The boot UUID of the node which generated this data.
+  std::string source_boot_uuid;
+
+  // Pairs of the filename and the part index for sorting.
+  std::vector<std::pair<std::string, int>> parts;
+
+  std::string config_sha256;
+};
+
+// Struct to hold both the node, and the parts associated with it.
+struct UnsortedLogPartsMap {
+  std::string logger_node;
+  // The boot UUID of the node this log file was created on.
+  std::string logger_boot_uuid;
+
+  aos::monotonic_clock::time_point monotonic_start_time =
+      aos::monotonic_clock::min_time;
+  aos::realtime_clock::time_point realtime_start_time =
+      aos::realtime_clock::min_time;
+
+  // Name from a log.  All logs below have been confirmed to match.
+  std::string name;
+
+  std::map<std::string, UnsortedLogParts> unsorted_parts;
+};
+
+// Sort part files without UUIDs and part indexes as well.  Extract everything
+// useful from the log in the first pass, then sort later.
+struct UnsortedOldParts {
+  // Part information with everything but the list of parts.
+  LogParts parts;
+
+  // Tuple of time for the data and filename needed for sorting after
+  // extracting.
+  std::vector<std::pair<monotonic_clock::time_point, std::string>>
+      unsorted_parts;
+
+  // Name from a log.  All logs below have been confirmed to match.
+  std::string name;
+};
+
+// Helper class to make it easier to sort a list of log files into
+// std::vector<LogFile>
+struct PartsSorter {
+  // List of files that were corrupted.
   std::vector<std::string> corrupted;
 
+  // Map from sha256 to the config.
   std::map<std::string, std::shared_ptr<const Configuration>>
       config_sha256_lookup;
 
-  // Start by grouping all parts by UUID, and extracting the part index.
-  // Datastructure to hold all the info extracted from a set of parts which go
-  // together so we can sort them afterwords.
-  struct UnsortedLogParts {
-    // Start times.
-    aos::monotonic_clock::time_point monotonic_start_time;
-    aos::realtime_clock::time_point realtime_start_time;
-
-    aos::monotonic_clock::time_point logger_monotonic_start_time =
-        aos::monotonic_clock::min_time;
-    aos::realtime_clock::time_point logger_realtime_start_time =
-        aos::realtime_clock::min_time;
-
-    // Node to save.
-    std::string node;
-
-    // The boot UUID of the node which generated this data.
-    std::string source_boot_uuid;
-
-    // Pairs of the filename and the part index for sorting.
-    std::vector<std::pair<std::string, int>> parts;
-
-    std::string config_sha256;
-  };
-
-  // Struct to hold both the node, and the parts associated with it.
-  struct UnsortedLogPartsMap {
-    std::string logger_node;
-    // The boot UUID of the node this log file was created on.
-    std::string logger_boot_uuid;
-
-    aos::monotonic_clock::time_point monotonic_start_time =
-        aos::monotonic_clock::min_time;
-    aos::realtime_clock::time_point realtime_start_time =
-        aos::realtime_clock::min_time;
-
-    // Name from a log.  All logs below have been confirmed to match.
-    std::string name;
-
-    std::map<std::string, UnsortedLogParts> unsorted_parts;
-  };
-
   // Map holding the log_event_uuid -> second map.  The second map holds the
   // parts_uuid -> list of parts for sorting.
   std::map<std::string, UnsortedLogPartsMap> parts_list;
 
-  // Sort part files without UUIDs and part indexes as well.  Extract everything
-  // useful from the log in the first pass, then sort later.
-  struct UnsortedOldParts {
-    // Part information with everything but the list of parts.
-    LogParts parts;
-
-    // Tuple of time for the data and filename needed for sorting after
-    // extracting.
-    std::vector<std::pair<monotonic_clock::time_point, std::string>>
-        unsorted_parts;
-
-    // Name from a log.  All logs below have been confirmed to match.
-    std::string name;
-  };
-
   // A list of all the old parts which we don't know how to sort using uuids.
   // There are enough of these in the wild that this is worth supporting.
   std::vector<UnsortedOldParts> old_parts;
 
+  // Populates the class's datastructures from the input file list.
+  void PopulateFromFiles(const std::vector<std::string> &parts);
+
+  // Reformats old_parts into a list of logfiles and returns it.  This destroys
+  // state in PartsSorter.
+  std::vector<LogFile> FormatOldParts();
+
+  // Reformats parts_list into a list of logfiles and returns it.  This destroys
+  // state in PartsSorter.
+  std::vector<LogFile> FormatNewParts();
+};
+
+void PartsSorter::PopulateFromFiles(const std::vector<std::string> &parts) {
   // Now extract everything into our datastructures above for sorting.
   for (const std::string &part : parts) {
     std::optional<SizePrefixedFlatbufferVector<LogFileHeader>> log_header =
@@ -356,79 +373,71 @@
 
     it->second.parts.emplace_back(std::make_pair(part, parts_index));
   }
+}
 
-  if (old_parts.empty() && parts_list.empty()) {
-    if (parts.empty()) {
-      return std::vector<LogFile>{};
-    } else {
-      LogFile log_file;
-      log_file.corrupted = std::move(corrupted);
-      return std::vector<LogFile>{log_file};
-    }
-  }
-  CHECK_NE(old_parts.empty(), parts_list.empty())
-      << ": Can't have a mix of old and new parts.";
-
+std::vector<LogFile> PartsSorter::FormatOldParts() {
   // Now reformat old_parts to be in the right datastructure to report.
   std::map<std::string, std::shared_ptr<const Configuration>>
       copied_config_sha256;
-  if (!old_parts.empty()) {
-    std::vector<LogFile> result;
-    for (UnsortedOldParts &p : old_parts) {
-      // Sort by the oldest message in each file.
-      std::sort(
-          p.unsorted_parts.begin(), p.unsorted_parts.end(),
-          [](const std::pair<monotonic_clock::time_point, std::string> &a,
-             const std::pair<monotonic_clock::time_point, std::string> &b) {
-            return a.first < b.first;
-          });
-      LogFile log_file;
+  CHECK(!old_parts.empty());
 
-      // We want to use a single Configuration flatbuffer for all the parts to
-      // make downstream easier.  Since this is an old log, it doesn't have a
-      // SHA256 in the header to rely on, so we need a way to detect duplicates.
-      //
-      // SHA256 is decently fast, so use that as a representative hash of the
-      // header.
-      auto header =
-          std::make_shared<SizePrefixedFlatbufferVector<LogFileHeader>>(
-              std::move(*ReadHeader(p.unsorted_parts[0].second)));
+  std::vector<LogFile> result;
+  for (UnsortedOldParts &p : old_parts) {
+    // Sort by the oldest message in each file.
+    std::sort(p.unsorted_parts.begin(), p.unsorted_parts.end(),
+              [](const std::pair<monotonic_clock::time_point, std::string> &a,
+                 const std::pair<monotonic_clock::time_point, std::string> &b) {
+                return a.first < b.first;
+              });
+    LogFile log_file;
 
-      // Do a recursive copy to normalize the flatbuffer.  Different
-      // configurations can be built different ways, and can even have their
-      // vtable out of order.  Don't think and just trigger a copy.
-      FlatbufferDetachedBuffer<Configuration> config_copy =
-          RecursiveCopyFlatBuffer(header->message().configuration());
+    // We want to use a single Configuration flatbuffer for all the parts to
+    // make downstream easier.  Since this is an old log, it doesn't have a
+    // SHA256 in the header to rely on, so we need a way to detect duplicates.
+    //
+    // SHA256 is decently fast, so use that as a representative hash of the
+    // header.
+    auto header = std::make_shared<SizePrefixedFlatbufferVector<LogFileHeader>>(
+        std::move(*ReadHeader(p.unsorted_parts[0].second)));
 
-      std::string config_copy_sha256 = Sha256(config_copy.span());
+    // Do a recursive copy to normalize the flatbuffer.  Different
+    // configurations can be built different ways, and can even have their
+    // vtable out of order.  Don't think and just trigger a copy.
+    FlatbufferDetachedBuffer<Configuration> config_copy =
+        RecursiveCopyFlatBuffer(header->message().configuration());
 
-      auto it = copied_config_sha256.find(config_copy_sha256);
-      if (it != copied_config_sha256.end()) {
-        log_file.config = it->second;
-      } else {
-        std::shared_ptr<const Configuration> config(
-            header, header->message().configuration());
+    std::string config_copy_sha256 = Sha256(config_copy.span());
 
-        copied_config_sha256.emplace(std::move(config_copy_sha256), config);
-        log_file.config = config;
-      }
+    auto it = copied_config_sha256.find(config_copy_sha256);
+    if (it != copied_config_sha256.end()) {
+      log_file.config = it->second;
+    } else {
+      std::shared_ptr<const Configuration> config(
+          header, header->message().configuration());
 
-      for (std::pair<monotonic_clock::time_point, std::string> &f :
-           p.unsorted_parts) {
-        p.parts.parts.emplace_back(std::move(f.second));
-      }
-      p.parts.config = log_file.config;
-      log_file.parts.emplace_back(std::move(p.parts));
-      log_file.monotonic_start_time = log_file.parts[0].monotonic_start_time;
-      log_file.realtime_start_time = log_file.parts[0].realtime_start_time;
-      log_file.corrupted = corrupted;
-      log_file.name = p.name;
-      result.emplace_back(std::move(log_file));
+      copied_config_sha256.emplace(std::move(config_copy_sha256), config);
+      log_file.config = config;
     }
 
-    return result;
+    for (std::pair<monotonic_clock::time_point, std::string> &f :
+         p.unsorted_parts) {
+      p.parts.parts.emplace_back(std::move(f.second));
+    }
+    p.parts.config = log_file.config;
+    log_file.parts.emplace_back(std::move(p.parts));
+    log_file.monotonic_start_time = log_file.parts[0].monotonic_start_time;
+    log_file.realtime_start_time = log_file.parts[0].realtime_start_time;
+    log_file.corrupted = corrupted;
+    log_file.name = p.name;
+    result.emplace_back(std::move(log_file));
   }
 
+  return result;
+}
+
+std::vector<LogFile> PartsSorter::FormatNewParts() {
+  std::map<std::string, std::shared_ptr<const Configuration>>
+      copied_config_sha256;
   // Now, sort them and produce the final vector form.
   std::vector<LogFile> result;
   result.reserve(parts_list.size());
@@ -530,6 +539,29 @@
   return result;
 }
 
+std::vector<LogFile> SortParts(const std::vector<std::string> &parts) {
+  PartsSorter sorter;
+  sorter.PopulateFromFiles(parts);
+
+  if (sorter.old_parts.empty() && sorter.parts_list.empty()) {
+    if (parts.empty()) {
+      return std::vector<LogFile>{};
+    } else {
+      LogFile log_file;
+      log_file.corrupted = std::move(sorter.corrupted);
+      return std::vector<LogFile>{log_file};
+    }
+  }
+  CHECK_NE(sorter.old_parts.empty(), sorter.parts_list.empty())
+      << ": Can't have a mix of old and new parts.";
+
+  if (!sorter.old_parts.empty()) {
+    return sorter.FormatOldParts();
+  }
+
+  return sorter.FormatNewParts();
+}
+
 std::vector<std::string> FindNodes(const std::vector<LogFile> &parts) {
   std::set<std::string> nodes;
   for (const LogFile &log_file : parts) {
@@ -610,6 +642,7 @@
   }
   if (!parts.source_boot_uuid.empty()) {
     stream << "  \"source_boot_uuid\": \"" << parts.source_boot_uuid << "\",\n";
+    stream << "  \"boot_count\": " << parts.boot_count << ",\n";
   }
   stream << "  \"config\": " << parts.config.get();
   if (!parts.config_sha256.empty()) {
diff --git a/aos/events/logging/logfile_sorting.h b/aos/events/logging/logfile_sorting.h
index 964e592..71196c8 100644
--- a/aos/events/logging/logfile_sorting.h
+++ b/aos/events/logging/logfile_sorting.h
@@ -38,6 +38,10 @@
   // data, this is the boot_uuid of the remote node.
   std::string source_boot_uuid;
 
+  // Boot number for this node.  This communicates the order of all the
+  // source_boot_uuid's for a node.
+  size_t boot_count = 0;
+
   // Pre-sorted list of parts.
   std::vector<std::string> parts;