Separate logfile sorting into a separate library.

logger.cc was getting big and I'd like to use the datastructures in
logfile sorting in my new code.

Change-Id: If136fb36f24843432723ead1e86c86b2b36f1509
diff --git a/aos/events/logging/BUILD b/aos/events/logging/BUILD
index d829edd..6a85728 100644
--- a/aos/events/logging/BUILD
+++ b/aos/events/logging/BUILD
@@ -14,14 +14,17 @@
 cc_library(
     name = "logfile_utils",
     srcs = [
+        "logfile_sorting.cc",
         "logfile_utils.cc",
     ],
     hdrs = [
+        "logfile_sorting.h",
         "logfile_utils.h",
     ],
     visibility = ["//visibility:public"],
     deps = [
         ":buffer_encoder",
+        ":uuid",
         ":logger_fbs",
         "//aos:configuration",
         "//aos:flatbuffer_merge",
diff --git a/aos/events/logging/logfile_sorting.cc b/aos/events/logging/logfile_sorting.cc
new file mode 100644
index 0000000..f8f08ba
--- /dev/null
+++ b/aos/events/logging/logfile_sorting.cc
@@ -0,0 +1,238 @@
+#include "aos/events/logging/logfile_sorting.h"
+
+#include <algorithm>
+#include <map>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "aos/events/logging/logfile_utils.h"
+#include "aos/flatbuffers.h"
+#include "aos/time/time.h"
+
+namespace aos {
+namespace logger {
+namespace chrono = std::chrono;
+
+std::vector<LogFile> SortParts(const std::vector<std::string> &parts) {
+  // Start by grouping all parts by UUID, and extracting the part index.
+  // Datastructure to hold all the info extracted from a set of parts which go
+  // together so we can sort them afterwords.
+  struct UnsortedLogParts {
+    // Start times.
+    aos::monotonic_clock::time_point monotonic_start_time;
+    aos::realtime_clock::time_point realtime_start_time;
+
+    // Node to save.
+    std::string node;
+
+    // Pairs of the filename and the part index for sorting.
+    std::vector<std::pair<std::string, int>> parts;
+  };
+
+  // Map holding the log_event_uuid -> second map.  The second map holds the
+  // parts_uuid -> list of parts for sorting.
+  std::map<std::string, std::map<std::string, UnsortedLogParts>> parts_list;
+
+  // Sort part files without UUIDs and part indexes as well.  Extract everything
+  // useful from the log in the first pass, then sort later.
+  struct UnsortedOldParts {
+    // Part information with everything but the list of parts.
+    LogParts parts;
+
+    // Tuple of time for the data and filename needed for sorting after
+    // extracting.
+    std::vector<std::pair<monotonic_clock::time_point, std::string>>
+        unsorted_parts;
+  };
+
+  // A list of all the old parts which we don't know how to sort using uuids.
+  // There are enough of these in the wild that this is worth supporting.
+  std::vector<UnsortedOldParts> old_parts;
+
+  // Now extract everything into our datastructures above for sorting.
+  for (const std::string &part : parts) {
+    FlatbufferVector<LogFileHeader> log_header = ReadHeader(part);
+
+    const monotonic_clock::time_point monotonic_start_time(
+        chrono::nanoseconds(log_header.message().monotonic_start_time()));
+    const realtime_clock::time_point realtime_start_time(
+        chrono::nanoseconds(log_header.message().realtime_start_time()));
+
+    const std::string_view node =
+        log_header.message().has_node()
+            ? log_header.message().node()->name()->string_view()
+            : "";
+
+    // Looks like an old log.  No UUID, index, and also single node.  We have
+    // little to no multi-node log files in the wild without part UUIDs and
+    // indexes which we care much about.
+    if (!log_header.message().has_parts_uuid() &&
+        !log_header.message().has_parts_index() &&
+        !log_header.message().has_node()) {
+      FlatbufferVector<MessageHeader> first_message = ReadNthMessage(part, 0);
+      const monotonic_clock::time_point first_message_time(
+          chrono::nanoseconds(first_message.message().monotonic_sent_time()));
+
+      // Find anything with a matching start time.  They all go together.
+      auto result = std::find_if(
+          old_parts.begin(), old_parts.end(),
+          [&](const UnsortedOldParts &parts) {
+            return parts.parts.monotonic_start_time == monotonic_start_time &&
+                   parts.parts.realtime_start_time == realtime_start_time;
+          });
+
+      if (result == old_parts.end()) {
+        old_parts.emplace_back();
+        old_parts.back().parts.monotonic_start_time = monotonic_start_time;
+        old_parts.back().parts.realtime_start_time = realtime_start_time;
+        old_parts.back().unsorted_parts.emplace_back(
+            std::make_pair(first_message_time, part));
+      } else {
+        result->unsorted_parts.emplace_back(
+            std::make_pair(first_message_time, part));
+      }
+      continue;
+    }
+
+    CHECK(log_header.message().has_log_event_uuid());
+    CHECK(log_header.message().has_parts_uuid());
+    CHECK(log_header.message().has_parts_index());
+
+    const std::string log_event_uuid =
+        log_header.message().log_event_uuid()->str();
+    const std::string parts_uuid = log_header.message().parts_uuid()->str();
+    int32_t parts_index = log_header.message().parts_index();
+
+    auto log_it = parts_list.find(log_event_uuid);
+    if (log_it == parts_list.end()) {
+      log_it =
+          parts_list
+              .insert(std::make_pair(log_event_uuid,
+                                     std::map<std::string, UnsortedLogParts>()))
+              .first;
+    }
+
+    auto it = log_it->second.find(parts_uuid);
+    if (it == log_it->second.end()) {
+      it = log_it->second.insert(std::make_pair(parts_uuid, UnsortedLogParts()))
+               .first;
+      it->second.monotonic_start_time = monotonic_start_time;
+      it->second.realtime_start_time = realtime_start_time;
+      it->second.node = std::string(node);
+    }
+
+    // First part might be min_time.  If it is, try to put a better time on it.
+    if (it->second.monotonic_start_time == monotonic_clock::min_time) {
+      it->second.monotonic_start_time = monotonic_start_time;
+    } else if (monotonic_start_time != monotonic_clock::min_time) {
+      CHECK_EQ(it->second.monotonic_start_time, monotonic_start_time);
+    }
+    if (it->second.realtime_start_time == realtime_clock::min_time) {
+      it->second.realtime_start_time = realtime_start_time;
+    } else if (realtime_start_time != realtime_clock::min_time) {
+      CHECK_EQ(it->second.realtime_start_time, realtime_start_time);
+    }
+
+    it->second.parts.emplace_back(std::make_pair(part, parts_index));
+  }
+
+  CHECK_NE(old_parts.empty(), parts_list.empty())
+      << ": Can't have a mix of old and new parts.";
+
+  // Now reformat old_parts to be in the right datastructure to report.
+  if (!old_parts.empty()) {
+    std::vector<LogFile> result;
+    for (UnsortedOldParts &p : old_parts) {
+      // Sort by the oldest message in each file.
+      std::sort(
+          p.unsorted_parts.begin(), p.unsorted_parts.end(),
+          [](const std::pair<monotonic_clock::time_point, std::string> &a,
+             const std::pair<monotonic_clock::time_point, std::string> &b) {
+            return a.first < b.first;
+          });
+      LogFile log_file;
+      for (std::pair<monotonic_clock::time_point, std::string> &f :
+           p.unsorted_parts) {
+        p.parts.parts.emplace_back(std::move(f.second));
+      }
+      log_file.parts.emplace_back(std::move(p.parts));
+      result.emplace_back(std::move(log_file));
+    }
+
+    return result;
+  }
+
+  // Now, sort them and produce the final vector form.
+  std::vector<LogFile> result;
+  result.reserve(parts_list.size());
+  for (std::pair<const std::string, std::map<std::string, UnsortedLogParts>>
+           &logs : parts_list) {
+    LogFile new_file;
+    new_file.log_event_uuid = logs.first;
+    for (std::pair<const std::string, UnsortedLogParts> &parts : logs.second) {
+      LogParts new_parts;
+      new_parts.monotonic_start_time = parts.second.monotonic_start_time;
+      new_parts.realtime_start_time = parts.second.realtime_start_time;
+      new_parts.log_event_uuid = logs.first;
+      new_parts.parts_uuid = parts.first;
+      new_parts.node = std::move(parts.second.node);
+
+      std::sort(parts.second.parts.begin(), parts.second.parts.end(),
+                [](const std::pair<std::string, int> &a,
+                   const std::pair<std::string, int> &b) {
+                  return a.second < b.second;
+                });
+      new_parts.parts.reserve(parts.second.parts.size());
+      for (std::pair<std::string, int> &p : parts.second.parts) {
+        new_parts.parts.emplace_back(std::move(p.first));
+      }
+      new_file.parts.emplace_back(std::move(new_parts));
+    }
+    result.emplace_back(std::move(new_file));
+  }
+  return result;
+}
+
+std::ostream &operator<<(std::ostream &stream, const LogFile &file) {
+  stream << "{";
+  if (!file.log_event_uuid.empty()) {
+    stream << "\"log_event_uuid\": \"" << file.log_event_uuid << "\", ";
+  }
+  stream << "\"parts\": [";
+  for (size_t i = 0; i < file.parts.size(); ++i) {
+    if (i != 0u) {
+      stream << ", ";
+    }
+    stream << file.parts[i];
+  }
+  stream << "]}";
+  return stream;
+}
+std::ostream &operator<<(std::ostream &stream, const LogParts &parts) {
+  stream << "{";
+  if (!parts.log_event_uuid.empty()) {
+    stream << "\"log_event_uuid\": \"" << parts.log_event_uuid << "\", ";
+  }
+  if (!parts.parts_uuid.empty()) {
+    stream << "\"parts_uuid\": \"" << parts.parts_uuid << "\", ";
+  }
+  if (!parts.node.empty()) {
+    stream << "\"node\": \"" << parts.node << "\", ";
+  }
+  stream << "\"monotonic_start_time\": " << parts.monotonic_start_time
+         << ", \"realtime_start_time\": " << parts.realtime_start_time << ", [";
+
+  for (size_t i = 0; i < parts.parts.size(); ++i) {
+    if (i != 0u) {
+      stream << ", ";
+    }
+    stream << parts.parts[i];
+  }
+
+  stream << "]}";
+  return stream;
+}
+
+}  // namespace logger
+}  // namespace aos
diff --git a/aos/events/logging/logfile_sorting.h b/aos/events/logging/logfile_sorting.h
new file mode 100644
index 0000000..8b59dff
--- /dev/null
+++ b/aos/events/logging/logfile_sorting.h
@@ -0,0 +1,52 @@
+#ifndef AOS_EVENTS_LOGGING_LOGFILE_SORTING_H_
+#define AOS_EVENTS_LOGGING_LOGFILE_SORTING_H_
+
+#include <iostream>
+#include <vector>
+#include <string>
+
+#include "aos/events/logging/uuid.h"
+#include "aos/time/time.h"
+
+namespace aos {
+namespace logger {
+
+// Datastructure to hold ordered parts.
+struct LogParts {
+  // Monotonic and realtime start times for this set of log files.  For log
+  // files which started out unknown and then became known, this is the known
+  // start time.
+  aos::monotonic_clock::time_point monotonic_start_time;
+  aos::realtime_clock::time_point realtime_start_time;
+
+  // UUIDs if available.
+  std::string log_event_uuid;
+  std::string parts_uuid;
+
+  // The node this represents, or empty if we are in a single node world.
+  std::string node;
+
+  // Pre-sorted list of parts.
+  std::vector<std::string> parts;
+};
+
+// Datastructure to hold parts from the same run of the logger which have no
+// ordering constraints relative to each other.
+struct LogFile {
+  // The UUID tying them all together (if available)
+  std::string log_event_uuid;
+
+  // All the parts, unsorted.
+  std::vector<LogParts> parts;
+};
+
+std::ostream &operator<<(std::ostream &stream, const LogFile &file);
+std::ostream &operator<<(std::ostream &stream, const LogParts &parts);
+
+// Takes a bunch of parts and sorts them based on part_uuid and part_index.
+std::vector<LogFile> SortParts(const std::vector<std::string> &parts);
+
+}  // namespace logger
+}  // namespace aos
+
+#endif  // AOS_EVENTS_LOGGING_LOGFILE_SORTING_H_
diff --git a/aos/events/logging/logger.cc b/aos/events/logging/logger.cc
index d206d5d..7d0040c 100644
--- a/aos/events/logging/logger.cc
+++ b/aos/events/logging/logger.cc
@@ -11,6 +11,7 @@
 #include "absl/strings/escaping.h"
 #include "absl/types/span.h"
 #include "aos/events/event_loop.h"
+#include "aos/events/logging/logfile_sorting.h"
 #include "aos/events/logging/logger_generated.h"
 #include "aos/events/logging/uuid.h"
 #include "aos/flatbuffer_merge.h"
@@ -681,226 +682,6 @@
   }
 }
 
-std::vector<LogFile> SortParts(const std::vector<std::string> &parts) {
-  // Start by grouping all parts by UUID, and extracting the part index.
-  // Datastructure to hold all the info extracted from a set of parts which go
-  // together so we can sort them afterwords.
-  struct UnsortedLogParts {
-    // Start times.
-    aos::monotonic_clock::time_point monotonic_start_time;
-    aos::realtime_clock::time_point realtime_start_time;
-
-    // Node to save.
-    std::string node;
-
-    // Pairs of the filename and the part index for sorting.
-    std::vector<std::pair<std::string, int>> parts;
-  };
-
-  // Map holding the log_event_uuid -> second map.  The second map holds the
-  // parts_uuid -> list of parts for sorting.
-  std::map<std::string, std::map<std::string, UnsortedLogParts>> parts_list;
-
-  // Sort part files without UUIDs and part indexes as well.  Extract everything
-  // useful from the log in the first pass, then sort later.
-  struct UnsortedOldParts {
-    // Part information with everything but the list of parts.
-    LogParts parts;
-
-    // Tuple of time for the data and filename needed for sorting after
-    // extracting.
-    std::vector<std::pair<monotonic_clock::time_point, std::string>>
-        unsorted_parts;
-  };
-
-  // A list of all the old parts which we don't know how to sort using uuids.
-  // There are enough of these in the wild that this is worth supporting.
-  std::vector<UnsortedOldParts> old_parts;
-
-  // Now extract everything into our datastructures above for sorting.
-  for (const std::string &part : parts) {
-    FlatbufferVector<LogFileHeader> log_header = ReadHeader(part);
-
-    const monotonic_clock::time_point monotonic_start_time(
-        chrono::nanoseconds(log_header.message().monotonic_start_time()));
-    const realtime_clock::time_point realtime_start_time(
-        chrono::nanoseconds(log_header.message().realtime_start_time()));
-
-    const std::string_view node =
-        log_header.message().has_node()
-            ? log_header.message().node()->name()->string_view()
-            : "";
-
-    // Looks like an old log.  No UUID, index, and also single node.  We have
-    // little to no multi-node log files in the wild without part UUIDs and
-    // indexes which we care much about.
-    if (!log_header.message().has_parts_uuid() &&
-        !log_header.message().has_parts_index() &&
-        !log_header.message().has_node()) {
-      FlatbufferVector<MessageHeader> first_message = ReadNthMessage(part, 0);
-      const monotonic_clock::time_point first_message_time(
-          chrono::nanoseconds(first_message.message().monotonic_sent_time()));
-
-      // Find anything with a matching start time.  They all go together.
-      auto result = std::find_if(
-          old_parts.begin(), old_parts.end(),
-          [&](const UnsortedOldParts &parts) {
-            return parts.parts.monotonic_start_time == monotonic_start_time &&
-                   parts.parts.realtime_start_time == realtime_start_time;
-          });
-
-      if (result == old_parts.end()) {
-        old_parts.emplace_back();
-        old_parts.back().parts.monotonic_start_time = monotonic_start_time;
-        old_parts.back().parts.realtime_start_time = realtime_start_time;
-        old_parts.back().unsorted_parts.emplace_back(
-            std::make_pair(first_message_time, part));
-      } else {
-        result->unsorted_parts.emplace_back(
-            std::make_pair(first_message_time, part));
-      }
-      continue;
-    }
-
-    CHECK(log_header.message().has_log_event_uuid());
-    CHECK(log_header.message().has_parts_uuid());
-    CHECK(log_header.message().has_parts_index());
-
-    const std::string log_event_uuid =
-        log_header.message().log_event_uuid()->str();
-    const std::string parts_uuid = log_header.message().parts_uuid()->str();
-    int32_t parts_index = log_header.message().parts_index();
-
-    auto log_it = parts_list.find(log_event_uuid);
-    if (log_it == parts_list.end()) {
-      log_it =
-          parts_list
-              .insert(std::make_pair(log_event_uuid,
-                                     std::map<std::string, UnsortedLogParts>()))
-              .first;
-    }
-
-    auto it = log_it->second.find(parts_uuid);
-    if (it == log_it->second.end()) {
-      it = log_it->second.insert(std::make_pair(parts_uuid, UnsortedLogParts()))
-               .first;
-      it->second.monotonic_start_time = monotonic_start_time;
-      it->second.realtime_start_time = realtime_start_time;
-      it->second.node = std::string(node);
-    }
-
-    // First part might be min_time.  If it is, try to put a better time on it.
-    if (it->second.monotonic_start_time == monotonic_clock::min_time) {
-      it->second.monotonic_start_time = monotonic_start_time;
-    } else if (monotonic_start_time != monotonic_clock::min_time) {
-      CHECK_EQ(it->second.monotonic_start_time, monotonic_start_time);
-    }
-    if (it->second.realtime_start_time == realtime_clock::min_time) {
-      it->second.realtime_start_time = realtime_start_time;
-    } else if (realtime_start_time != realtime_clock::min_time) {
-      CHECK_EQ(it->second.realtime_start_time, realtime_start_time);
-    }
-
-    it->second.parts.emplace_back(std::make_pair(part, parts_index));
-  }
-
-  CHECK_NE(old_parts.empty(), parts_list.empty())
-      << ": Can't have a mix of old and new parts.";
-
-  // Now reformat old_parts to be in the right datastructure to report.
-  if (!old_parts.empty()) {
-    std::vector<LogFile> result;
-    for (UnsortedOldParts &p : old_parts) {
-      // Sort by the oldest message in each file.
-      std::sort(
-          p.unsorted_parts.begin(), p.unsorted_parts.end(),
-          [](const std::pair<monotonic_clock::time_point, std::string> &a,
-             const std::pair<monotonic_clock::time_point, std::string> &b) {
-            return a.first < b.first;
-          });
-      LogFile log_file;
-      for (std::pair<monotonic_clock::time_point, std::string> &f :
-           p.unsorted_parts) {
-        p.parts.parts.emplace_back(std::move(f.second));
-      }
-      log_file.parts.emplace_back(std::move(p.parts));
-      result.emplace_back(std::move(log_file));
-    }
-
-    return result;
-  }
-
-  // Now, sort them and produce the final vector form.
-  std::vector<LogFile> result;
-  result.reserve(parts_list.size());
-  for (std::pair<const std::string, std::map<std::string, UnsortedLogParts>>
-           &logs : parts_list) {
-    LogFile new_file;
-    new_file.log_event_uuid = logs.first;
-    for (std::pair<const std::string, UnsortedLogParts> &parts : logs.second) {
-      LogParts new_parts;
-      new_parts.monotonic_start_time = parts.second.monotonic_start_time;
-      new_parts.realtime_start_time = parts.second.realtime_start_time;
-      new_parts.log_event_uuid = logs.first;
-      new_parts.parts_uuid = parts.first;
-      new_parts.node = std::move(parts.second.node);
-
-      std::sort(parts.second.parts.begin(), parts.second.parts.end(),
-                [](const std::pair<std::string, int> &a,
-                   const std::pair<std::string, int> &b) {
-                  return a.second < b.second;
-                });
-      new_parts.parts.reserve(parts.second.parts.size());
-      for (std::pair<std::string, int> &p : parts.second.parts) {
-        new_parts.parts.emplace_back(std::move(p.first));
-      }
-      new_file.parts.emplace_back(std::move(new_parts));
-    }
-    result.emplace_back(std::move(new_file));
-  }
-  return result;
-}
-
-std::ostream &operator<<(std::ostream &stream, const LogFile &file) {
-  stream << "{";
-  if (!file.log_event_uuid.empty()) {
-    stream << "\"log_event_uuid\": \"" << file.log_event_uuid << "\", ";
-  }
-  stream << "\"parts\": [";
-  for (size_t i = 0; i < file.parts.size(); ++i) {
-    if (i != 0u) {
-      stream << ", ";
-    }
-    stream << file.parts[i];
-  }
-  stream << "]}";
-  return stream;
-}
-std::ostream &operator<<(std::ostream &stream, const LogParts &parts) {
-  stream << "{";
-  if (!parts.log_event_uuid.empty()) {
-    stream << "\"log_event_uuid\": \"" << parts.log_event_uuid << "\", ";
-  }
-  if (!parts.parts_uuid.empty()) {
-    stream << "\"parts_uuid\": \"" << parts.parts_uuid << "\", ";
-  }
-  if (!parts.node.empty()) {
-    stream << "\"node\": \"" << parts.node << "\", ";
-  }
-  stream << "\"monotonic_start_time\": " << parts.monotonic_start_time
-         << ", \"realtime_start_time\": " << parts.realtime_start_time << ", [";
-
-  for (size_t i = 0; i < parts.parts.size(); ++i) {
-    if (i != 0u) {
-      stream << ", ";
-    }
-    stream << parts.parts[i];
-  }
-
-  stream << "]}";
-  return stream;
-}
-
 std::vector<std::vector<std::string>> ToLogReaderVector(
     const std::vector<LogFile> &log_files) {
   std::vector<std::vector<std::string>> result;
diff --git a/aos/events/logging/logger.h b/aos/events/logging/logger.h
index ca4630f..b37fea2 100644
--- a/aos/events/logging/logger.h
+++ b/aos/events/logging/logger.h
@@ -13,6 +13,7 @@
 #include "aos/events/event_loop.h"
 #include "aos/events/logging/eigen_mpq.h"
 #include "aos/events/logging/log_namer.h"
+#include "aos/events/logging/logfile_sorting.h"
 #include "aos/events/logging/logfile_utils.h"
 #include "aos/events/logging/logger_generated.h"
 #include "aos/events/logging/uuid.h"
@@ -279,41 +280,6 @@
   std::vector<NodeState> node_state_;
 };
 
-// Datastructure to hold ordered parts.
-struct LogParts {
-  // Monotonic and realtime start times for this set of log files.  For log
-  // files which started out unknown and then became known, this is the known
-  // start time.
-  aos::monotonic_clock::time_point monotonic_start_time;
-  aos::realtime_clock::time_point realtime_start_time;
-
-  // UUIDs if available.
-  std::string log_event_uuid;
-  std::string parts_uuid;
-
-  // The node this represents, or empty if we are in a single node world.
-  std::string node;
-
-  // Pre-sorted list of parts.
-  std::vector<std::string> parts;
-};
-
-// Datastructure to hold parts from the same run of the logger which have no
-// ordering constraints relative to each other.
-struct LogFile {
-  // The UUID tying them all together (if available)
-  std::string log_event_uuid;
-
-  // All the parts, unsorted.
-  std::vector<LogParts> parts;
-};
-
-std::ostream &operator<<(std::ostream &stream, const LogFile &file);
-std::ostream &operator<<(std::ostream &stream, const LogParts &parts);
-
-// Takes a bunch of parts and sorts them based on part_uuid and part_index.
-std::vector<LogFile> SortParts(const std::vector<std::string> &parts);
-
 std::vector<std::vector<std::string>> ToLogReaderVector(
     const std::vector<LogFile> &log_files);