Separate logfile sorting into a separate library.
logger.cc was getting big and I'd like to use the datastructures in
logfile sorting in my new code.
Change-Id: If136fb36f24843432723ead1e86c86b2b36f1509
diff --git a/aos/events/logging/BUILD b/aos/events/logging/BUILD
index d829edd..6a85728 100644
--- a/aos/events/logging/BUILD
+++ b/aos/events/logging/BUILD
@@ -14,14 +14,17 @@
cc_library(
name = "logfile_utils",
srcs = [
+ "logfile_sorting.cc",
"logfile_utils.cc",
],
hdrs = [
+ "logfile_sorting.h",
"logfile_utils.h",
],
visibility = ["//visibility:public"],
deps = [
":buffer_encoder",
+ ":uuid",
":logger_fbs",
"//aos:configuration",
"//aos:flatbuffer_merge",
diff --git a/aos/events/logging/logfile_sorting.cc b/aos/events/logging/logfile_sorting.cc
new file mode 100644
index 0000000..f8f08ba
--- /dev/null
+++ b/aos/events/logging/logfile_sorting.cc
@@ -0,0 +1,238 @@
+#include "aos/events/logging/logfile_sorting.h"
+
+#include <algorithm>
+#include <map>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "aos/events/logging/logfile_utils.h"
+#include "aos/flatbuffers.h"
+#include "aos/time/time.h"
+
+namespace aos {
+namespace logger {
+namespace chrono = std::chrono;
+
+std::vector<LogFile> SortParts(const std::vector<std::string> &parts) {
+ // Start by grouping all parts by UUID, and extracting the part index.
+ // Datastructure to hold all the info extracted from a set of parts which go
+ // together so we can sort them afterwords.
+ struct UnsortedLogParts {
+ // Start times.
+ aos::monotonic_clock::time_point monotonic_start_time;
+ aos::realtime_clock::time_point realtime_start_time;
+
+ // Node to save.
+ std::string node;
+
+ // Pairs of the filename and the part index for sorting.
+ std::vector<std::pair<std::string, int>> parts;
+ };
+
+ // Map holding the log_event_uuid -> second map. The second map holds the
+ // parts_uuid -> list of parts for sorting.
+ std::map<std::string, std::map<std::string, UnsortedLogParts>> parts_list;
+
+ // Sort part files without UUIDs and part indexes as well. Extract everything
+ // useful from the log in the first pass, then sort later.
+ struct UnsortedOldParts {
+ // Part information with everything but the list of parts.
+ LogParts parts;
+
+ // Tuple of time for the data and filename needed for sorting after
+ // extracting.
+ std::vector<std::pair<monotonic_clock::time_point, std::string>>
+ unsorted_parts;
+ };
+
+ // A list of all the old parts which we don't know how to sort using uuids.
+ // There are enough of these in the wild that this is worth supporting.
+ std::vector<UnsortedOldParts> old_parts;
+
+ // Now extract everything into our datastructures above for sorting.
+ for (const std::string &part : parts) {
+ FlatbufferVector<LogFileHeader> log_header = ReadHeader(part);
+
+ const monotonic_clock::time_point monotonic_start_time(
+ chrono::nanoseconds(log_header.message().monotonic_start_time()));
+ const realtime_clock::time_point realtime_start_time(
+ chrono::nanoseconds(log_header.message().realtime_start_time()));
+
+ const std::string_view node =
+ log_header.message().has_node()
+ ? log_header.message().node()->name()->string_view()
+ : "";
+
+ // Looks like an old log. No UUID, index, and also single node. We have
+ // little to no multi-node log files in the wild without part UUIDs and
+ // indexes which we care much about.
+ if (!log_header.message().has_parts_uuid() &&
+ !log_header.message().has_parts_index() &&
+ !log_header.message().has_node()) {
+ FlatbufferVector<MessageHeader> first_message = ReadNthMessage(part, 0);
+ const monotonic_clock::time_point first_message_time(
+ chrono::nanoseconds(first_message.message().monotonic_sent_time()));
+
+ // Find anything with a matching start time. They all go together.
+ auto result = std::find_if(
+ old_parts.begin(), old_parts.end(),
+ [&](const UnsortedOldParts &parts) {
+ return parts.parts.monotonic_start_time == monotonic_start_time &&
+ parts.parts.realtime_start_time == realtime_start_time;
+ });
+
+ if (result == old_parts.end()) {
+ old_parts.emplace_back();
+ old_parts.back().parts.monotonic_start_time = monotonic_start_time;
+ old_parts.back().parts.realtime_start_time = realtime_start_time;
+ old_parts.back().unsorted_parts.emplace_back(
+ std::make_pair(first_message_time, part));
+ } else {
+ result->unsorted_parts.emplace_back(
+ std::make_pair(first_message_time, part));
+ }
+ continue;
+ }
+
+ CHECK(log_header.message().has_log_event_uuid());
+ CHECK(log_header.message().has_parts_uuid());
+ CHECK(log_header.message().has_parts_index());
+
+ const std::string log_event_uuid =
+ log_header.message().log_event_uuid()->str();
+ const std::string parts_uuid = log_header.message().parts_uuid()->str();
+ int32_t parts_index = log_header.message().parts_index();
+
+ auto log_it = parts_list.find(log_event_uuid);
+ if (log_it == parts_list.end()) {
+ log_it =
+ parts_list
+ .insert(std::make_pair(log_event_uuid,
+ std::map<std::string, UnsortedLogParts>()))
+ .first;
+ }
+
+ auto it = log_it->second.find(parts_uuid);
+ if (it == log_it->second.end()) {
+ it = log_it->second.insert(std::make_pair(parts_uuid, UnsortedLogParts()))
+ .first;
+ it->second.monotonic_start_time = monotonic_start_time;
+ it->second.realtime_start_time = realtime_start_time;
+ it->second.node = std::string(node);
+ }
+
+ // First part might be min_time. If it is, try to put a better time on it.
+ if (it->second.monotonic_start_time == monotonic_clock::min_time) {
+ it->second.monotonic_start_time = monotonic_start_time;
+ } else if (monotonic_start_time != monotonic_clock::min_time) {
+ CHECK_EQ(it->second.monotonic_start_time, monotonic_start_time);
+ }
+ if (it->second.realtime_start_time == realtime_clock::min_time) {
+ it->second.realtime_start_time = realtime_start_time;
+ } else if (realtime_start_time != realtime_clock::min_time) {
+ CHECK_EQ(it->second.realtime_start_time, realtime_start_time);
+ }
+
+ it->second.parts.emplace_back(std::make_pair(part, parts_index));
+ }
+
+ CHECK_NE(old_parts.empty(), parts_list.empty())
+ << ": Can't have a mix of old and new parts.";
+
+ // Now reformat old_parts to be in the right datastructure to report.
+ if (!old_parts.empty()) {
+ std::vector<LogFile> result;
+ for (UnsortedOldParts &p : old_parts) {
+ // Sort by the oldest message in each file.
+ std::sort(
+ p.unsorted_parts.begin(), p.unsorted_parts.end(),
+ [](const std::pair<monotonic_clock::time_point, std::string> &a,
+ const std::pair<monotonic_clock::time_point, std::string> &b) {
+ return a.first < b.first;
+ });
+ LogFile log_file;
+ for (std::pair<monotonic_clock::time_point, std::string> &f :
+ p.unsorted_parts) {
+ p.parts.parts.emplace_back(std::move(f.second));
+ }
+ log_file.parts.emplace_back(std::move(p.parts));
+ result.emplace_back(std::move(log_file));
+ }
+
+ return result;
+ }
+
+ // Now, sort them and produce the final vector form.
+ std::vector<LogFile> result;
+ result.reserve(parts_list.size());
+ for (std::pair<const std::string, std::map<std::string, UnsortedLogParts>>
+ &logs : parts_list) {
+ LogFile new_file;
+ new_file.log_event_uuid = logs.first;
+ for (std::pair<const std::string, UnsortedLogParts> &parts : logs.second) {
+ LogParts new_parts;
+ new_parts.monotonic_start_time = parts.second.monotonic_start_time;
+ new_parts.realtime_start_time = parts.second.realtime_start_time;
+ new_parts.log_event_uuid = logs.first;
+ new_parts.parts_uuid = parts.first;
+ new_parts.node = std::move(parts.second.node);
+
+ std::sort(parts.second.parts.begin(), parts.second.parts.end(),
+ [](const std::pair<std::string, int> &a,
+ const std::pair<std::string, int> &b) {
+ return a.second < b.second;
+ });
+ new_parts.parts.reserve(parts.second.parts.size());
+ for (std::pair<std::string, int> &p : parts.second.parts) {
+ new_parts.parts.emplace_back(std::move(p.first));
+ }
+ new_file.parts.emplace_back(std::move(new_parts));
+ }
+ result.emplace_back(std::move(new_file));
+ }
+ return result;
+}
+
+std::ostream &operator<<(std::ostream &stream, const LogFile &file) {
+ stream << "{";
+ if (!file.log_event_uuid.empty()) {
+ stream << "\"log_event_uuid\": \"" << file.log_event_uuid << "\", ";
+ }
+ stream << "\"parts\": [";
+ for (size_t i = 0; i < file.parts.size(); ++i) {
+ if (i != 0u) {
+ stream << ", ";
+ }
+ stream << file.parts[i];
+ }
+ stream << "]}";
+ return stream;
+}
+std::ostream &operator<<(std::ostream &stream, const LogParts &parts) {
+ stream << "{";
+ if (!parts.log_event_uuid.empty()) {
+ stream << "\"log_event_uuid\": \"" << parts.log_event_uuid << "\", ";
+ }
+ if (!parts.parts_uuid.empty()) {
+ stream << "\"parts_uuid\": \"" << parts.parts_uuid << "\", ";
+ }
+ if (!parts.node.empty()) {
+ stream << "\"node\": \"" << parts.node << "\", ";
+ }
+ stream << "\"monotonic_start_time\": " << parts.monotonic_start_time
+ << ", \"realtime_start_time\": " << parts.realtime_start_time << ", [";
+
+ for (size_t i = 0; i < parts.parts.size(); ++i) {
+ if (i != 0u) {
+ stream << ", ";
+ }
+ stream << parts.parts[i];
+ }
+
+ stream << "]}";
+ return stream;
+}
+
+} // namespace logger
+} // namespace aos
diff --git a/aos/events/logging/logfile_sorting.h b/aos/events/logging/logfile_sorting.h
new file mode 100644
index 0000000..8b59dff
--- /dev/null
+++ b/aos/events/logging/logfile_sorting.h
@@ -0,0 +1,52 @@
+#ifndef AOS_EVENTS_LOGGING_LOGFILE_SORTING_H_
+#define AOS_EVENTS_LOGGING_LOGFILE_SORTING_H_
+
+#include <iostream>
+#include <vector>
+#include <string>
+
+#include "aos/events/logging/uuid.h"
+#include "aos/time/time.h"
+
+namespace aos {
+namespace logger {
+
+// Datastructure to hold ordered parts.
+struct LogParts {
+ // Monotonic and realtime start times for this set of log files. For log
+ // files which started out unknown and then became known, this is the known
+ // start time.
+ aos::monotonic_clock::time_point monotonic_start_time;
+ aos::realtime_clock::time_point realtime_start_time;
+
+ // UUIDs if available.
+ std::string log_event_uuid;
+ std::string parts_uuid;
+
+ // The node this represents, or empty if we are in a single node world.
+ std::string node;
+
+ // Pre-sorted list of parts.
+ std::vector<std::string> parts;
+};
+
+// Datastructure to hold parts from the same run of the logger which have no
+// ordering constraints relative to each other.
+struct LogFile {
+ // The UUID tying them all together (if available)
+ std::string log_event_uuid;
+
+ // All the parts, unsorted.
+ std::vector<LogParts> parts;
+};
+
+std::ostream &operator<<(std::ostream &stream, const LogFile &file);
+std::ostream &operator<<(std::ostream &stream, const LogParts &parts);
+
+// Takes a bunch of parts and sorts them based on part_uuid and part_index.
+std::vector<LogFile> SortParts(const std::vector<std::string> &parts);
+
+} // namespace logger
+} // namespace aos
+
+#endif // AOS_EVENTS_LOGGING_LOGFILE_SORTING_H_
diff --git a/aos/events/logging/logger.cc b/aos/events/logging/logger.cc
index d206d5d..7d0040c 100644
--- a/aos/events/logging/logger.cc
+++ b/aos/events/logging/logger.cc
@@ -11,6 +11,7 @@
#include "absl/strings/escaping.h"
#include "absl/types/span.h"
#include "aos/events/event_loop.h"
+#include "aos/events/logging/logfile_sorting.h"
#include "aos/events/logging/logger_generated.h"
#include "aos/events/logging/uuid.h"
#include "aos/flatbuffer_merge.h"
@@ -681,226 +682,6 @@
}
}
-std::vector<LogFile> SortParts(const std::vector<std::string> &parts) {
- // Start by grouping all parts by UUID, and extracting the part index.
- // Datastructure to hold all the info extracted from a set of parts which go
- // together so we can sort them afterwords.
- struct UnsortedLogParts {
- // Start times.
- aos::monotonic_clock::time_point monotonic_start_time;
- aos::realtime_clock::time_point realtime_start_time;
-
- // Node to save.
- std::string node;
-
- // Pairs of the filename and the part index for sorting.
- std::vector<std::pair<std::string, int>> parts;
- };
-
- // Map holding the log_event_uuid -> second map. The second map holds the
- // parts_uuid -> list of parts for sorting.
- std::map<std::string, std::map<std::string, UnsortedLogParts>> parts_list;
-
- // Sort part files without UUIDs and part indexes as well. Extract everything
- // useful from the log in the first pass, then sort later.
- struct UnsortedOldParts {
- // Part information with everything but the list of parts.
- LogParts parts;
-
- // Tuple of time for the data and filename needed for sorting after
- // extracting.
- std::vector<std::pair<monotonic_clock::time_point, std::string>>
- unsorted_parts;
- };
-
- // A list of all the old parts which we don't know how to sort using uuids.
- // There are enough of these in the wild that this is worth supporting.
- std::vector<UnsortedOldParts> old_parts;
-
- // Now extract everything into our datastructures above for sorting.
- for (const std::string &part : parts) {
- FlatbufferVector<LogFileHeader> log_header = ReadHeader(part);
-
- const monotonic_clock::time_point monotonic_start_time(
- chrono::nanoseconds(log_header.message().monotonic_start_time()));
- const realtime_clock::time_point realtime_start_time(
- chrono::nanoseconds(log_header.message().realtime_start_time()));
-
- const std::string_view node =
- log_header.message().has_node()
- ? log_header.message().node()->name()->string_view()
- : "";
-
- // Looks like an old log. No UUID, index, and also single node. We have
- // little to no multi-node log files in the wild without part UUIDs and
- // indexes which we care much about.
- if (!log_header.message().has_parts_uuid() &&
- !log_header.message().has_parts_index() &&
- !log_header.message().has_node()) {
- FlatbufferVector<MessageHeader> first_message = ReadNthMessage(part, 0);
- const monotonic_clock::time_point first_message_time(
- chrono::nanoseconds(first_message.message().monotonic_sent_time()));
-
- // Find anything with a matching start time. They all go together.
- auto result = std::find_if(
- old_parts.begin(), old_parts.end(),
- [&](const UnsortedOldParts &parts) {
- return parts.parts.monotonic_start_time == monotonic_start_time &&
- parts.parts.realtime_start_time == realtime_start_time;
- });
-
- if (result == old_parts.end()) {
- old_parts.emplace_back();
- old_parts.back().parts.monotonic_start_time = monotonic_start_time;
- old_parts.back().parts.realtime_start_time = realtime_start_time;
- old_parts.back().unsorted_parts.emplace_back(
- std::make_pair(first_message_time, part));
- } else {
- result->unsorted_parts.emplace_back(
- std::make_pair(first_message_time, part));
- }
- continue;
- }
-
- CHECK(log_header.message().has_log_event_uuid());
- CHECK(log_header.message().has_parts_uuid());
- CHECK(log_header.message().has_parts_index());
-
- const std::string log_event_uuid =
- log_header.message().log_event_uuid()->str();
- const std::string parts_uuid = log_header.message().parts_uuid()->str();
- int32_t parts_index = log_header.message().parts_index();
-
- auto log_it = parts_list.find(log_event_uuid);
- if (log_it == parts_list.end()) {
- log_it =
- parts_list
- .insert(std::make_pair(log_event_uuid,
- std::map<std::string, UnsortedLogParts>()))
- .first;
- }
-
- auto it = log_it->second.find(parts_uuid);
- if (it == log_it->second.end()) {
- it = log_it->second.insert(std::make_pair(parts_uuid, UnsortedLogParts()))
- .first;
- it->second.monotonic_start_time = monotonic_start_time;
- it->second.realtime_start_time = realtime_start_time;
- it->second.node = std::string(node);
- }
-
- // First part might be min_time. If it is, try to put a better time on it.
- if (it->second.monotonic_start_time == monotonic_clock::min_time) {
- it->second.monotonic_start_time = monotonic_start_time;
- } else if (monotonic_start_time != monotonic_clock::min_time) {
- CHECK_EQ(it->second.monotonic_start_time, monotonic_start_time);
- }
- if (it->second.realtime_start_time == realtime_clock::min_time) {
- it->second.realtime_start_time = realtime_start_time;
- } else if (realtime_start_time != realtime_clock::min_time) {
- CHECK_EQ(it->second.realtime_start_time, realtime_start_time);
- }
-
- it->second.parts.emplace_back(std::make_pair(part, parts_index));
- }
-
- CHECK_NE(old_parts.empty(), parts_list.empty())
- << ": Can't have a mix of old and new parts.";
-
- // Now reformat old_parts to be in the right datastructure to report.
- if (!old_parts.empty()) {
- std::vector<LogFile> result;
- for (UnsortedOldParts &p : old_parts) {
- // Sort by the oldest message in each file.
- std::sort(
- p.unsorted_parts.begin(), p.unsorted_parts.end(),
- [](const std::pair<monotonic_clock::time_point, std::string> &a,
- const std::pair<monotonic_clock::time_point, std::string> &b) {
- return a.first < b.first;
- });
- LogFile log_file;
- for (std::pair<monotonic_clock::time_point, std::string> &f :
- p.unsorted_parts) {
- p.parts.parts.emplace_back(std::move(f.second));
- }
- log_file.parts.emplace_back(std::move(p.parts));
- result.emplace_back(std::move(log_file));
- }
-
- return result;
- }
-
- // Now, sort them and produce the final vector form.
- std::vector<LogFile> result;
- result.reserve(parts_list.size());
- for (std::pair<const std::string, std::map<std::string, UnsortedLogParts>>
- &logs : parts_list) {
- LogFile new_file;
- new_file.log_event_uuid = logs.first;
- for (std::pair<const std::string, UnsortedLogParts> &parts : logs.second) {
- LogParts new_parts;
- new_parts.monotonic_start_time = parts.second.monotonic_start_time;
- new_parts.realtime_start_time = parts.second.realtime_start_time;
- new_parts.log_event_uuid = logs.first;
- new_parts.parts_uuid = parts.first;
- new_parts.node = std::move(parts.second.node);
-
- std::sort(parts.second.parts.begin(), parts.second.parts.end(),
- [](const std::pair<std::string, int> &a,
- const std::pair<std::string, int> &b) {
- return a.second < b.second;
- });
- new_parts.parts.reserve(parts.second.parts.size());
- for (std::pair<std::string, int> &p : parts.second.parts) {
- new_parts.parts.emplace_back(std::move(p.first));
- }
- new_file.parts.emplace_back(std::move(new_parts));
- }
- result.emplace_back(std::move(new_file));
- }
- return result;
-}
-
-std::ostream &operator<<(std::ostream &stream, const LogFile &file) {
- stream << "{";
- if (!file.log_event_uuid.empty()) {
- stream << "\"log_event_uuid\": \"" << file.log_event_uuid << "\", ";
- }
- stream << "\"parts\": [";
- for (size_t i = 0; i < file.parts.size(); ++i) {
- if (i != 0u) {
- stream << ", ";
- }
- stream << file.parts[i];
- }
- stream << "]}";
- return stream;
-}
-std::ostream &operator<<(std::ostream &stream, const LogParts &parts) {
- stream << "{";
- if (!parts.log_event_uuid.empty()) {
- stream << "\"log_event_uuid\": \"" << parts.log_event_uuid << "\", ";
- }
- if (!parts.parts_uuid.empty()) {
- stream << "\"parts_uuid\": \"" << parts.parts_uuid << "\", ";
- }
- if (!parts.node.empty()) {
- stream << "\"node\": \"" << parts.node << "\", ";
- }
- stream << "\"monotonic_start_time\": " << parts.monotonic_start_time
- << ", \"realtime_start_time\": " << parts.realtime_start_time << ", [";
-
- for (size_t i = 0; i < parts.parts.size(); ++i) {
- if (i != 0u) {
- stream << ", ";
- }
- stream << parts.parts[i];
- }
-
- stream << "]}";
- return stream;
-}
-
std::vector<std::vector<std::string>> ToLogReaderVector(
const std::vector<LogFile> &log_files) {
std::vector<std::vector<std::string>> result;
diff --git a/aos/events/logging/logger.h b/aos/events/logging/logger.h
index ca4630f..b37fea2 100644
--- a/aos/events/logging/logger.h
+++ b/aos/events/logging/logger.h
@@ -13,6 +13,7 @@
#include "aos/events/event_loop.h"
#include "aos/events/logging/eigen_mpq.h"
#include "aos/events/logging/log_namer.h"
+#include "aos/events/logging/logfile_sorting.h"
#include "aos/events/logging/logfile_utils.h"
#include "aos/events/logging/logger_generated.h"
#include "aos/events/logging/uuid.h"
@@ -279,41 +280,6 @@
std::vector<NodeState> node_state_;
};
-// Datastructure to hold ordered parts.
-struct LogParts {
- // Monotonic and realtime start times for this set of log files. For log
- // files which started out unknown and then became known, this is the known
- // start time.
- aos::monotonic_clock::time_point monotonic_start_time;
- aos::realtime_clock::time_point realtime_start_time;
-
- // UUIDs if available.
- std::string log_event_uuid;
- std::string parts_uuid;
-
- // The node this represents, or empty if we are in a single node world.
- std::string node;
-
- // Pre-sorted list of parts.
- std::vector<std::string> parts;
-};
-
-// Datastructure to hold parts from the same run of the logger which have no
-// ordering constraints relative to each other.
-struct LogFile {
- // The UUID tying them all together (if available)
- std::string log_event_uuid;
-
- // All the parts, unsorted.
- std::vector<LogParts> parts;
-};
-
-std::ostream &operator<<(std::ostream &stream, const LogFile &file);
-std::ostream &operator<<(std::ostream &stream, const LogParts &parts);
-
-// Takes a bunch of parts and sorts them based on part_uuid and part_index.
-std::vector<LogFile> SortParts(const std::vector<std::string> &parts);
-
std::vector<std::vector<std::string>> ToLogReaderVector(
const std::vector<LogFile> &log_files);