Separate logfile sorting into a separate library.
logger.cc was getting big and I'd like to use the datastructures in
logfile sorting in my new code.
Change-Id: If136fb36f24843432723ead1e86c86b2b36f1509
diff --git a/aos/events/logging/logger.cc b/aos/events/logging/logger.cc
index d206d5d..7d0040c 100644
--- a/aos/events/logging/logger.cc
+++ b/aos/events/logging/logger.cc
@@ -11,6 +11,7 @@
#include "absl/strings/escaping.h"
#include "absl/types/span.h"
#include "aos/events/event_loop.h"
+#include "aos/events/logging/logfile_sorting.h"
#include "aos/events/logging/logger_generated.h"
#include "aos/events/logging/uuid.h"
#include "aos/flatbuffer_merge.h"
@@ -681,226 +682,6 @@
}
}
-std::vector<LogFile> SortParts(const std::vector<std::string> &parts) {
- // Start by grouping all parts by UUID, and extracting the part index.
- // Datastructure to hold all the info extracted from a set of parts which go
- // together so we can sort them afterwords.
- struct UnsortedLogParts {
- // Start times.
- aos::monotonic_clock::time_point monotonic_start_time;
- aos::realtime_clock::time_point realtime_start_time;
-
- // Node to save.
- std::string node;
-
- // Pairs of the filename and the part index for sorting.
- std::vector<std::pair<std::string, int>> parts;
- };
-
- // Map holding the log_event_uuid -> second map. The second map holds the
- // parts_uuid -> list of parts for sorting.
- std::map<std::string, std::map<std::string, UnsortedLogParts>> parts_list;
-
- // Sort part files without UUIDs and part indexes as well. Extract everything
- // useful from the log in the first pass, then sort later.
- struct UnsortedOldParts {
- // Part information with everything but the list of parts.
- LogParts parts;
-
- // Tuple of time for the data and filename needed for sorting after
- // extracting.
- std::vector<std::pair<monotonic_clock::time_point, std::string>>
- unsorted_parts;
- };
-
- // A list of all the old parts which we don't know how to sort using uuids.
- // There are enough of these in the wild that this is worth supporting.
- std::vector<UnsortedOldParts> old_parts;
-
- // Now extract everything into our datastructures above for sorting.
- for (const std::string &part : parts) {
- FlatbufferVector<LogFileHeader> log_header = ReadHeader(part);
-
- const monotonic_clock::time_point monotonic_start_time(
- chrono::nanoseconds(log_header.message().monotonic_start_time()));
- const realtime_clock::time_point realtime_start_time(
- chrono::nanoseconds(log_header.message().realtime_start_time()));
-
- const std::string_view node =
- log_header.message().has_node()
- ? log_header.message().node()->name()->string_view()
- : "";
-
- // Looks like an old log. No UUID, index, and also single node. We have
- // little to no multi-node log files in the wild without part UUIDs and
- // indexes which we care much about.
- if (!log_header.message().has_parts_uuid() &&
- !log_header.message().has_parts_index() &&
- !log_header.message().has_node()) {
- FlatbufferVector<MessageHeader> first_message = ReadNthMessage(part, 0);
- const monotonic_clock::time_point first_message_time(
- chrono::nanoseconds(first_message.message().monotonic_sent_time()));
-
- // Find anything with a matching start time. They all go together.
- auto result = std::find_if(
- old_parts.begin(), old_parts.end(),
- [&](const UnsortedOldParts &parts) {
- return parts.parts.monotonic_start_time == monotonic_start_time &&
- parts.parts.realtime_start_time == realtime_start_time;
- });
-
- if (result == old_parts.end()) {
- old_parts.emplace_back();
- old_parts.back().parts.monotonic_start_time = monotonic_start_time;
- old_parts.back().parts.realtime_start_time = realtime_start_time;
- old_parts.back().unsorted_parts.emplace_back(
- std::make_pair(first_message_time, part));
- } else {
- result->unsorted_parts.emplace_back(
- std::make_pair(first_message_time, part));
- }
- continue;
- }
-
- CHECK(log_header.message().has_log_event_uuid());
- CHECK(log_header.message().has_parts_uuid());
- CHECK(log_header.message().has_parts_index());
-
- const std::string log_event_uuid =
- log_header.message().log_event_uuid()->str();
- const std::string parts_uuid = log_header.message().parts_uuid()->str();
- int32_t parts_index = log_header.message().parts_index();
-
- auto log_it = parts_list.find(log_event_uuid);
- if (log_it == parts_list.end()) {
- log_it =
- parts_list
- .insert(std::make_pair(log_event_uuid,
- std::map<std::string, UnsortedLogParts>()))
- .first;
- }
-
- auto it = log_it->second.find(parts_uuid);
- if (it == log_it->second.end()) {
- it = log_it->second.insert(std::make_pair(parts_uuid, UnsortedLogParts()))
- .first;
- it->second.monotonic_start_time = monotonic_start_time;
- it->second.realtime_start_time = realtime_start_time;
- it->second.node = std::string(node);
- }
-
- // First part might be min_time. If it is, try to put a better time on it.
- if (it->second.monotonic_start_time == monotonic_clock::min_time) {
- it->second.monotonic_start_time = monotonic_start_time;
- } else if (monotonic_start_time != monotonic_clock::min_time) {
- CHECK_EQ(it->second.monotonic_start_time, monotonic_start_time);
- }
- if (it->second.realtime_start_time == realtime_clock::min_time) {
- it->second.realtime_start_time = realtime_start_time;
- } else if (realtime_start_time != realtime_clock::min_time) {
- CHECK_EQ(it->second.realtime_start_time, realtime_start_time);
- }
-
- it->second.parts.emplace_back(std::make_pair(part, parts_index));
- }
-
- CHECK_NE(old_parts.empty(), parts_list.empty())
- << ": Can't have a mix of old and new parts.";
-
- // Now reformat old_parts to be in the right datastructure to report.
- if (!old_parts.empty()) {
- std::vector<LogFile> result;
- for (UnsortedOldParts &p : old_parts) {
- // Sort by the oldest message in each file.
- std::sort(
- p.unsorted_parts.begin(), p.unsorted_parts.end(),
- [](const std::pair<monotonic_clock::time_point, std::string> &a,
- const std::pair<monotonic_clock::time_point, std::string> &b) {
- return a.first < b.first;
- });
- LogFile log_file;
- for (std::pair<monotonic_clock::time_point, std::string> &f :
- p.unsorted_parts) {
- p.parts.parts.emplace_back(std::move(f.second));
- }
- log_file.parts.emplace_back(std::move(p.parts));
- result.emplace_back(std::move(log_file));
- }
-
- return result;
- }
-
- // Now, sort them and produce the final vector form.
- std::vector<LogFile> result;
- result.reserve(parts_list.size());
- for (std::pair<const std::string, std::map<std::string, UnsortedLogParts>>
- &logs : parts_list) {
- LogFile new_file;
- new_file.log_event_uuid = logs.first;
- for (std::pair<const std::string, UnsortedLogParts> &parts : logs.second) {
- LogParts new_parts;
- new_parts.monotonic_start_time = parts.second.monotonic_start_time;
- new_parts.realtime_start_time = parts.second.realtime_start_time;
- new_parts.log_event_uuid = logs.first;
- new_parts.parts_uuid = parts.first;
- new_parts.node = std::move(parts.second.node);
-
- std::sort(parts.second.parts.begin(), parts.second.parts.end(),
- [](const std::pair<std::string, int> &a,
- const std::pair<std::string, int> &b) {
- return a.second < b.second;
- });
- new_parts.parts.reserve(parts.second.parts.size());
- for (std::pair<std::string, int> &p : parts.second.parts) {
- new_parts.parts.emplace_back(std::move(p.first));
- }
- new_file.parts.emplace_back(std::move(new_parts));
- }
- result.emplace_back(std::move(new_file));
- }
- return result;
-}
-
-std::ostream &operator<<(std::ostream &stream, const LogFile &file) {
- stream << "{";
- if (!file.log_event_uuid.empty()) {
- stream << "\"log_event_uuid\": \"" << file.log_event_uuid << "\", ";
- }
- stream << "\"parts\": [";
- for (size_t i = 0; i < file.parts.size(); ++i) {
- if (i != 0u) {
- stream << ", ";
- }
- stream << file.parts[i];
- }
- stream << "]}";
- return stream;
-}
-std::ostream &operator<<(std::ostream &stream, const LogParts &parts) {
- stream << "{";
- if (!parts.log_event_uuid.empty()) {
- stream << "\"log_event_uuid\": \"" << parts.log_event_uuid << "\", ";
- }
- if (!parts.parts_uuid.empty()) {
- stream << "\"parts_uuid\": \"" << parts.parts_uuid << "\", ";
- }
- if (!parts.node.empty()) {
- stream << "\"node\": \"" << parts.node << "\", ";
- }
- stream << "\"monotonic_start_time\": " << parts.monotonic_start_time
- << ", \"realtime_start_time\": " << parts.realtime_start_time << ", [";
-
- for (size_t i = 0; i < parts.parts.size(); ++i) {
- if (i != 0u) {
- stream << ", ";
- }
- stream << parts.parts[i];
- }
-
- stream << "]}";
- return stream;
-}
-
std::vector<std::vector<std::string>> ToLogReaderVector(
const std::vector<LogFile> &log_files) {
std::vector<std::vector<std::string>> result;