blob: 2b08b59dbe0a3d20ce6fb7032075d934ddc39289 [file] [log] [blame]
Austin Schuha36c8902019-12-30 18:07:15 -08001#ifndef AOS_EVENTS_LOGGING_LOGFILE_UTILS_H_
2#define AOS_EVENTS_LOGGING_LOGFILE_UTILS_H_
3
4#include <sys/uio.h>
5
Austin Schuh97789fc2020-08-01 14:42:45 -07006#include <chrono>
Austin Schuh05b70472020-01-01 17:11:17 -08007#include <deque>
Austin Schuh97789fc2020-08-01 14:42:45 -07008#include <limits>
9#include <memory>
Austin Schuh05b70472020-01-01 17:11:17 -080010#include <optional>
Austin Schuhfa895892020-01-07 20:07:41 -080011#include <string>
Austin Schuha36c8902019-12-30 18:07:15 -080012#include <string_view>
Brian Silverman98360e22020-04-28 16:51:20 -070013#include <tuple>
Austin Schuh97789fc2020-08-01 14:42:45 -070014#include <utility>
Austin Schuha36c8902019-12-30 18:07:15 -080015#include <vector>
16
Austin Schuh05b70472020-01-01 17:11:17 -080017#include "absl/types/span.h"
Austin Schuha36c8902019-12-30 18:07:15 -080018#include "aos/events/event_loop.h"
19#include "aos/events/logging/logger_generated.h"
20#include "flatbuffers/flatbuffers.h"
21
22namespace aos {
23namespace logger {
24
// Selects what gets logged for a message on a channel.
enum class LogType : uint8_t {
  // Message was sent from this node; log it here.
  kLogMessage,
  // Message was sent from a different node; only its delivery times get
  // logged here.
  kLogDeliveryTimeOnly,
  // Message was sent from a different node; log both the message and its
  // delivery times.  Logging of any message which never got delivered is left
  // to the message_gateway.
  kLogMessageAndDeliveryTime,
  // Message was sent from the other node and gets logged on this node.
  kLogRemoteMessage
};
38
Austin Schuha36c8902019-12-30 18:07:15 -080039// This class manages efficiently writing a sequence of detached buffers to a
40// file. It queues them up and batches the write operation.
41class DetachedBufferWriter {
42 public:
43 DetachedBufferWriter(std::string_view filename);
44 ~DetachedBufferWriter();
45
Brian Silverman98360e22020-04-28 16:51:20 -070046 DetachedBufferWriter(const DetachedBufferWriter &) = delete;
47 DetachedBufferWriter &operator=(const DetachedBufferWriter &) = delete;
48
Austin Schuh6f3babe2020-01-26 20:34:50 -080049 std::string_view filename() const { return filename_; }
50
Austin Schuha36c8902019-12-30 18:07:15 -080051 // TODO(austin): Snappy compress the log file if it ends with .snappy!
52
53 // Queues up a finished FlatBufferBuilder to be written. Steals the detached
54 // buffer from it.
55 void QueueSizedFlatbuffer(flatbuffers::FlatBufferBuilder *fbb);
56 // Queues up a detached buffer directly.
57 void QueueSizedFlatbuffer(flatbuffers::DetachedBuffer &&buffer);
Austin Schuhde031b72020-01-10 19:34:41 -080058 // Writes a Span. This is not terribly optimized right now.
59 void WriteSizedFlatbuffer(absl::Span<const uint8_t> span);
Austin Schuha36c8902019-12-30 18:07:15 -080060
61 // Triggers data to be provided to the kernel and written.
62 void Flush();
63
Brian Silverman98360e22020-04-28 16:51:20 -070064 // Returns the number of bytes written.
65 size_t written_size() const { return written_size_; }
66
67 // Returns the number of bytes written or currently queued.
68 size_t total_size() const { return written_size_ + queued_size_; }
69
Austin Schuha36c8902019-12-30 18:07:15 -080070 private:
Austin Schuh6f3babe2020-01-26 20:34:50 -080071 const std::string filename_;
72
Austin Schuha36c8902019-12-30 18:07:15 -080073 int fd_ = -1;
74
75 // Size of all the data in the queue.
76 size_t queued_size_ = 0;
Brian Silverman98360e22020-04-28 16:51:20 -070077 size_t written_size_ = 0;
Austin Schuha36c8902019-12-30 18:07:15 -080078
79 // List of buffers to flush.
80 std::vector<flatbuffers::DetachedBuffer> queue_;
81 // List of iovecs to use with writev. This is a member variable to avoid
82 // churn.
83 std::vector<struct iovec> iovec_;
84};
85
86// Packes a message pointed to by the context into a MessageHeader.
87flatbuffers::Offset<MessageHeader> PackMessage(
88 flatbuffers::FlatBufferBuilder *fbb, const Context &context,
89 int channel_index, LogType log_type);
90
Austin Schuh6f3babe2020-01-26 20:34:50 -080091FlatbufferVector<LogFileHeader> ReadHeader(std::string_view filename);
92
Austin Schuh05b70472020-01-01 17:11:17 -080093// Class to read chunks out of a log file.
94class SpanReader {
95 public:
96 SpanReader(std::string_view filename);
Austin Schuha36c8902019-12-30 18:07:15 -080097
Austin Schuh05b70472020-01-01 17:11:17 -080098 ~SpanReader() { close(fd_); }
99
Austin Schuh6f3babe2020-01-26 20:34:50 -0800100 std::string_view filename() const { return filename_; }
101
Austin Schuh05b70472020-01-01 17:11:17 -0800102 // Returns a span with the data for a message from the log file, excluding
103 // the size.
104 absl::Span<const uint8_t> ReadMessage();
105
106 // Returns true if there is a full message available in the buffer, or if we
107 // will have to read more data from disk.
108 bool MessageAvailable();
109
110 private:
111 // TODO(austin): Optimization:
112 // Allocate the 256k blocks like we do today. But, refcount them with
113 // shared_ptr pointed to by the messageheader that is returned. This avoids
114 // the copy. Need to do more benchmarking.
115
116 // Reads a chunk of data into data_. Returns false if no data was read.
117 bool ReadBlock();
118
Austin Schuh6f3babe2020-01-26 20:34:50 -0800119 const std::string filename_;
120
Austin Schuh05b70472020-01-01 17:11:17 -0800121 // File descriptor for the log file.
122 int fd_ = -1;
123
124 // Allocator which doesn't zero initialize memory.
125 template <typename T>
126 struct DefaultInitAllocator {
127 typedef T value_type;
128
129 template <typename U>
130 void construct(U *p) {
131 ::new (static_cast<void *>(p)) U;
132 }
133
134 template <typename U, typename... Args>
135 void construct(U *p, Args &&... args) {
136 ::new (static_cast<void *>(p)) U(std::forward<Args>(args)...);
137 }
138
139 T *allocate(std::size_t n) {
140 return reinterpret_cast<T *>(::operator new(sizeof(T) * n));
141 }
142
143 template <typename U>
144 void deallocate(U *p, std::size_t /*n*/) {
145 ::operator delete(static_cast<void *>(p));
146 }
147 };
148
149 // Vector to read into. This uses an allocator which doesn't zero
150 // initialize the memory.
151 std::vector<uint8_t, DefaultInitAllocator<uint8_t>> data_;
152
153 // Amount of data consumed already in data_.
154 size_t consumed_data_ = 0;
155
156 // Cached bit for if we have reached the end of the file. Otherwise we will
157 // hammer on the kernel asking for more data each time we send.
158 bool end_of_file_ = false;
159};
160
161// Class which handles reading the header and messages from the log file. This
162// handles any per-file state left before merging below.
163class MessageReader {
164 public:
165 MessageReader(std::string_view filename);
166
Austin Schuh6f3babe2020-01-26 20:34:50 -0800167 std::string_view filename() const { return span_reader_.filename(); }
168
Austin Schuh05b70472020-01-01 17:11:17 -0800169 // Returns the header from the log file.
170 const LogFileHeader *log_file_header() const {
Austin Schuh97789fc2020-08-01 14:42:45 -0700171 return &raw_log_file_header_.message();
172 }
173
174 // Returns the raw data of the header from the log file.
175 const FlatbufferVector<LogFileHeader> &raw_log_file_header() const {
176 return raw_log_file_header_;
Austin Schuh05b70472020-01-01 17:11:17 -0800177 }
178
179 // Returns the minimum maount of data needed to queue up for sorting before
180 // ware guarenteed to not see data out of order.
181 std::chrono::nanoseconds max_out_of_order_duration() const {
182 return max_out_of_order_duration_;
183 }
184
Austin Schuhcde938c2020-02-02 17:30:07 -0800185 // Returns the newest timestamp read out of the log file.
Austin Schuh05b70472020-01-01 17:11:17 -0800186 monotonic_clock::time_point newest_timestamp() const {
187 return newest_timestamp_;
188 }
189
190 // Returns the next message if there is one.
191 std::optional<FlatbufferVector<MessageHeader>> ReadMessage();
192
193 // The time at which we need to read another chunk from the logfile.
194 monotonic_clock::time_point queue_data_time() const {
195 return newest_timestamp() - max_out_of_order_duration();
196 }
197
198 private:
199 // Log chunk reader.
200 SpanReader span_reader_;
201
Austin Schuh97789fc2020-08-01 14:42:45 -0700202 // Vector holding the raw data for the log file header.
203 FlatbufferVector<LogFileHeader> raw_log_file_header_;
Austin Schuh05b70472020-01-01 17:11:17 -0800204
205 // Minimum amount of data to queue up for sorting before we are guarenteed
206 // to not see data out of order.
207 std::chrono::nanoseconds max_out_of_order_duration_;
208
209 // Timestamp of the newest message in a channel queue.
210 monotonic_clock::time_point newest_timestamp_ = monotonic_clock::min_time;
211};
212
Austin Schuh6f3babe2020-01-26 20:34:50 -0800213class TimestampMerger;
Austin Schuh05b70472020-01-01 17:11:17 -0800214
// A design requirement is that the relevant data for a channel is not more than
// max_out_of_order_duration out of order. We approach sorting in layers.
//
// 1) Split each (maybe chunked) log file into one queue per channel.  Read this
//    log file looking for data pertaining to a specific node.
//    (SplitMessageReader)
// 2) Merge all the data per channel from the different log files into a sorted
//    list of timestamps and messages. (TimestampMerger)
// 3) Combine the timestamps and messages. (TimestampMerger)
// 4) Merge all the channels to produce the next message on a node.
//    (ChannelMerger)
// 5) Duplicate this entire stack per node.
227
228// This class splits messages and timestamps up into a queue per channel, and
229// handles reading data from multiple chunks.
230class SplitMessageReader {
231 public:
232 SplitMessageReader(const std::vector<std::string> &filenames);
233
234 // Sets the TimestampMerger that gets notified for each channel. The node
235 // that the TimestampMerger is merging as needs to be passed in.
236 void SetTimestampMerger(TimestampMerger *timestamp_merger, int channel,
237 const Node *target_node);
238
239 // Returns the (timestamp, queue_idex) for the oldest message in a channel, or
240 // max_time if there is nothing in the channel.
Austin Schuhcde938c2020-02-02 17:30:07 -0800241 std::tuple<monotonic_clock::time_point, uint32_t, const MessageHeader *>
242 oldest_message(int channel) {
Austin Schuh6f3babe2020-01-26 20:34:50 -0800243 return channels_[channel].data.front_timestamp();
244 }
245
246 // Returns the (timestamp, queue_index) for the oldest delivery time in a
247 // channel, or max_time if there is nothing in the channel.
Austin Schuhcde938c2020-02-02 17:30:07 -0800248 std::tuple<monotonic_clock::time_point, uint32_t, const MessageHeader *>
249 oldest_message(int channel, int destination_node) {
Austin Schuh6f3babe2020-01-26 20:34:50 -0800250 return channels_[channel].timestamps[destination_node].front_timestamp();
251 }
252
253 // Returns the timestamp, queue_index, and message for the oldest data on a
254 // channel. Requeues data as needed.
255 std::tuple<monotonic_clock::time_point, uint32_t,
256 FlatbufferVector<MessageHeader>>
257 PopOldest(int channel_index);
258
259 // Returns the timestamp, queue_index, and message for the oldest timestamp on
260 // a channel delivered to a node. Requeues data as needed.
261 std::tuple<monotonic_clock::time_point, uint32_t,
262 FlatbufferVector<MessageHeader>>
263 PopOldest(int channel, int node_index);
264
265 // Returns the header for the log files.
Austin Schuh05b70472020-01-01 17:11:17 -0800266 const LogFileHeader *log_file_header() const {
Austin Schuhfa895892020-01-07 20:07:41 -0800267 return &log_file_header_.message();
Austin Schuh05b70472020-01-01 17:11:17 -0800268 }
269
Austin Schuh97789fc2020-08-01 14:42:45 -0700270 const FlatbufferVector<LogFileHeader> &raw_log_file_header() const {
271 return log_file_header_;
272 }
273
Austin Schuh6f3babe2020-01-26 20:34:50 -0800274 // Returns the starting time for this set of log files.
Austin Schuh05b70472020-01-01 17:11:17 -0800275 monotonic_clock::time_point monotonic_start_time() {
276 return monotonic_clock::time_point(
277 std::chrono::nanoseconds(log_file_header()->monotonic_start_time()));
278 }
279 realtime_clock::time_point realtime_start_time() {
280 return realtime_clock::time_point(
281 std::chrono::nanoseconds(log_file_header()->realtime_start_time()));
282 }
283
Austin Schuh6f3babe2020-01-26 20:34:50 -0800284 // Returns the configuration from the log file header.
285 const Configuration *configuration() const {
286 return log_file_header()->configuration();
287 }
288
Austin Schuh05b70472020-01-01 17:11:17 -0800289 // Returns the node who's point of view this log file is from. Make sure this
290 // is a pointer in the configuration() nodes list so it can be consumed
291 // elsewhere.
292 const Node *node() const {
293 if (configuration()->has_nodes()) {
Austin Schuh6f3babe2020-01-26 20:34:50 -0800294 return configuration::GetNodeOrDie(configuration(),
295 log_file_header()->node());
Austin Schuh05b70472020-01-01 17:11:17 -0800296 } else {
297 CHECK(!log_file_header()->has_node());
298 return nullptr;
299 }
300 }
301
Austin Schuh6f3babe2020-01-26 20:34:50 -0800302 // Returns the timestamp of the newest message read from the log file, and the
303 // timestamp that we need to re-queue data.
304 monotonic_clock::time_point newest_timestamp() const {
Austin Schuhcde938c2020-02-02 17:30:07 -0800305 return newest_timestamp_;
Austin Schuh6f3babe2020-01-26 20:34:50 -0800306 }
307
Austin Schuhcde938c2020-02-02 17:30:07 -0800308 // Returns the next time to trigger a requeue.
309 monotonic_clock::time_point time_to_queue() const { return time_to_queue_; }
310
311 // Returns the minimum amount of data needed to queue up for sorting before
312 // ware guarenteed to not see data out of order.
313 std::chrono::nanoseconds max_out_of_order_duration() const {
314 return message_reader_->max_out_of_order_duration();
315 }
316
317 std::string_view filename() const { return message_reader_->filename(); }
Austin Schuh6f3babe2020-01-26 20:34:50 -0800318
319 // Adds more messages to the sorted list. This reads enough data such that
320 // oldest_message_time can be replayed safely. Returns false if the log file
321 // has all been read.
322 bool QueueMessages(monotonic_clock::time_point oldest_message_time);
Austin Schuh05b70472020-01-01 17:11:17 -0800323
Austin Schuhcde938c2020-02-02 17:30:07 -0800324 // Returns debug strings for a channel, and timestamps for a node.
325 std::string DebugString(int channel) const;
326 std::string DebugString(int channel, int node_index) const;
327
Austin Schuh8bd96322020-02-13 21:18:22 -0800328 // Returns true if all the messages have been queued from the last log file in
329 // the list of log files chunks.
330 bool at_end() const { return at_end_; }
331
Austin Schuh05b70472020-01-01 17:11:17 -0800332 private:
Austin Schuh6f3babe2020-01-26 20:34:50 -0800333 // TODO(austin): Need to copy or refcount the message instead of running
334 // multiple copies of the reader. Or maybe have a "as_node" index and hide it
335 // inside.
336
Austin Schuhfa895892020-01-07 20:07:41 -0800337 // Moves to the next log file in the list.
338 bool NextLogFile();
339
Austin Schuh6f3babe2020-01-26 20:34:50 -0800340 // Filenames of the log files.
341 std::vector<std::string> filenames_;
342 // And the index of the next file to open.
343 size_t next_filename_index_ = 0;
Austin Schuh05b70472020-01-01 17:11:17 -0800344
Austin Schuhee711052020-08-24 16:06:09 -0700345 // Node we are reading as.
346 const Node *target_node_ = nullptr;
347
Austin Schuh6f3babe2020-01-26 20:34:50 -0800348 // Log file header to report. This is a copy.
Austin Schuh97789fc2020-08-01 14:42:45 -0700349 FlatbufferVector<LogFileHeader> log_file_header_;
Austin Schuh6f3babe2020-01-26 20:34:50 -0800350 // Current log file being read.
351 std::unique_ptr<MessageReader> message_reader_;
Austin Schuh05b70472020-01-01 17:11:17 -0800352
353 // Datastructure to hold the list of messages, cached timestamp for the
354 // oldest message, and sender to send with.
Austin Schuh6f3babe2020-01-26 20:34:50 -0800355 struct MessageHeaderQueue {
356 // If true, this is a timestamp queue.
357 bool timestamps = false;
Austin Schuh05b70472020-01-01 17:11:17 -0800358
Austin Schuh6f3babe2020-01-26 20:34:50 -0800359 // Returns a reference to the the oldest message.
360 FlatbufferVector<MessageHeader> &front() {
361 CHECK_GT(data_.size(), 0u);
362 return data_.front();
Austin Schuh05b70472020-01-01 17:11:17 -0800363 }
Austin Schuh6f3babe2020-01-26 20:34:50 -0800364
Austin Schuhcde938c2020-02-02 17:30:07 -0800365 // Adds a message to the back of the queue. Returns true if it was actually
366 // emplaced.
367 bool emplace_back(FlatbufferVector<MessageHeader> &&msg);
Austin Schuh6f3babe2020-01-26 20:34:50 -0800368
369 // Drops the front message. Invalidates the front() reference.
370 void pop_front();
371
372 // The size of the queue.
373 size_t size() { return data_.size(); }
374
Austin Schuhcde938c2020-02-02 17:30:07 -0800375 // Returns a debug string with info about each message in the queue.
376 std::string DebugString() const;
377
Austin Schuh6f3babe2020-01-26 20:34:50 -0800378 // Returns the (timestamp, queue_index) for the oldest message.
Austin Schuhcde938c2020-02-02 17:30:07 -0800379 const std::tuple<monotonic_clock::time_point, uint32_t,
380 const MessageHeader *>
381 front_timestamp() {
Austin Schuh6f3babe2020-01-26 20:34:50 -0800382 CHECK_GT(data_.size(), 0u);
383 return std::make_tuple(
384 monotonic_clock::time_point(std::chrono::nanoseconds(
385 front().message().monotonic_sent_time())),
Austin Schuhcde938c2020-02-02 17:30:07 -0800386 front().message().queue_index(), &front().message());
Austin Schuh6f3babe2020-01-26 20:34:50 -0800387 }
388
389 // Pointer to the timestamp merger for this queue if available.
390 TimestampMerger *timestamp_merger = nullptr;
391 // Pointer to the reader which feeds this queue.
392 SplitMessageReader *split_reader = nullptr;
393
394 private:
395 // The data.
396 std::deque<FlatbufferVector<MessageHeader>> data_;
Austin Schuh05b70472020-01-01 17:11:17 -0800397 };
398
Austin Schuh6f3babe2020-01-26 20:34:50 -0800399 // All the queues needed for a channel. There isn't going to be data in all
400 // of these.
401 struct ChannelData {
402 // The data queue for the channel.
403 MessageHeaderQueue data;
404 // Queues for timestamps for each node.
405 std::vector<MessageHeaderQueue> timestamps;
406 };
Austin Schuhfa895892020-01-07 20:07:41 -0800407
Austin Schuh6f3babe2020-01-26 20:34:50 -0800408 // Data for all the channels.
Austin Schuh05b70472020-01-01 17:11:17 -0800409 std::vector<ChannelData> channels_;
410
Austin Schuh6f3babe2020-01-26 20:34:50 -0800411 // Once we know the node that this SplitMessageReader will be writing as,
412 // there will be only one MessageHeaderQueue that a specific channel matches.
413 // Precompute this here for efficiency.
414 std::vector<MessageHeaderQueue *> channels_to_write_;
415
Austin Schuhcde938c2020-02-02 17:30:07 -0800416 monotonic_clock::time_point time_to_queue_ = monotonic_clock::min_time;
417
418 // Latches true when we hit the end of the last log file and there is no sense
419 // poking it further.
420 bool at_end_ = false;
421
422 // Timestamp of the newest message that was read and actually queued. We want
423 // to track this independently from the log file because we need the
424 // timestamps here to be timestamps of messages that are queued.
425 monotonic_clock::time_point newest_timestamp_ = monotonic_clock::min_time;
Austin Schuh6f3babe2020-01-26 20:34:50 -0800426};
427
428class ChannelMerger;
429
430// Sorts channels (and timestamps) from multiple log files for a single channel.
431class TimestampMerger {
432 public:
433 TimestampMerger(const Configuration *configuration,
434 std::vector<SplitMessageReader *> split_message_readers,
435 int channel_index, const Node *target_node,
436 ChannelMerger *channel_merger);
437
438 // Metadata used to schedule the message.
439 struct DeliveryTimestamp {
440 monotonic_clock::time_point monotonic_event_time =
441 monotonic_clock::min_time;
442 realtime_clock::time_point realtime_event_time = realtime_clock::min_time;
443
444 monotonic_clock::time_point monotonic_remote_time =
445 monotonic_clock::min_time;
446 realtime_clock::time_point realtime_remote_time = realtime_clock::min_time;
447 uint32_t remote_queue_index = 0xffffffff;
448 };
449
450 // Pushes SplitMessageReader onto the timestamp heap. This should only be
451 // called when timestamps are placed in the channel this class is merging for
452 // the reader.
453 void UpdateTimestamp(
454 SplitMessageReader *split_message_reader,
Austin Schuhcde938c2020-02-02 17:30:07 -0800455 std::tuple<monotonic_clock::time_point, uint32_t, const MessageHeader *>
456 oldest_message_time) {
Austin Schuh6f3babe2020-01-26 20:34:50 -0800457 PushTimestampHeap(oldest_message_time, split_message_reader);
458 }
459 // Pushes SplitMessageReader onto the message heap. This should only be
460 // called when data is placed in the channel this class is merging for the
461 // reader.
462 void Update(
463 SplitMessageReader *split_message_reader,
Austin Schuhcde938c2020-02-02 17:30:07 -0800464 std::tuple<monotonic_clock::time_point, uint32_t, const MessageHeader *>
465 oldest_message_time) {
Austin Schuh6f3babe2020-01-26 20:34:50 -0800466 PushMessageHeap(oldest_message_time, split_message_reader);
467 }
468
Austin Schuhcde938c2020-02-02 17:30:07 -0800469 // Returns the oldest combined timestamp and data for this channel. If there
470 // isn't a matching piece of data, returns only the timestamp with no data.
471 // The caller can determine what the appropriate action is to recover.
Austin Schuh6f3babe2020-01-26 20:34:50 -0800472 std::tuple<DeliveryTimestamp, FlatbufferVector<MessageHeader>> PopOldest();
473
Austin Schuh8bd96322020-02-13 21:18:22 -0800474 // Returns the oldest forwarding timestamp.
475 DeliveryTimestamp OldestTimestamp() const;
476
Austin Schuh6f3babe2020-01-26 20:34:50 -0800477 // Tracks if the channel merger has pushed this onto it's heap or not.
478 bool pushed() { return pushed_; }
479 // Sets if this has been pushed to the channel merger heap. Should only be
480 // called by the channel merger.
481 void set_pushed(bool pushed) { pushed_ = pushed; }
482
Austin Schuhcde938c2020-02-02 17:30:07 -0800483 // Returns a debug string with the heaps printed out.
484 std::string DebugString() const;
485
Austin Schuh8bd96322020-02-13 21:18:22 -0800486 // Returns true if we have timestamps.
487 bool has_timestamps() const { return has_timestamps_; }
488
489 // Records that one of the log files ran out of data. This should only be
490 // called by a SplitMessageReader.
491 void NoticeAtEnd();
492
Austin Schuh6f3babe2020-01-26 20:34:50 -0800493 private:
494 // Pushes messages and timestamps to the corresponding heaps.
495 void PushMessageHeap(
Austin Schuhcde938c2020-02-02 17:30:07 -0800496 std::tuple<monotonic_clock::time_point, uint32_t, const MessageHeader *>
497 timestamp,
Austin Schuh6f3babe2020-01-26 20:34:50 -0800498 SplitMessageReader *split_message_reader);
499 void PushTimestampHeap(
Austin Schuhcde938c2020-02-02 17:30:07 -0800500 std::tuple<monotonic_clock::time_point, uint32_t, const MessageHeader *>
501 timestamp,
Austin Schuh6f3babe2020-01-26 20:34:50 -0800502 SplitMessageReader *split_message_reader);
503
504 // Pops a message from the message heap. This automatically triggers the
505 // split message reader to re-fetch any new data.
506 std::tuple<monotonic_clock::time_point, uint32_t,
507 FlatbufferVector<MessageHeader>>
508 PopMessageHeap();
Austin Schuhcde938c2020-02-02 17:30:07 -0800509
510 std::tuple<monotonic_clock::time_point, uint32_t, const MessageHeader *>
511 oldest_message() const;
512 std::tuple<monotonic_clock::time_point, uint32_t, const MessageHeader *>
513 oldest_timestamp() const;
Austin Schuh6f3babe2020-01-26 20:34:50 -0800514 // Pops a message from the timestamp heap. This automatically triggers the
515 // split message reader to re-fetch any new data.
516 std::tuple<monotonic_clock::time_point, uint32_t,
517 FlatbufferVector<MessageHeader>>
518 PopTimestampHeap();
519
520 const Configuration *configuration_;
521
522 // If true, this is a forwarded channel and timestamps should be matched.
523 bool has_timestamps_ = false;
524
525 // Tracks if the ChannelMerger has pushed this onto it's queue.
526 bool pushed_ = false;
527
528 // The split message readers used for source data.
529 std::vector<SplitMessageReader *> split_message_readers_;
530
531 // The channel to merge.
532 int channel_index_;
533
534 // Our node.
535 int node_index_;
536
537 // Heaps for messages and timestamps.
538 std::vector<
539 std::tuple<monotonic_clock::time_point, uint32_t, SplitMessageReader *>>
540 message_heap_;
541 std::vector<
542 std::tuple<monotonic_clock::time_point, uint32_t, SplitMessageReader *>>
543 timestamp_heap_;
544
545 // Parent channel merger.
546 ChannelMerger *channel_merger_;
547};
548
549// This class handles constructing all the split message readers, channel
550// mergers, and combining the results.
551class ChannelMerger {
552 public:
553 // Builds a ChannelMerger around a set of log files. These are of the format:
554 // {
555 // {log1_part0, log1_part1, ...},
556 // {log2}
557 // }
558 // The inner vector is a list of log file chunks which form up a log file.
559 // The outer vector is a list of log files with subsets of the messages, or
560 // messages from different nodes.
561 ChannelMerger(const std::vector<std::vector<std::string>> &filenames);
562
563 // Returns the nodes that we know how to merge.
564 const std::vector<const Node *> nodes() const;
565 // Sets the node that we will return messages as. Returns true if the node
566 // has log files and will produce data. This can only be called once, and
567 // will likely corrupt state if called a second time.
568 bool SetNode(const Node *target_node);
569
570 // Everything else needs the node set before it works.
571
572 // Returns a timestamp for the oldest message in this group of logfiles.
Austin Schuh858c9f32020-08-31 16:56:12 -0700573 monotonic_clock::time_point OldestMessageTime() const;
Austin Schuh6f3babe2020-01-26 20:34:50 -0800574 // Pops the oldest message.
575 std::tuple<TimestampMerger::DeliveryTimestamp, int,
576 FlatbufferVector<MessageHeader>>
577 PopOldest();
578
Austin Schuh8bd96322020-02-13 21:18:22 -0800579 // Returns the oldest timestamp in the timestamp heap.
580 TimestampMerger::DeliveryTimestamp OldestTimestamp() const;
581 // Returns the oldest timestamp in the timestamp heap for a specific channel.
582 TimestampMerger::DeliveryTimestamp OldestTimestampForChannel(
583 int channel) const;
584
Austin Schuh6f3babe2020-01-26 20:34:50 -0800585 // Returns the config for this set of log files.
586 const Configuration *configuration() const {
587 return log_file_header()->configuration();
588 }
589
590 const LogFileHeader *log_file_header() const {
591 return &log_file_header_.message();
592 }
593
594 // Returns the start times for the configured node's log files.
Austin Schuhcde938c2020-02-02 17:30:07 -0800595 monotonic_clock::time_point monotonic_start_time() const {
Austin Schuh6f3babe2020-01-26 20:34:50 -0800596 return monotonic_clock::time_point(
597 std::chrono::nanoseconds(log_file_header()->monotonic_start_time()));
598 }
Austin Schuhcde938c2020-02-02 17:30:07 -0800599 realtime_clock::time_point realtime_start_time() const {
Austin Schuh6f3babe2020-01-26 20:34:50 -0800600 return realtime_clock::time_point(
601 std::chrono::nanoseconds(log_file_header()->realtime_start_time()));
602 }
603
604 // Returns the node set by SetNode above.
605 const Node *node() const { return node_; }
606
607 // Called by the TimestampMerger when new data is available with the provided
608 // timestamp and channel_index.
609 void Update(monotonic_clock::time_point timestamp, int channel_index) {
610 PushChannelHeap(timestamp, channel_index);
611 }
612
Austin Schuhcde938c2020-02-02 17:30:07 -0800613 // Returns a debug string with all the heaps in it. Generally only useful for
614 // debugging what went wrong.
615 std::string DebugString() const;
Austin Schuh6f3babe2020-01-26 20:34:50 -0800616
Austin Schuh8bd96322020-02-13 21:18:22 -0800617 // Returns true if one of the log files has finished reading everything. When
618 // log file chunks are involved, this means that the last chunk in a log file
619 // has been read. It is acceptable to be missing data at this point in time.
620 bool at_end() const { return at_end_; }
621
622 // Marks that one of the log files is at the end. This should only be called
623 // by timestamp mergers.
624 void NoticeAtEnd() { at_end_ = true; }
625
Austin Schuhcde938c2020-02-02 17:30:07 -0800626 private:
Austin Schuh6f3babe2020-01-26 20:34:50 -0800627 // Pushes the timestamp for new data on the provided channel.
628 void PushChannelHeap(monotonic_clock::time_point timestamp,
629 int channel_index);
630
631 // All the message readers.
632 std::vector<std::unique_ptr<SplitMessageReader>> split_message_readers_;
633
634 // The log header we are claiming to be.
Austin Schuh97789fc2020-08-01 14:42:45 -0700635 FlatbufferVector<LogFileHeader> log_file_header_;
Austin Schuh6f3babe2020-01-26 20:34:50 -0800636
637 // The timestamp mergers which combine data from the split message readers.
638 std::vector<TimestampMerger> timestamp_mergers_;
639
640 // A heap of the channel readers and timestamps for the oldest data in each.
Austin Schuh05b70472020-01-01 17:11:17 -0800641 std::vector<std::pair<monotonic_clock::time_point, int>> channel_heap_;
Austin Schuh8bd96322020-02-13 21:18:22 -0800642 // A heap of just the timestamp channel readers and timestamps for the oldest
643 // data in each.
644 std::vector<std::pair<monotonic_clock::time_point, int>> timestamp_heap_;
Austin Schuh05b70472020-01-01 17:11:17 -0800645
Austin Schuh6f3babe2020-01-26 20:34:50 -0800646 // Configured node.
647 const Node *node_;
648
Austin Schuh8bd96322020-02-13 21:18:22 -0800649 bool at_end_ = false;
650
Austin Schuh6f3babe2020-01-26 20:34:50 -0800651 // Cached copy of the list of nodes.
652 std::vector<const Node *> nodes_;
Austin Schuh05b70472020-01-01 17:11:17 -0800653};
Austin Schuha36c8902019-12-30 18:07:15 -0800654
Austin Schuhee711052020-08-24 16:06:09 -0700655// Returns the node name with a trailing space, or an empty string if we are on
656// a single node.
657std::string MaybeNodeName(const Node *);
658
Austin Schuha36c8902019-12-30 18:07:15 -0800659} // namespace logger
660} // namespace aos
661
662#endif // AOS_EVENTS_LOGGING_LOGFILE_UTILS_H_