blob: 4ab4dca600eb0e2d36417b8311066f2ccdcc2705 [file] [log] [blame]
Austin Schuha36c8902019-12-30 18:07:15 -08001#ifndef AOS_EVENTS_LOGGING_LOGFILE_UTILS_H_
2#define AOS_EVENTS_LOGGING_LOGFILE_UTILS_H_
3
4#include <sys/uio.h>
5
Austin Schuh05b70472020-01-01 17:11:17 -08006#include <deque>
7#include <optional>
Austin Schuhfa895892020-01-07 20:07:41 -08008#include <string>
Austin Schuha36c8902019-12-30 18:07:15 -08009#include <string_view>
10#include <vector>
11
Austin Schuh05b70472020-01-01 17:11:17 -080012#include "absl/types/span.h"
Austin Schuha36c8902019-12-30 18:07:15 -080013#include "aos/events/event_loop.h"
14#include "aos/events/logging/logger_generated.h"
15#include "flatbuffers/flatbuffers.h"
16
17namespace aos {
18namespace logger {
19
// Selects what gets logged on this node for a message.
enum class LogType : uint8_t {
  // Message originated on this node; it should be logged here.
  kLogMessage = 0,
  // Message originated on another node; only the delivery times are logged
  // here.
  kLogDeliveryTimeOnly = 1,
  // Message originated on another node; it is logged here together with its
  // delivery times. The message_gateway is responsible for logging any
  // messages which didn't get delivered.
  kLogMessageAndDeliveryTime = 2,
  // Message originated on the other node and should be logged on this node.
  kLogRemoteMessage = 3,
};
33
Austin Schuha36c8902019-12-30 18:07:15 -080034// This class manages efficiently writing a sequence of detached buffers to a
35// file. It queues them up and batches the write operation.
36class DetachedBufferWriter {
37 public:
38 DetachedBufferWriter(std::string_view filename);
39 ~DetachedBufferWriter();
40
Austin Schuh6f3babe2020-01-26 20:34:50 -080041 std::string_view filename() const { return filename_; }
42
Austin Schuha36c8902019-12-30 18:07:15 -080043 // TODO(austin): Snappy compress the log file if it ends with .snappy!
44
45 // Queues up a finished FlatBufferBuilder to be written. Steals the detached
46 // buffer from it.
47 void QueueSizedFlatbuffer(flatbuffers::FlatBufferBuilder *fbb);
48 // Queues up a detached buffer directly.
49 void QueueSizedFlatbuffer(flatbuffers::DetachedBuffer &&buffer);
Austin Schuhde031b72020-01-10 19:34:41 -080050 // Writes a Span. This is not terribly optimized right now.
51 void WriteSizedFlatbuffer(absl::Span<const uint8_t> span);
Austin Schuha36c8902019-12-30 18:07:15 -080052
53 // Triggers data to be provided to the kernel and written.
54 void Flush();
55
56 private:
Austin Schuh6f3babe2020-01-26 20:34:50 -080057 const std::string filename_;
58
Austin Schuha36c8902019-12-30 18:07:15 -080059 int fd_ = -1;
60
61 // Size of all the data in the queue.
62 size_t queued_size_ = 0;
63
64 // List of buffers to flush.
65 std::vector<flatbuffers::DetachedBuffer> queue_;
66 // List of iovecs to use with writev. This is a member variable to avoid
67 // churn.
68 std::vector<struct iovec> iovec_;
69};
70
71// Packes a message pointed to by the context into a MessageHeader.
72flatbuffers::Offset<MessageHeader> PackMessage(
73 flatbuffers::FlatBufferBuilder *fbb, const Context &context,
74 int channel_index, LogType log_type);
75
Austin Schuh6f3babe2020-01-26 20:34:50 -080076FlatbufferVector<LogFileHeader> ReadHeader(std::string_view filename);
77
Austin Schuh05b70472020-01-01 17:11:17 -080078// Class to read chunks out of a log file.
79class SpanReader {
80 public:
81 SpanReader(std::string_view filename);
Austin Schuha36c8902019-12-30 18:07:15 -080082
Austin Schuh05b70472020-01-01 17:11:17 -080083 ~SpanReader() { close(fd_); }
84
Austin Schuh6f3babe2020-01-26 20:34:50 -080085 std::string_view filename() const { return filename_; }
86
Austin Schuh05b70472020-01-01 17:11:17 -080087 // Returns a span with the data for a message from the log file, excluding
88 // the size.
89 absl::Span<const uint8_t> ReadMessage();
90
91 // Returns true if there is a full message available in the buffer, or if we
92 // will have to read more data from disk.
93 bool MessageAvailable();
94
95 private:
96 // TODO(austin): Optimization:
97 // Allocate the 256k blocks like we do today. But, refcount them with
98 // shared_ptr pointed to by the messageheader that is returned. This avoids
99 // the copy. Need to do more benchmarking.
100
101 // Reads a chunk of data into data_. Returns false if no data was read.
102 bool ReadBlock();
103
Austin Schuh6f3babe2020-01-26 20:34:50 -0800104 const std::string filename_;
105
Austin Schuh05b70472020-01-01 17:11:17 -0800106 // File descriptor for the log file.
107 int fd_ = -1;
108
109 // Allocator which doesn't zero initialize memory.
110 template <typename T>
111 struct DefaultInitAllocator {
112 typedef T value_type;
113
114 template <typename U>
115 void construct(U *p) {
116 ::new (static_cast<void *>(p)) U;
117 }
118
119 template <typename U, typename... Args>
120 void construct(U *p, Args &&... args) {
121 ::new (static_cast<void *>(p)) U(std::forward<Args>(args)...);
122 }
123
124 T *allocate(std::size_t n) {
125 return reinterpret_cast<T *>(::operator new(sizeof(T) * n));
126 }
127
128 template <typename U>
129 void deallocate(U *p, std::size_t /*n*/) {
130 ::operator delete(static_cast<void *>(p));
131 }
132 };
133
134 // Vector to read into. This uses an allocator which doesn't zero
135 // initialize the memory.
136 std::vector<uint8_t, DefaultInitAllocator<uint8_t>> data_;
137
138 // Amount of data consumed already in data_.
139 size_t consumed_data_ = 0;
140
141 // Cached bit for if we have reached the end of the file. Otherwise we will
142 // hammer on the kernel asking for more data each time we send.
143 bool end_of_file_ = false;
144};
145
146// Class which handles reading the header and messages from the log file. This
147// handles any per-file state left before merging below.
148class MessageReader {
149 public:
150 MessageReader(std::string_view filename);
151
Austin Schuh6f3babe2020-01-26 20:34:50 -0800152 std::string_view filename() const { return span_reader_.filename(); }
153
Austin Schuh05b70472020-01-01 17:11:17 -0800154 // Returns the header from the log file.
155 const LogFileHeader *log_file_header() const {
156 return flatbuffers::GetSizePrefixedRoot<LogFileHeader>(
157 configuration_.data());
158 }
159
160 // Returns the minimum maount of data needed to queue up for sorting before
161 // ware guarenteed to not see data out of order.
162 std::chrono::nanoseconds max_out_of_order_duration() const {
163 return max_out_of_order_duration_;
164 }
165
Austin Schuhcde938c2020-02-02 17:30:07 -0800166 // Returns the newest timestamp read out of the log file.
Austin Schuh05b70472020-01-01 17:11:17 -0800167 monotonic_clock::time_point newest_timestamp() const {
168 return newest_timestamp_;
169 }
170
171 // Returns the next message if there is one.
172 std::optional<FlatbufferVector<MessageHeader>> ReadMessage();
173
174 // The time at which we need to read another chunk from the logfile.
175 monotonic_clock::time_point queue_data_time() const {
176 return newest_timestamp() - max_out_of_order_duration();
177 }
178
179 private:
180 // Log chunk reader.
181 SpanReader span_reader_;
182
183 // Vector holding the data for the configuration.
184 std::vector<uint8_t> configuration_;
185
186 // Minimum amount of data to queue up for sorting before we are guarenteed
187 // to not see data out of order.
188 std::chrono::nanoseconds max_out_of_order_duration_;
189
190 // Timestamp of the newest message in a channel queue.
191 monotonic_clock::time_point newest_timestamp_ = monotonic_clock::min_time;
192};
193
Austin Schuh6f3babe2020-01-26 20:34:50 -0800194class TimestampMerger;
Austin Schuh05b70472020-01-01 17:11:17 -0800195
Austin Schuh6f3babe2020-01-26 20:34:50 -0800196// A design requirement is that the relevant data for a channel is not more than
197// max_out_of_order_duration out of order. We approach sorting in layers.
198//
199// 1) Split each (maybe chunked) log file into one queue per channel. Read this
200// log file looking for data pertaining to a specific node.
201// (SplitMessageReader)
202// 2) Merge all the data per channel from the different log files into a sorted
203// list of timestamps and messages. (TimestampMerger)
204// 3) Combine the timestamps and messages. (TimestampMerger)
205// 4) Merge all the channels to produce the next message on a node.
206// (ChannelMerger)
207// 5) Duplicate this entire stack per node.
208
209// This class splits messages and timestamps up into a queue per channel, and
210// handles reading data from multiple chunks.
211class SplitMessageReader {
212 public:
213 SplitMessageReader(const std::vector<std::string> &filenames);
214
215 // Sets the TimestampMerger that gets notified for each channel. The node
216 // that the TimestampMerger is merging as needs to be passed in.
217 void SetTimestampMerger(TimestampMerger *timestamp_merger, int channel,
218 const Node *target_node);
219
220 // Returns the (timestamp, queue_idex) for the oldest message in a channel, or
221 // max_time if there is nothing in the channel.
Austin Schuhcde938c2020-02-02 17:30:07 -0800222 std::tuple<monotonic_clock::time_point, uint32_t, const MessageHeader *>
223 oldest_message(int channel) {
Austin Schuh6f3babe2020-01-26 20:34:50 -0800224 return channels_[channel].data.front_timestamp();
225 }
226
227 // Returns the (timestamp, queue_index) for the oldest delivery time in a
228 // channel, or max_time if there is nothing in the channel.
Austin Schuhcde938c2020-02-02 17:30:07 -0800229 std::tuple<monotonic_clock::time_point, uint32_t, const MessageHeader *>
230 oldest_message(int channel, int destination_node) {
Austin Schuh6f3babe2020-01-26 20:34:50 -0800231 return channels_[channel].timestamps[destination_node].front_timestamp();
232 }
233
234 // Returns the timestamp, queue_index, and message for the oldest data on a
235 // channel. Requeues data as needed.
236 std::tuple<monotonic_clock::time_point, uint32_t,
237 FlatbufferVector<MessageHeader>>
238 PopOldest(int channel_index);
239
240 // Returns the timestamp, queue_index, and message for the oldest timestamp on
241 // a channel delivered to a node. Requeues data as needed.
242 std::tuple<monotonic_clock::time_point, uint32_t,
243 FlatbufferVector<MessageHeader>>
244 PopOldest(int channel, int node_index);
245
246 // Returns the header for the log files.
Austin Schuh05b70472020-01-01 17:11:17 -0800247 const LogFileHeader *log_file_header() const {
Austin Schuhfa895892020-01-07 20:07:41 -0800248 return &log_file_header_.message();
Austin Schuh05b70472020-01-01 17:11:17 -0800249 }
250
Austin Schuh6f3babe2020-01-26 20:34:50 -0800251 // Returns the starting time for this set of log files.
Austin Schuh05b70472020-01-01 17:11:17 -0800252 monotonic_clock::time_point monotonic_start_time() {
253 return monotonic_clock::time_point(
254 std::chrono::nanoseconds(log_file_header()->monotonic_start_time()));
255 }
256 realtime_clock::time_point realtime_start_time() {
257 return realtime_clock::time_point(
258 std::chrono::nanoseconds(log_file_header()->realtime_start_time()));
259 }
260
Austin Schuh6f3babe2020-01-26 20:34:50 -0800261 // Returns the configuration from the log file header.
262 const Configuration *configuration() const {
263 return log_file_header()->configuration();
264 }
265
Austin Schuh05b70472020-01-01 17:11:17 -0800266 // Returns the node who's point of view this log file is from. Make sure this
267 // is a pointer in the configuration() nodes list so it can be consumed
268 // elsewhere.
269 const Node *node() const {
270 if (configuration()->has_nodes()) {
Austin Schuh6f3babe2020-01-26 20:34:50 -0800271 return configuration::GetNodeOrDie(configuration(),
272 log_file_header()->node());
Austin Schuh05b70472020-01-01 17:11:17 -0800273 } else {
274 CHECK(!log_file_header()->has_node());
275 return nullptr;
276 }
277 }
278
Austin Schuh6f3babe2020-01-26 20:34:50 -0800279 // Returns the timestamp of the newest message read from the log file, and the
280 // timestamp that we need to re-queue data.
281 monotonic_clock::time_point newest_timestamp() const {
Austin Schuhcde938c2020-02-02 17:30:07 -0800282 return newest_timestamp_;
Austin Schuh6f3babe2020-01-26 20:34:50 -0800283 }
284
Austin Schuhcde938c2020-02-02 17:30:07 -0800285 // Returns the next time to trigger a requeue.
286 monotonic_clock::time_point time_to_queue() const { return time_to_queue_; }
287
288 // Returns the minimum amount of data needed to queue up for sorting before
289 // ware guarenteed to not see data out of order.
290 std::chrono::nanoseconds max_out_of_order_duration() const {
291 return message_reader_->max_out_of_order_duration();
292 }
293
294 std::string_view filename() const { return message_reader_->filename(); }
Austin Schuh6f3babe2020-01-26 20:34:50 -0800295
296 // Adds more messages to the sorted list. This reads enough data such that
297 // oldest_message_time can be replayed safely. Returns false if the log file
298 // has all been read.
299 bool QueueMessages(monotonic_clock::time_point oldest_message_time);
Austin Schuh05b70472020-01-01 17:11:17 -0800300
Austin Schuhcde938c2020-02-02 17:30:07 -0800301 // Returns debug strings for a channel, and timestamps for a node.
302 std::string DebugString(int channel) const;
303 std::string DebugString(int channel, int node_index) const;
304
Austin Schuh8bd96322020-02-13 21:18:22 -0800305 // Returns true if all the messages have been queued from the last log file in
306 // the list of log files chunks.
307 bool at_end() const { return at_end_; }
308
Austin Schuh05b70472020-01-01 17:11:17 -0800309 private:
Austin Schuh6f3babe2020-01-26 20:34:50 -0800310 // TODO(austin): Need to copy or refcount the message instead of running
311 // multiple copies of the reader. Or maybe have a "as_node" index and hide it
312 // inside.
313
Austin Schuhfa895892020-01-07 20:07:41 -0800314 // Moves to the next log file in the list.
315 bool NextLogFile();
316
Austin Schuh6f3babe2020-01-26 20:34:50 -0800317 // Filenames of the log files.
318 std::vector<std::string> filenames_;
319 // And the index of the next file to open.
320 size_t next_filename_index_ = 0;
Austin Schuh05b70472020-01-01 17:11:17 -0800321
Austin Schuh6f3babe2020-01-26 20:34:50 -0800322 // Log file header to report. This is a copy.
323 FlatbufferDetachedBuffer<LogFileHeader> log_file_header_;
324 // Current log file being read.
325 std::unique_ptr<MessageReader> message_reader_;
Austin Schuh05b70472020-01-01 17:11:17 -0800326
327 // Datastructure to hold the list of messages, cached timestamp for the
328 // oldest message, and sender to send with.
Austin Schuh6f3babe2020-01-26 20:34:50 -0800329 struct MessageHeaderQueue {
330 // If true, this is a timestamp queue.
331 bool timestamps = false;
Austin Schuh05b70472020-01-01 17:11:17 -0800332
Austin Schuh6f3babe2020-01-26 20:34:50 -0800333 // Returns a reference to the the oldest message.
334 FlatbufferVector<MessageHeader> &front() {
335 CHECK_GT(data_.size(), 0u);
336 return data_.front();
Austin Schuh05b70472020-01-01 17:11:17 -0800337 }
Austin Schuh6f3babe2020-01-26 20:34:50 -0800338
Austin Schuhcde938c2020-02-02 17:30:07 -0800339 // Adds a message to the back of the queue. Returns true if it was actually
340 // emplaced.
341 bool emplace_back(FlatbufferVector<MessageHeader> &&msg);
Austin Schuh6f3babe2020-01-26 20:34:50 -0800342
343 // Drops the front message. Invalidates the front() reference.
344 void pop_front();
345
346 // The size of the queue.
347 size_t size() { return data_.size(); }
348
Austin Schuhcde938c2020-02-02 17:30:07 -0800349 // Returns a debug string with info about each message in the queue.
350 std::string DebugString() const;
351
Austin Schuh6f3babe2020-01-26 20:34:50 -0800352 // Returns the (timestamp, queue_index) for the oldest message.
Austin Schuhcde938c2020-02-02 17:30:07 -0800353 const std::tuple<monotonic_clock::time_point, uint32_t,
354 const MessageHeader *>
355 front_timestamp() {
Austin Schuh6f3babe2020-01-26 20:34:50 -0800356 CHECK_GT(data_.size(), 0u);
357 return std::make_tuple(
358 monotonic_clock::time_point(std::chrono::nanoseconds(
359 front().message().monotonic_sent_time())),
Austin Schuhcde938c2020-02-02 17:30:07 -0800360 front().message().queue_index(), &front().message());
Austin Schuh6f3babe2020-01-26 20:34:50 -0800361 }
362
363 // Pointer to the timestamp merger for this queue if available.
364 TimestampMerger *timestamp_merger = nullptr;
365 // Pointer to the reader which feeds this queue.
366 SplitMessageReader *split_reader = nullptr;
367
368 private:
369 // The data.
370 std::deque<FlatbufferVector<MessageHeader>> data_;
Austin Schuh05b70472020-01-01 17:11:17 -0800371 };
372
Austin Schuh6f3babe2020-01-26 20:34:50 -0800373 // All the queues needed for a channel. There isn't going to be data in all
374 // of these.
375 struct ChannelData {
376 // The data queue for the channel.
377 MessageHeaderQueue data;
378 // Queues for timestamps for each node.
379 std::vector<MessageHeaderQueue> timestamps;
380 };
Austin Schuhfa895892020-01-07 20:07:41 -0800381
Austin Schuh6f3babe2020-01-26 20:34:50 -0800382 // Data for all the channels.
Austin Schuh05b70472020-01-01 17:11:17 -0800383 std::vector<ChannelData> channels_;
384
Austin Schuh6f3babe2020-01-26 20:34:50 -0800385 // Once we know the node that this SplitMessageReader will be writing as,
386 // there will be only one MessageHeaderQueue that a specific channel matches.
387 // Precompute this here for efficiency.
388 std::vector<MessageHeaderQueue *> channels_to_write_;
389
Austin Schuhcde938c2020-02-02 17:30:07 -0800390 monotonic_clock::time_point time_to_queue_ = monotonic_clock::min_time;
391
392 // Latches true when we hit the end of the last log file and there is no sense
393 // poking it further.
394 bool at_end_ = false;
395
396 // Timestamp of the newest message that was read and actually queued. We want
397 // to track this independently from the log file because we need the
398 // timestamps here to be timestamps of messages that are queued.
399 monotonic_clock::time_point newest_timestamp_ = monotonic_clock::min_time;
Austin Schuh6f3babe2020-01-26 20:34:50 -0800400};
401
402class ChannelMerger;
403
404// Sorts channels (and timestamps) from multiple log files for a single channel.
405class TimestampMerger {
406 public:
407 TimestampMerger(const Configuration *configuration,
408 std::vector<SplitMessageReader *> split_message_readers,
409 int channel_index, const Node *target_node,
410 ChannelMerger *channel_merger);
411
412 // Metadata used to schedule the message.
413 struct DeliveryTimestamp {
414 monotonic_clock::time_point monotonic_event_time =
415 monotonic_clock::min_time;
416 realtime_clock::time_point realtime_event_time = realtime_clock::min_time;
417
418 monotonic_clock::time_point monotonic_remote_time =
419 monotonic_clock::min_time;
420 realtime_clock::time_point realtime_remote_time = realtime_clock::min_time;
421 uint32_t remote_queue_index = 0xffffffff;
422 };
423
424 // Pushes SplitMessageReader onto the timestamp heap. This should only be
425 // called when timestamps are placed in the channel this class is merging for
426 // the reader.
427 void UpdateTimestamp(
428 SplitMessageReader *split_message_reader,
Austin Schuhcde938c2020-02-02 17:30:07 -0800429 std::tuple<monotonic_clock::time_point, uint32_t, const MessageHeader *>
430 oldest_message_time) {
Austin Schuh6f3babe2020-01-26 20:34:50 -0800431 PushTimestampHeap(oldest_message_time, split_message_reader);
432 }
433 // Pushes SplitMessageReader onto the message heap. This should only be
434 // called when data is placed in the channel this class is merging for the
435 // reader.
436 void Update(
437 SplitMessageReader *split_message_reader,
Austin Schuhcde938c2020-02-02 17:30:07 -0800438 std::tuple<monotonic_clock::time_point, uint32_t, const MessageHeader *>
439 oldest_message_time) {
Austin Schuh6f3babe2020-01-26 20:34:50 -0800440 PushMessageHeap(oldest_message_time, split_message_reader);
441 }
442
Austin Schuhcde938c2020-02-02 17:30:07 -0800443 // Returns the oldest combined timestamp and data for this channel. If there
444 // isn't a matching piece of data, returns only the timestamp with no data.
445 // The caller can determine what the appropriate action is to recover.
Austin Schuh6f3babe2020-01-26 20:34:50 -0800446 std::tuple<DeliveryTimestamp, FlatbufferVector<MessageHeader>> PopOldest();
447
Austin Schuh8bd96322020-02-13 21:18:22 -0800448 // Returns the oldest forwarding timestamp.
449 DeliveryTimestamp OldestTimestamp() const;
450
Austin Schuh6f3babe2020-01-26 20:34:50 -0800451 // Tracks if the channel merger has pushed this onto it's heap or not.
452 bool pushed() { return pushed_; }
453 // Sets if this has been pushed to the channel merger heap. Should only be
454 // called by the channel merger.
455 void set_pushed(bool pushed) { pushed_ = pushed; }
456
Austin Schuhcde938c2020-02-02 17:30:07 -0800457 // Returns a debug string with the heaps printed out.
458 std::string DebugString() const;
459
Austin Schuh8bd96322020-02-13 21:18:22 -0800460 // Returns true if we have timestamps.
461 bool has_timestamps() const { return has_timestamps_; }
462
463 // Records that one of the log files ran out of data. This should only be
464 // called by a SplitMessageReader.
465 void NoticeAtEnd();
466
Austin Schuh6f3babe2020-01-26 20:34:50 -0800467 private:
468 // Pushes messages and timestamps to the corresponding heaps.
469 void PushMessageHeap(
Austin Schuhcde938c2020-02-02 17:30:07 -0800470 std::tuple<monotonic_clock::time_point, uint32_t, const MessageHeader *>
471 timestamp,
Austin Schuh6f3babe2020-01-26 20:34:50 -0800472 SplitMessageReader *split_message_reader);
473 void PushTimestampHeap(
Austin Schuhcde938c2020-02-02 17:30:07 -0800474 std::tuple<monotonic_clock::time_point, uint32_t, const MessageHeader *>
475 timestamp,
Austin Schuh6f3babe2020-01-26 20:34:50 -0800476 SplitMessageReader *split_message_reader);
477
478 // Pops a message from the message heap. This automatically triggers the
479 // split message reader to re-fetch any new data.
480 std::tuple<monotonic_clock::time_point, uint32_t,
481 FlatbufferVector<MessageHeader>>
482 PopMessageHeap();
Austin Schuhcde938c2020-02-02 17:30:07 -0800483
484 std::tuple<monotonic_clock::time_point, uint32_t, const MessageHeader *>
485 oldest_message() const;
486 std::tuple<monotonic_clock::time_point, uint32_t, const MessageHeader *>
487 oldest_timestamp() const;
Austin Schuh6f3babe2020-01-26 20:34:50 -0800488 // Pops a message from the timestamp heap. This automatically triggers the
489 // split message reader to re-fetch any new data.
490 std::tuple<monotonic_clock::time_point, uint32_t,
491 FlatbufferVector<MessageHeader>>
492 PopTimestampHeap();
493
494 const Configuration *configuration_;
495
496 // If true, this is a forwarded channel and timestamps should be matched.
497 bool has_timestamps_ = false;
498
499 // Tracks if the ChannelMerger has pushed this onto it's queue.
500 bool pushed_ = false;
501
502 // The split message readers used for source data.
503 std::vector<SplitMessageReader *> split_message_readers_;
504
505 // The channel to merge.
506 int channel_index_;
507
508 // Our node.
509 int node_index_;
510
511 // Heaps for messages and timestamps.
512 std::vector<
513 std::tuple<monotonic_clock::time_point, uint32_t, SplitMessageReader *>>
514 message_heap_;
515 std::vector<
516 std::tuple<monotonic_clock::time_point, uint32_t, SplitMessageReader *>>
517 timestamp_heap_;
518
519 // Parent channel merger.
520 ChannelMerger *channel_merger_;
521};
522
523// This class handles constructing all the split message readers, channel
524// mergers, and combining the results.
525class ChannelMerger {
526 public:
527 // Builds a ChannelMerger around a set of log files. These are of the format:
528 // {
529 // {log1_part0, log1_part1, ...},
530 // {log2}
531 // }
532 // The inner vector is a list of log file chunks which form up a log file.
533 // The outer vector is a list of log files with subsets of the messages, or
534 // messages from different nodes.
535 ChannelMerger(const std::vector<std::vector<std::string>> &filenames);
536
537 // Returns the nodes that we know how to merge.
538 const std::vector<const Node *> nodes() const;
539 // Sets the node that we will return messages as. Returns true if the node
540 // has log files and will produce data. This can only be called once, and
541 // will likely corrupt state if called a second time.
542 bool SetNode(const Node *target_node);
543
544 // Everything else needs the node set before it works.
545
546 // Returns a timestamp for the oldest message in this group of logfiles.
547 monotonic_clock::time_point OldestMessage() const;
548 // Pops the oldest message.
549 std::tuple<TimestampMerger::DeliveryTimestamp, int,
550 FlatbufferVector<MessageHeader>>
551 PopOldest();
552
Austin Schuh8bd96322020-02-13 21:18:22 -0800553 // Returns the oldest timestamp in the timestamp heap.
554 TimestampMerger::DeliveryTimestamp OldestTimestamp() const;
555 // Returns the oldest timestamp in the timestamp heap for a specific channel.
556 TimestampMerger::DeliveryTimestamp OldestTimestampForChannel(
557 int channel) const;
558
Austin Schuh6f3babe2020-01-26 20:34:50 -0800559 // Returns the config for this set of log files.
560 const Configuration *configuration() const {
561 return log_file_header()->configuration();
562 }
563
564 const LogFileHeader *log_file_header() const {
565 return &log_file_header_.message();
566 }
567
568 // Returns the start times for the configured node's log files.
Austin Schuhcde938c2020-02-02 17:30:07 -0800569 monotonic_clock::time_point monotonic_start_time() const {
Austin Schuh6f3babe2020-01-26 20:34:50 -0800570 return monotonic_clock::time_point(
571 std::chrono::nanoseconds(log_file_header()->monotonic_start_time()));
572 }
Austin Schuhcde938c2020-02-02 17:30:07 -0800573 realtime_clock::time_point realtime_start_time() const {
Austin Schuh6f3babe2020-01-26 20:34:50 -0800574 return realtime_clock::time_point(
575 std::chrono::nanoseconds(log_file_header()->realtime_start_time()));
576 }
577
578 // Returns the node set by SetNode above.
579 const Node *node() const { return node_; }
580
581 // Called by the TimestampMerger when new data is available with the provided
582 // timestamp and channel_index.
583 void Update(monotonic_clock::time_point timestamp, int channel_index) {
584 PushChannelHeap(timestamp, channel_index);
585 }
586
Austin Schuhcde938c2020-02-02 17:30:07 -0800587 // Returns a debug string with all the heaps in it. Generally only useful for
588 // debugging what went wrong.
589 std::string DebugString() const;
Austin Schuh6f3babe2020-01-26 20:34:50 -0800590
Austin Schuh8bd96322020-02-13 21:18:22 -0800591 // Returns true if one of the log files has finished reading everything. When
592 // log file chunks are involved, this means that the last chunk in a log file
593 // has been read. It is acceptable to be missing data at this point in time.
594 bool at_end() const { return at_end_; }
595
596 // Marks that one of the log files is at the end. This should only be called
597 // by timestamp mergers.
598 void NoticeAtEnd() { at_end_ = true; }
599
Austin Schuhcde938c2020-02-02 17:30:07 -0800600 private:
Austin Schuh6f3babe2020-01-26 20:34:50 -0800601 // Pushes the timestamp for new data on the provided channel.
602 void PushChannelHeap(monotonic_clock::time_point timestamp,
603 int channel_index);
604
605 // All the message readers.
606 std::vector<std::unique_ptr<SplitMessageReader>> split_message_readers_;
607
608 // The log header we are claiming to be.
609 FlatbufferDetachedBuffer<LogFileHeader> log_file_header_;
610
611 // The timestamp mergers which combine data from the split message readers.
612 std::vector<TimestampMerger> timestamp_mergers_;
613
614 // A heap of the channel readers and timestamps for the oldest data in each.
Austin Schuh05b70472020-01-01 17:11:17 -0800615 std::vector<std::pair<monotonic_clock::time_point, int>> channel_heap_;
Austin Schuh8bd96322020-02-13 21:18:22 -0800616 // A heap of just the timestamp channel readers and timestamps for the oldest
617 // data in each.
618 std::vector<std::pair<monotonic_clock::time_point, int>> timestamp_heap_;
Austin Schuh05b70472020-01-01 17:11:17 -0800619
Austin Schuh6f3babe2020-01-26 20:34:50 -0800620 // Configured node.
621 const Node *node_;
622
Austin Schuh8bd96322020-02-13 21:18:22 -0800623 bool at_end_ = false;
624
Austin Schuh6f3babe2020-01-26 20:34:50 -0800625 // Cached copy of the list of nodes.
626 std::vector<const Node *> nodes_;
Austin Schuh05b70472020-01-01 17:11:17 -0800627};
Austin Schuha36c8902019-12-30 18:07:15 -0800628
629} // namespace logger
630} // namespace aos
631
632#endif // AOS_EVENTS_LOGGING_LOGFILE_UTILS_H_