blob: 7acbbd37bb91d2e9a425f5220349b13cfb6c2c0f [file] [log] [blame]
Austin Schuha36c8902019-12-30 18:07:15 -08001#ifndef AOS_EVENTS_LOGGING_LOGFILE_UTILS_H_
2#define AOS_EVENTS_LOGGING_LOGFILE_UTILS_H_
3
4#include <sys/uio.h>
5
Austin Schuh05b70472020-01-01 17:11:17 -08006#include <deque>
7#include <optional>
Austin Schuhfa895892020-01-07 20:07:41 -08008#include <string>
Austin Schuha36c8902019-12-30 18:07:15 -08009#include <string_view>
Brian Silverman98360e22020-04-28 16:51:20 -070010#include <tuple>
Austin Schuha36c8902019-12-30 18:07:15 -080011#include <vector>
12
Austin Schuh05b70472020-01-01 17:11:17 -080013#include "absl/types/span.h"
Austin Schuha36c8902019-12-30 18:07:15 -080014#include "aos/events/event_loop.h"
15#include "aos/events/logging/logger_generated.h"
16#include "flatbuffers/flatbuffers.h"
17
18namespace aos {
19namespace logger {
20
// Selects what gets logged on this node for a given message.
enum class LogType : uint8_t {
  // Message was sent from this node and is logged here.
  kLogMessage,
  // Message was sent from a different node; only its delivery times are
  // logged here.
  kLogDeliveryTimeOnly,
  // Message was sent from a different node; the message and its delivery times
  // are logged here together.  Logging of messages which were never delivered
  // is left to the message_gateway.
  kLogMessageAndDeliveryTime,
  // Message was sent from the other node and is logged on this node.
  kLogRemoteMessage,
};
34
Austin Schuha36c8902019-12-30 18:07:15 -080035// This class manages efficiently writing a sequence of detached buffers to a
36// file. It queues them up and batches the write operation.
37class DetachedBufferWriter {
38 public:
39 DetachedBufferWriter(std::string_view filename);
40 ~DetachedBufferWriter();
41
Brian Silverman98360e22020-04-28 16:51:20 -070042 DetachedBufferWriter(const DetachedBufferWriter &) = delete;
43 DetachedBufferWriter &operator=(const DetachedBufferWriter &) = delete;
44
Austin Schuh6f3babe2020-01-26 20:34:50 -080045 std::string_view filename() const { return filename_; }
46
Austin Schuha36c8902019-12-30 18:07:15 -080047 // TODO(austin): Snappy compress the log file if it ends with .snappy!
48
49 // Queues up a finished FlatBufferBuilder to be written. Steals the detached
50 // buffer from it.
51 void QueueSizedFlatbuffer(flatbuffers::FlatBufferBuilder *fbb);
52 // Queues up a detached buffer directly.
53 void QueueSizedFlatbuffer(flatbuffers::DetachedBuffer &&buffer);
Austin Schuhde031b72020-01-10 19:34:41 -080054 // Writes a Span. This is not terribly optimized right now.
55 void WriteSizedFlatbuffer(absl::Span<const uint8_t> span);
Austin Schuha36c8902019-12-30 18:07:15 -080056
57 // Triggers data to be provided to the kernel and written.
58 void Flush();
59
Brian Silverman98360e22020-04-28 16:51:20 -070060 // Returns the number of bytes written.
61 size_t written_size() const { return written_size_; }
62
63 // Returns the number of bytes written or currently queued.
64 size_t total_size() const { return written_size_ + queued_size_; }
65
Austin Schuha36c8902019-12-30 18:07:15 -080066 private:
Austin Schuh6f3babe2020-01-26 20:34:50 -080067 const std::string filename_;
68
Austin Schuha36c8902019-12-30 18:07:15 -080069 int fd_ = -1;
70
71 // Size of all the data in the queue.
72 size_t queued_size_ = 0;
Brian Silverman98360e22020-04-28 16:51:20 -070073 size_t written_size_ = 0;
Austin Schuha36c8902019-12-30 18:07:15 -080074
75 // List of buffers to flush.
76 std::vector<flatbuffers::DetachedBuffer> queue_;
77 // List of iovecs to use with writev. This is a member variable to avoid
78 // churn.
79 std::vector<struct iovec> iovec_;
80};
81
82// Packes a message pointed to by the context into a MessageHeader.
83flatbuffers::Offset<MessageHeader> PackMessage(
84 flatbuffers::FlatBufferBuilder *fbb, const Context &context,
85 int channel_index, LogType log_type);
86
Austin Schuh6f3babe2020-01-26 20:34:50 -080087FlatbufferVector<LogFileHeader> ReadHeader(std::string_view filename);
88
Austin Schuh05b70472020-01-01 17:11:17 -080089// Class to read chunks out of a log file.
90class SpanReader {
91 public:
92 SpanReader(std::string_view filename);
Austin Schuha36c8902019-12-30 18:07:15 -080093
Austin Schuh05b70472020-01-01 17:11:17 -080094 ~SpanReader() { close(fd_); }
95
Austin Schuh6f3babe2020-01-26 20:34:50 -080096 std::string_view filename() const { return filename_; }
97
Austin Schuh05b70472020-01-01 17:11:17 -080098 // Returns a span with the data for a message from the log file, excluding
99 // the size.
100 absl::Span<const uint8_t> ReadMessage();
101
102 // Returns true if there is a full message available in the buffer, or if we
103 // will have to read more data from disk.
104 bool MessageAvailable();
105
106 private:
107 // TODO(austin): Optimization:
108 // Allocate the 256k blocks like we do today. But, refcount them with
109 // shared_ptr pointed to by the messageheader that is returned. This avoids
110 // the copy. Need to do more benchmarking.
111
112 // Reads a chunk of data into data_. Returns false if no data was read.
113 bool ReadBlock();
114
Austin Schuh6f3babe2020-01-26 20:34:50 -0800115 const std::string filename_;
116
Austin Schuh05b70472020-01-01 17:11:17 -0800117 // File descriptor for the log file.
118 int fd_ = -1;
119
120 // Allocator which doesn't zero initialize memory.
121 template <typename T>
122 struct DefaultInitAllocator {
123 typedef T value_type;
124
125 template <typename U>
126 void construct(U *p) {
127 ::new (static_cast<void *>(p)) U;
128 }
129
130 template <typename U, typename... Args>
131 void construct(U *p, Args &&... args) {
132 ::new (static_cast<void *>(p)) U(std::forward<Args>(args)...);
133 }
134
135 T *allocate(std::size_t n) {
136 return reinterpret_cast<T *>(::operator new(sizeof(T) * n));
137 }
138
139 template <typename U>
140 void deallocate(U *p, std::size_t /*n*/) {
141 ::operator delete(static_cast<void *>(p));
142 }
143 };
144
145 // Vector to read into. This uses an allocator which doesn't zero
146 // initialize the memory.
147 std::vector<uint8_t, DefaultInitAllocator<uint8_t>> data_;
148
149 // Amount of data consumed already in data_.
150 size_t consumed_data_ = 0;
151
152 // Cached bit for if we have reached the end of the file. Otherwise we will
153 // hammer on the kernel asking for more data each time we send.
154 bool end_of_file_ = false;
155};
156
157// Class which handles reading the header and messages from the log file. This
158// handles any per-file state left before merging below.
159class MessageReader {
160 public:
161 MessageReader(std::string_view filename);
162
Austin Schuh6f3babe2020-01-26 20:34:50 -0800163 std::string_view filename() const { return span_reader_.filename(); }
164
Austin Schuh05b70472020-01-01 17:11:17 -0800165 // Returns the header from the log file.
166 const LogFileHeader *log_file_header() const {
167 return flatbuffers::GetSizePrefixedRoot<LogFileHeader>(
168 configuration_.data());
169 }
170
171 // Returns the minimum maount of data needed to queue up for sorting before
172 // ware guarenteed to not see data out of order.
173 std::chrono::nanoseconds max_out_of_order_duration() const {
174 return max_out_of_order_duration_;
175 }
176
Austin Schuhcde938c2020-02-02 17:30:07 -0800177 // Returns the newest timestamp read out of the log file.
Austin Schuh05b70472020-01-01 17:11:17 -0800178 monotonic_clock::time_point newest_timestamp() const {
179 return newest_timestamp_;
180 }
181
182 // Returns the next message if there is one.
183 std::optional<FlatbufferVector<MessageHeader>> ReadMessage();
184
185 // The time at which we need to read another chunk from the logfile.
186 monotonic_clock::time_point queue_data_time() const {
187 return newest_timestamp() - max_out_of_order_duration();
188 }
189
190 private:
191 // Log chunk reader.
192 SpanReader span_reader_;
193
194 // Vector holding the data for the configuration.
195 std::vector<uint8_t> configuration_;
196
197 // Minimum amount of data to queue up for sorting before we are guarenteed
198 // to not see data out of order.
199 std::chrono::nanoseconds max_out_of_order_duration_;
200
201 // Timestamp of the newest message in a channel queue.
202 monotonic_clock::time_point newest_timestamp_ = monotonic_clock::min_time;
203};
204
Austin Schuh6f3babe2020-01-26 20:34:50 -0800205class TimestampMerger;
Austin Schuh05b70472020-01-01 17:11:17 -0800206
Austin Schuh6f3babe2020-01-26 20:34:50 -0800207// A design requirement is that the relevant data for a channel is not more than
208// max_out_of_order_duration out of order. We approach sorting in layers.
209//
210// 1) Split each (maybe chunked) log file into one queue per channel. Read this
211// log file looking for data pertaining to a specific node.
212// (SplitMessageReader)
213// 2) Merge all the data per channel from the different log files into a sorted
214// list of timestamps and messages. (TimestampMerger)
215// 3) Combine the timestamps and messages. (TimestampMerger)
216// 4) Merge all the channels to produce the next message on a node.
217// (ChannelMerger)
218// 5) Duplicate this entire stack per node.
219
220// This class splits messages and timestamps up into a queue per channel, and
221// handles reading data from multiple chunks.
222class SplitMessageReader {
223 public:
224 SplitMessageReader(const std::vector<std::string> &filenames);
225
226 // Sets the TimestampMerger that gets notified for each channel. The node
227 // that the TimestampMerger is merging as needs to be passed in.
228 void SetTimestampMerger(TimestampMerger *timestamp_merger, int channel,
229 const Node *target_node);
230
231 // Returns the (timestamp, queue_idex) for the oldest message in a channel, or
232 // max_time if there is nothing in the channel.
Austin Schuhcde938c2020-02-02 17:30:07 -0800233 std::tuple<monotonic_clock::time_point, uint32_t, const MessageHeader *>
234 oldest_message(int channel) {
Austin Schuh6f3babe2020-01-26 20:34:50 -0800235 return channels_[channel].data.front_timestamp();
236 }
237
238 // Returns the (timestamp, queue_index) for the oldest delivery time in a
239 // channel, or max_time if there is nothing in the channel.
Austin Schuhcde938c2020-02-02 17:30:07 -0800240 std::tuple<monotonic_clock::time_point, uint32_t, const MessageHeader *>
241 oldest_message(int channel, int destination_node) {
Austin Schuh6f3babe2020-01-26 20:34:50 -0800242 return channels_[channel].timestamps[destination_node].front_timestamp();
243 }
244
245 // Returns the timestamp, queue_index, and message for the oldest data on a
246 // channel. Requeues data as needed.
247 std::tuple<monotonic_clock::time_point, uint32_t,
248 FlatbufferVector<MessageHeader>>
249 PopOldest(int channel_index);
250
251 // Returns the timestamp, queue_index, and message for the oldest timestamp on
252 // a channel delivered to a node. Requeues data as needed.
253 std::tuple<monotonic_clock::time_point, uint32_t,
254 FlatbufferVector<MessageHeader>>
255 PopOldest(int channel, int node_index);
256
257 // Returns the header for the log files.
Austin Schuh05b70472020-01-01 17:11:17 -0800258 const LogFileHeader *log_file_header() const {
Austin Schuhfa895892020-01-07 20:07:41 -0800259 return &log_file_header_.message();
Austin Schuh05b70472020-01-01 17:11:17 -0800260 }
261
Austin Schuh6f3babe2020-01-26 20:34:50 -0800262 // Returns the starting time for this set of log files.
Austin Schuh05b70472020-01-01 17:11:17 -0800263 monotonic_clock::time_point monotonic_start_time() {
264 return monotonic_clock::time_point(
265 std::chrono::nanoseconds(log_file_header()->monotonic_start_time()));
266 }
267 realtime_clock::time_point realtime_start_time() {
268 return realtime_clock::time_point(
269 std::chrono::nanoseconds(log_file_header()->realtime_start_time()));
270 }
271
Austin Schuh6f3babe2020-01-26 20:34:50 -0800272 // Returns the configuration from the log file header.
273 const Configuration *configuration() const {
274 return log_file_header()->configuration();
275 }
276
Austin Schuh05b70472020-01-01 17:11:17 -0800277 // Returns the node who's point of view this log file is from. Make sure this
278 // is a pointer in the configuration() nodes list so it can be consumed
279 // elsewhere.
280 const Node *node() const {
281 if (configuration()->has_nodes()) {
Austin Schuh6f3babe2020-01-26 20:34:50 -0800282 return configuration::GetNodeOrDie(configuration(),
283 log_file_header()->node());
Austin Schuh05b70472020-01-01 17:11:17 -0800284 } else {
285 CHECK(!log_file_header()->has_node());
286 return nullptr;
287 }
288 }
289
Austin Schuh6f3babe2020-01-26 20:34:50 -0800290 // Returns the timestamp of the newest message read from the log file, and the
291 // timestamp that we need to re-queue data.
292 monotonic_clock::time_point newest_timestamp() const {
Austin Schuhcde938c2020-02-02 17:30:07 -0800293 return newest_timestamp_;
Austin Schuh6f3babe2020-01-26 20:34:50 -0800294 }
295
Austin Schuhcde938c2020-02-02 17:30:07 -0800296 // Returns the next time to trigger a requeue.
297 monotonic_clock::time_point time_to_queue() const { return time_to_queue_; }
298
299 // Returns the minimum amount of data needed to queue up for sorting before
300 // ware guarenteed to not see data out of order.
301 std::chrono::nanoseconds max_out_of_order_duration() const {
302 return message_reader_->max_out_of_order_duration();
303 }
304
305 std::string_view filename() const { return message_reader_->filename(); }
Austin Schuh6f3babe2020-01-26 20:34:50 -0800306
307 // Adds more messages to the sorted list. This reads enough data such that
308 // oldest_message_time can be replayed safely. Returns false if the log file
309 // has all been read.
310 bool QueueMessages(monotonic_clock::time_point oldest_message_time);
Austin Schuh05b70472020-01-01 17:11:17 -0800311
Austin Schuhcde938c2020-02-02 17:30:07 -0800312 // Returns debug strings for a channel, and timestamps for a node.
313 std::string DebugString(int channel) const;
314 std::string DebugString(int channel, int node_index) const;
315
Austin Schuh8bd96322020-02-13 21:18:22 -0800316 // Returns true if all the messages have been queued from the last log file in
317 // the list of log files chunks.
318 bool at_end() const { return at_end_; }
319
Austin Schuh05b70472020-01-01 17:11:17 -0800320 private:
Austin Schuh6f3babe2020-01-26 20:34:50 -0800321 // TODO(austin): Need to copy or refcount the message instead of running
322 // multiple copies of the reader. Or maybe have a "as_node" index and hide it
323 // inside.
324
Austin Schuhfa895892020-01-07 20:07:41 -0800325 // Moves to the next log file in the list.
326 bool NextLogFile();
327
Austin Schuh6f3babe2020-01-26 20:34:50 -0800328 // Filenames of the log files.
329 std::vector<std::string> filenames_;
330 // And the index of the next file to open.
331 size_t next_filename_index_ = 0;
Austin Schuh05b70472020-01-01 17:11:17 -0800332
Austin Schuh6f3babe2020-01-26 20:34:50 -0800333 // Log file header to report. This is a copy.
334 FlatbufferDetachedBuffer<LogFileHeader> log_file_header_;
335 // Current log file being read.
336 std::unique_ptr<MessageReader> message_reader_;
Austin Schuh05b70472020-01-01 17:11:17 -0800337
338 // Datastructure to hold the list of messages, cached timestamp for the
339 // oldest message, and sender to send with.
Austin Schuh6f3babe2020-01-26 20:34:50 -0800340 struct MessageHeaderQueue {
341 // If true, this is a timestamp queue.
342 bool timestamps = false;
Austin Schuh05b70472020-01-01 17:11:17 -0800343
Austin Schuh6f3babe2020-01-26 20:34:50 -0800344 // Returns a reference to the the oldest message.
345 FlatbufferVector<MessageHeader> &front() {
346 CHECK_GT(data_.size(), 0u);
347 return data_.front();
Austin Schuh05b70472020-01-01 17:11:17 -0800348 }
Austin Schuh6f3babe2020-01-26 20:34:50 -0800349
Austin Schuhcde938c2020-02-02 17:30:07 -0800350 // Adds a message to the back of the queue. Returns true if it was actually
351 // emplaced.
352 bool emplace_back(FlatbufferVector<MessageHeader> &&msg);
Austin Schuh6f3babe2020-01-26 20:34:50 -0800353
354 // Drops the front message. Invalidates the front() reference.
355 void pop_front();
356
357 // The size of the queue.
358 size_t size() { return data_.size(); }
359
Austin Schuhcde938c2020-02-02 17:30:07 -0800360 // Returns a debug string with info about each message in the queue.
361 std::string DebugString() const;
362
Austin Schuh6f3babe2020-01-26 20:34:50 -0800363 // Returns the (timestamp, queue_index) for the oldest message.
Austin Schuhcde938c2020-02-02 17:30:07 -0800364 const std::tuple<monotonic_clock::time_point, uint32_t,
365 const MessageHeader *>
366 front_timestamp() {
Austin Schuh6f3babe2020-01-26 20:34:50 -0800367 CHECK_GT(data_.size(), 0u);
368 return std::make_tuple(
369 monotonic_clock::time_point(std::chrono::nanoseconds(
370 front().message().monotonic_sent_time())),
Austin Schuhcde938c2020-02-02 17:30:07 -0800371 front().message().queue_index(), &front().message());
Austin Schuh6f3babe2020-01-26 20:34:50 -0800372 }
373
374 // Pointer to the timestamp merger for this queue if available.
375 TimestampMerger *timestamp_merger = nullptr;
376 // Pointer to the reader which feeds this queue.
377 SplitMessageReader *split_reader = nullptr;
378
379 private:
380 // The data.
381 std::deque<FlatbufferVector<MessageHeader>> data_;
Austin Schuh05b70472020-01-01 17:11:17 -0800382 };
383
Austin Schuh6f3babe2020-01-26 20:34:50 -0800384 // All the queues needed for a channel. There isn't going to be data in all
385 // of these.
386 struct ChannelData {
387 // The data queue for the channel.
388 MessageHeaderQueue data;
389 // Queues for timestamps for each node.
390 std::vector<MessageHeaderQueue> timestamps;
391 };
Austin Schuhfa895892020-01-07 20:07:41 -0800392
Austin Schuh6f3babe2020-01-26 20:34:50 -0800393 // Data for all the channels.
Austin Schuh05b70472020-01-01 17:11:17 -0800394 std::vector<ChannelData> channels_;
395
Austin Schuh6f3babe2020-01-26 20:34:50 -0800396 // Once we know the node that this SplitMessageReader will be writing as,
397 // there will be only one MessageHeaderQueue that a specific channel matches.
398 // Precompute this here for efficiency.
399 std::vector<MessageHeaderQueue *> channels_to_write_;
400
Austin Schuhcde938c2020-02-02 17:30:07 -0800401 monotonic_clock::time_point time_to_queue_ = monotonic_clock::min_time;
402
403 // Latches true when we hit the end of the last log file and there is no sense
404 // poking it further.
405 bool at_end_ = false;
406
407 // Timestamp of the newest message that was read and actually queued. We want
408 // to track this independently from the log file because we need the
409 // timestamps here to be timestamps of messages that are queued.
410 monotonic_clock::time_point newest_timestamp_ = monotonic_clock::min_time;
Austin Schuh6f3babe2020-01-26 20:34:50 -0800411};
412
413class ChannelMerger;
414
415// Sorts channels (and timestamps) from multiple log files for a single channel.
416class TimestampMerger {
417 public:
418 TimestampMerger(const Configuration *configuration,
419 std::vector<SplitMessageReader *> split_message_readers,
420 int channel_index, const Node *target_node,
421 ChannelMerger *channel_merger);
422
423 // Metadata used to schedule the message.
424 struct DeliveryTimestamp {
425 monotonic_clock::time_point monotonic_event_time =
426 monotonic_clock::min_time;
427 realtime_clock::time_point realtime_event_time = realtime_clock::min_time;
428
429 monotonic_clock::time_point monotonic_remote_time =
430 monotonic_clock::min_time;
431 realtime_clock::time_point realtime_remote_time = realtime_clock::min_time;
432 uint32_t remote_queue_index = 0xffffffff;
433 };
434
435 // Pushes SplitMessageReader onto the timestamp heap. This should only be
436 // called when timestamps are placed in the channel this class is merging for
437 // the reader.
438 void UpdateTimestamp(
439 SplitMessageReader *split_message_reader,
Austin Schuhcde938c2020-02-02 17:30:07 -0800440 std::tuple<monotonic_clock::time_point, uint32_t, const MessageHeader *>
441 oldest_message_time) {
Austin Schuh6f3babe2020-01-26 20:34:50 -0800442 PushTimestampHeap(oldest_message_time, split_message_reader);
443 }
444 // Pushes SplitMessageReader onto the message heap. This should only be
445 // called when data is placed in the channel this class is merging for the
446 // reader.
447 void Update(
448 SplitMessageReader *split_message_reader,
Austin Schuhcde938c2020-02-02 17:30:07 -0800449 std::tuple<monotonic_clock::time_point, uint32_t, const MessageHeader *>
450 oldest_message_time) {
Austin Schuh6f3babe2020-01-26 20:34:50 -0800451 PushMessageHeap(oldest_message_time, split_message_reader);
452 }
453
Austin Schuhcde938c2020-02-02 17:30:07 -0800454 // Returns the oldest combined timestamp and data for this channel. If there
455 // isn't a matching piece of data, returns only the timestamp with no data.
456 // The caller can determine what the appropriate action is to recover.
Austin Schuh6f3babe2020-01-26 20:34:50 -0800457 std::tuple<DeliveryTimestamp, FlatbufferVector<MessageHeader>> PopOldest();
458
Austin Schuh8bd96322020-02-13 21:18:22 -0800459 // Returns the oldest forwarding timestamp.
460 DeliveryTimestamp OldestTimestamp() const;
461
Austin Schuh6f3babe2020-01-26 20:34:50 -0800462 // Tracks if the channel merger has pushed this onto it's heap or not.
463 bool pushed() { return pushed_; }
464 // Sets if this has been pushed to the channel merger heap. Should only be
465 // called by the channel merger.
466 void set_pushed(bool pushed) { pushed_ = pushed; }
467
Austin Schuhcde938c2020-02-02 17:30:07 -0800468 // Returns a debug string with the heaps printed out.
469 std::string DebugString() const;
470
Austin Schuh8bd96322020-02-13 21:18:22 -0800471 // Returns true if we have timestamps.
472 bool has_timestamps() const { return has_timestamps_; }
473
474 // Records that one of the log files ran out of data. This should only be
475 // called by a SplitMessageReader.
476 void NoticeAtEnd();
477
Austin Schuh6f3babe2020-01-26 20:34:50 -0800478 private:
479 // Pushes messages and timestamps to the corresponding heaps.
480 void PushMessageHeap(
Austin Schuhcde938c2020-02-02 17:30:07 -0800481 std::tuple<monotonic_clock::time_point, uint32_t, const MessageHeader *>
482 timestamp,
Austin Schuh6f3babe2020-01-26 20:34:50 -0800483 SplitMessageReader *split_message_reader);
484 void PushTimestampHeap(
Austin Schuhcde938c2020-02-02 17:30:07 -0800485 std::tuple<monotonic_clock::time_point, uint32_t, const MessageHeader *>
486 timestamp,
Austin Schuh6f3babe2020-01-26 20:34:50 -0800487 SplitMessageReader *split_message_reader);
488
489 // Pops a message from the message heap. This automatically triggers the
490 // split message reader to re-fetch any new data.
491 std::tuple<monotonic_clock::time_point, uint32_t,
492 FlatbufferVector<MessageHeader>>
493 PopMessageHeap();
Austin Schuhcde938c2020-02-02 17:30:07 -0800494
495 std::tuple<monotonic_clock::time_point, uint32_t, const MessageHeader *>
496 oldest_message() const;
497 std::tuple<monotonic_clock::time_point, uint32_t, const MessageHeader *>
498 oldest_timestamp() const;
Austin Schuh6f3babe2020-01-26 20:34:50 -0800499 // Pops a message from the timestamp heap. This automatically triggers the
500 // split message reader to re-fetch any new data.
501 std::tuple<monotonic_clock::time_point, uint32_t,
502 FlatbufferVector<MessageHeader>>
503 PopTimestampHeap();
504
505 const Configuration *configuration_;
506
507 // If true, this is a forwarded channel and timestamps should be matched.
508 bool has_timestamps_ = false;
509
510 // Tracks if the ChannelMerger has pushed this onto it's queue.
511 bool pushed_ = false;
512
513 // The split message readers used for source data.
514 std::vector<SplitMessageReader *> split_message_readers_;
515
516 // The channel to merge.
517 int channel_index_;
518
519 // Our node.
520 int node_index_;
521
522 // Heaps for messages and timestamps.
523 std::vector<
524 std::tuple<monotonic_clock::time_point, uint32_t, SplitMessageReader *>>
525 message_heap_;
526 std::vector<
527 std::tuple<monotonic_clock::time_point, uint32_t, SplitMessageReader *>>
528 timestamp_heap_;
529
530 // Parent channel merger.
531 ChannelMerger *channel_merger_;
532};
533
534// This class handles constructing all the split message readers, channel
535// mergers, and combining the results.
536class ChannelMerger {
537 public:
538 // Builds a ChannelMerger around a set of log files. These are of the format:
539 // {
540 // {log1_part0, log1_part1, ...},
541 // {log2}
542 // }
543 // The inner vector is a list of log file chunks which form up a log file.
544 // The outer vector is a list of log files with subsets of the messages, or
545 // messages from different nodes.
546 ChannelMerger(const std::vector<std::vector<std::string>> &filenames);
547
548 // Returns the nodes that we know how to merge.
549 const std::vector<const Node *> nodes() const;
550 // Sets the node that we will return messages as. Returns true if the node
551 // has log files and will produce data. This can only be called once, and
552 // will likely corrupt state if called a second time.
553 bool SetNode(const Node *target_node);
554
555 // Everything else needs the node set before it works.
556
557 // Returns a timestamp for the oldest message in this group of logfiles.
558 monotonic_clock::time_point OldestMessage() const;
559 // Pops the oldest message.
560 std::tuple<TimestampMerger::DeliveryTimestamp, int,
561 FlatbufferVector<MessageHeader>>
562 PopOldest();
563
Austin Schuh8bd96322020-02-13 21:18:22 -0800564 // Returns the oldest timestamp in the timestamp heap.
565 TimestampMerger::DeliveryTimestamp OldestTimestamp() const;
566 // Returns the oldest timestamp in the timestamp heap for a specific channel.
567 TimestampMerger::DeliveryTimestamp OldestTimestampForChannel(
568 int channel) const;
569
Austin Schuh6f3babe2020-01-26 20:34:50 -0800570 // Returns the config for this set of log files.
571 const Configuration *configuration() const {
572 return log_file_header()->configuration();
573 }
574
575 const LogFileHeader *log_file_header() const {
576 return &log_file_header_.message();
577 }
578
579 // Returns the start times for the configured node's log files.
Austin Schuhcde938c2020-02-02 17:30:07 -0800580 monotonic_clock::time_point monotonic_start_time() const {
Austin Schuh6f3babe2020-01-26 20:34:50 -0800581 return monotonic_clock::time_point(
582 std::chrono::nanoseconds(log_file_header()->monotonic_start_time()));
583 }
Austin Schuhcde938c2020-02-02 17:30:07 -0800584 realtime_clock::time_point realtime_start_time() const {
Austin Schuh6f3babe2020-01-26 20:34:50 -0800585 return realtime_clock::time_point(
586 std::chrono::nanoseconds(log_file_header()->realtime_start_time()));
587 }
588
589 // Returns the node set by SetNode above.
590 const Node *node() const { return node_; }
591
592 // Called by the TimestampMerger when new data is available with the provided
593 // timestamp and channel_index.
594 void Update(monotonic_clock::time_point timestamp, int channel_index) {
595 PushChannelHeap(timestamp, channel_index);
596 }
597
Austin Schuhcde938c2020-02-02 17:30:07 -0800598 // Returns a debug string with all the heaps in it. Generally only useful for
599 // debugging what went wrong.
600 std::string DebugString() const;
Austin Schuh6f3babe2020-01-26 20:34:50 -0800601
Austin Schuh8bd96322020-02-13 21:18:22 -0800602 // Returns true if one of the log files has finished reading everything. When
603 // log file chunks are involved, this means that the last chunk in a log file
604 // has been read. It is acceptable to be missing data at this point in time.
605 bool at_end() const { return at_end_; }
606
607 // Marks that one of the log files is at the end. This should only be called
608 // by timestamp mergers.
609 void NoticeAtEnd() { at_end_ = true; }
610
Austin Schuhcde938c2020-02-02 17:30:07 -0800611 private:
Austin Schuh6f3babe2020-01-26 20:34:50 -0800612 // Pushes the timestamp for new data on the provided channel.
613 void PushChannelHeap(monotonic_clock::time_point timestamp,
614 int channel_index);
615
616 // All the message readers.
617 std::vector<std::unique_ptr<SplitMessageReader>> split_message_readers_;
618
619 // The log header we are claiming to be.
620 FlatbufferDetachedBuffer<LogFileHeader> log_file_header_;
621
622 // The timestamp mergers which combine data from the split message readers.
623 std::vector<TimestampMerger> timestamp_mergers_;
624
625 // A heap of the channel readers and timestamps for the oldest data in each.
Austin Schuh05b70472020-01-01 17:11:17 -0800626 std::vector<std::pair<monotonic_clock::time_point, int>> channel_heap_;
Austin Schuh8bd96322020-02-13 21:18:22 -0800627 // A heap of just the timestamp channel readers and timestamps for the oldest
628 // data in each.
629 std::vector<std::pair<monotonic_clock::time_point, int>> timestamp_heap_;
Austin Schuh05b70472020-01-01 17:11:17 -0800630
Austin Schuh6f3babe2020-01-26 20:34:50 -0800631 // Configured node.
632 const Node *node_;
633
Austin Schuh8bd96322020-02-13 21:18:22 -0800634 bool at_end_ = false;
635
Austin Schuh6f3babe2020-01-26 20:34:50 -0800636 // Cached copy of the list of nodes.
637 std::vector<const Node *> nodes_;
Austin Schuh05b70472020-01-01 17:11:17 -0800638};
Austin Schuha36c8902019-12-30 18:07:15 -0800639
640} // namespace logger
641} // namespace aos
642
643#endif // AOS_EVENTS_LOGGING_LOGFILE_UTILS_H_