blob: 87ea229571190df727067b0f40413788a70c4e4f [file] [log] [blame]
Austin Schuha36c8902019-12-30 18:07:15 -08001#ifndef AOS_EVENTS_LOGGING_LOGFILE_UTILS_H_
2#define AOS_EVENTS_LOGGING_LOGFILE_UTILS_H_
3
#include <sys/uio.h>
#include <unistd.h>

#include <chrono>
#include <cstddef>
#include <cstdint>
#include <deque>
#include <limits>
#include <memory>
#include <new>
#include <optional>
#include <string>
#include <string_view>
#include <tuple>
#include <utility>
#include <vector>

#include "absl/types/span.h"
#include "aos/events/event_loop.h"
#include "aos/events/logging/logger_generated.h"
#include "flatbuffers/flatbuffers.h"
21
22namespace aos {
23namespace logger {
24
// Selects what gets logged for a message, based on where it originated.  The
// underlying type is a single byte.
enum class LogType : uint8_t {
  // The message originated on this node and should be logged here.
  kLogMessage,
  // The message originated on another node, but only the delivery times are
  // logged here.
  kLogDeliveryTimeOnly,
  // The message originated on another node. Log it and the delivery times
  // together.  The message_gateway is responsible for logging any messages
  // which didn't get delivered.
  kLogMessageAndDeliveryTime,
  // The message originated on the other node and should be logged on this node.
  kLogRemoteMessage
};
38
Austin Schuha36c8902019-12-30 18:07:15 -080039// This class manages efficiently writing a sequence of detached buffers to a
40// file. It queues them up and batches the write operation.
41class DetachedBufferWriter {
42 public:
43 DetachedBufferWriter(std::string_view filename);
Austin Schuh2f8fd752020-09-01 22:38:28 -070044 DetachedBufferWriter(DetachedBufferWriter &&other);
45 DetachedBufferWriter(const DetachedBufferWriter &) = delete;
46
Austin Schuha36c8902019-12-30 18:07:15 -080047 ~DetachedBufferWriter();
48
Austin Schuh2f8fd752020-09-01 22:38:28 -070049 DetachedBufferWriter &operator=(DetachedBufferWriter &&other);
Brian Silverman98360e22020-04-28 16:51:20 -070050 DetachedBufferWriter &operator=(const DetachedBufferWriter &) = delete;
51
Austin Schuh6f3babe2020-01-26 20:34:50 -080052 std::string_view filename() const { return filename_; }
53
Austin Schuh2f8fd752020-09-01 22:38:28 -070054 // Rewrites a location in a file (relative to the start) to have new data in
55 // it. The main use case is updating start times after a log file starts.
56 void RewriteLocation(off64_t offset, absl::Span<const uint8_t> data);
57
Austin Schuha36c8902019-12-30 18:07:15 -080058 // TODO(austin): Snappy compress the log file if it ends with .snappy!
59
60 // Queues up a finished FlatBufferBuilder to be written. Steals the detached
61 // buffer from it.
62 void QueueSizedFlatbuffer(flatbuffers::FlatBufferBuilder *fbb);
63 // Queues up a detached buffer directly.
64 void QueueSizedFlatbuffer(flatbuffers::DetachedBuffer &&buffer);
Austin Schuhde031b72020-01-10 19:34:41 -080065 // Writes a Span. This is not terribly optimized right now.
66 void WriteSizedFlatbuffer(absl::Span<const uint8_t> span);
Austin Schuha36c8902019-12-30 18:07:15 -080067
68 // Triggers data to be provided to the kernel and written.
69 void Flush();
70
Brian Silverman98360e22020-04-28 16:51:20 -070071 // Returns the number of bytes written.
72 size_t written_size() const { return written_size_; }
73
74 // Returns the number of bytes written or currently queued.
75 size_t total_size() const { return written_size_ + queued_size_; }
76
Austin Schuha36c8902019-12-30 18:07:15 -080077 private:
Austin Schuh2f8fd752020-09-01 22:38:28 -070078 std::string filename_;
Austin Schuh6f3babe2020-01-26 20:34:50 -080079
Austin Schuha36c8902019-12-30 18:07:15 -080080 int fd_ = -1;
81
82 // Size of all the data in the queue.
83 size_t queued_size_ = 0;
Brian Silverman98360e22020-04-28 16:51:20 -070084 size_t written_size_ = 0;
Austin Schuha36c8902019-12-30 18:07:15 -080085
86 // List of buffers to flush.
87 std::vector<flatbuffers::DetachedBuffer> queue_;
88 // List of iovecs to use with writev. This is a member variable to avoid
89 // churn.
90 std::vector<struct iovec> iovec_;
91};
92
93// Packes a message pointed to by the context into a MessageHeader.
94flatbuffers::Offset<MessageHeader> PackMessage(
95 flatbuffers::FlatBufferBuilder *fbb, const Context &context,
96 int channel_index, LogType log_type);
97
Austin Schuh6f3babe2020-01-26 20:34:50 -080098FlatbufferVector<LogFileHeader> ReadHeader(std::string_view filename);
Austin Schuh5212cad2020-09-09 23:12:09 -070099FlatbufferVector<MessageHeader> ReadNthMessage(std::string_view filename,
100 size_t n);
Austin Schuh6f3babe2020-01-26 20:34:50 -0800101
Austin Schuh05b70472020-01-01 17:11:17 -0800102// Class to read chunks out of a log file.
103class SpanReader {
104 public:
105 SpanReader(std::string_view filename);
Austin Schuha36c8902019-12-30 18:07:15 -0800106
Austin Schuh05b70472020-01-01 17:11:17 -0800107 ~SpanReader() { close(fd_); }
108
Austin Schuh6f3babe2020-01-26 20:34:50 -0800109 std::string_view filename() const { return filename_; }
110
Austin Schuh05b70472020-01-01 17:11:17 -0800111 // Returns a span with the data for a message from the log file, excluding
112 // the size.
113 absl::Span<const uint8_t> ReadMessage();
114
115 // Returns true if there is a full message available in the buffer, or if we
116 // will have to read more data from disk.
117 bool MessageAvailable();
118
119 private:
120 // TODO(austin): Optimization:
121 // Allocate the 256k blocks like we do today. But, refcount them with
122 // shared_ptr pointed to by the messageheader that is returned. This avoids
123 // the copy. Need to do more benchmarking.
124
125 // Reads a chunk of data into data_. Returns false if no data was read.
126 bool ReadBlock();
127
Austin Schuh6f3babe2020-01-26 20:34:50 -0800128 const std::string filename_;
129
Austin Schuh05b70472020-01-01 17:11:17 -0800130 // File descriptor for the log file.
131 int fd_ = -1;
132
133 // Allocator which doesn't zero initialize memory.
134 template <typename T>
135 struct DefaultInitAllocator {
136 typedef T value_type;
137
138 template <typename U>
139 void construct(U *p) {
140 ::new (static_cast<void *>(p)) U;
141 }
142
143 template <typename U, typename... Args>
144 void construct(U *p, Args &&... args) {
145 ::new (static_cast<void *>(p)) U(std::forward<Args>(args)...);
146 }
147
148 T *allocate(std::size_t n) {
149 return reinterpret_cast<T *>(::operator new(sizeof(T) * n));
150 }
151
152 template <typename U>
153 void deallocate(U *p, std::size_t /*n*/) {
154 ::operator delete(static_cast<void *>(p));
155 }
156 };
157
158 // Vector to read into. This uses an allocator which doesn't zero
159 // initialize the memory.
160 std::vector<uint8_t, DefaultInitAllocator<uint8_t>> data_;
161
162 // Amount of data consumed already in data_.
163 size_t consumed_data_ = 0;
164
165 // Cached bit for if we have reached the end of the file. Otherwise we will
166 // hammer on the kernel asking for more data each time we send.
167 bool end_of_file_ = false;
168};
169
170// Class which handles reading the header and messages from the log file. This
171// handles any per-file state left before merging below.
172class MessageReader {
173 public:
174 MessageReader(std::string_view filename);
175
Austin Schuh6f3babe2020-01-26 20:34:50 -0800176 std::string_view filename() const { return span_reader_.filename(); }
177
Austin Schuh05b70472020-01-01 17:11:17 -0800178 // Returns the header from the log file.
179 const LogFileHeader *log_file_header() const {
Austin Schuh97789fc2020-08-01 14:42:45 -0700180 return &raw_log_file_header_.message();
181 }
182
183 // Returns the raw data of the header from the log file.
184 const FlatbufferVector<LogFileHeader> &raw_log_file_header() const {
185 return raw_log_file_header_;
Austin Schuh05b70472020-01-01 17:11:17 -0800186 }
187
188 // Returns the minimum maount of data needed to queue up for sorting before
189 // ware guarenteed to not see data out of order.
190 std::chrono::nanoseconds max_out_of_order_duration() const {
191 return max_out_of_order_duration_;
192 }
193
Austin Schuhcde938c2020-02-02 17:30:07 -0800194 // Returns the newest timestamp read out of the log file.
Austin Schuh05b70472020-01-01 17:11:17 -0800195 monotonic_clock::time_point newest_timestamp() const {
196 return newest_timestamp_;
197 }
198
199 // Returns the next message if there is one.
200 std::optional<FlatbufferVector<MessageHeader>> ReadMessage();
201
202 // The time at which we need to read another chunk from the logfile.
203 monotonic_clock::time_point queue_data_time() const {
204 return newest_timestamp() - max_out_of_order_duration();
205 }
206
207 private:
208 // Log chunk reader.
209 SpanReader span_reader_;
210
Austin Schuh97789fc2020-08-01 14:42:45 -0700211 // Vector holding the raw data for the log file header.
212 FlatbufferVector<LogFileHeader> raw_log_file_header_;
Austin Schuh05b70472020-01-01 17:11:17 -0800213
214 // Minimum amount of data to queue up for sorting before we are guarenteed
215 // to not see data out of order.
216 std::chrono::nanoseconds max_out_of_order_duration_;
217
218 // Timestamp of the newest message in a channel queue.
219 monotonic_clock::time_point newest_timestamp_ = monotonic_clock::min_time;
220};
221
class TimestampMerger;
Austin Schuh05b70472020-01-01 17:11:17 -0800223
// A design requirement is that the relevant data for a channel is not more than
// max_out_of_order_duration out of order.  We approach sorting in layers.
//
// 1) Split each (maybe chunked) log file into one queue per channel.  Read this
//    log file looking for data pertaining to a specific node.
//    (SplitMessageReader)
// 2) Merge all the data per channel from the different log files into a sorted
//    list of timestamps and messages.  (TimestampMerger)
// 3) Combine the timestamps and messages.  (TimestampMerger)
// 4) Merge all the channels to produce the next message on a node.
//    (ChannelMerger)
// 5) Duplicate this entire stack per node.
236
237// This class splits messages and timestamps up into a queue per channel, and
238// handles reading data from multiple chunks.
239class SplitMessageReader {
240 public:
241 SplitMessageReader(const std::vector<std::string> &filenames);
242
243 // Sets the TimestampMerger that gets notified for each channel. The node
244 // that the TimestampMerger is merging as needs to be passed in.
245 void SetTimestampMerger(TimestampMerger *timestamp_merger, int channel,
246 const Node *target_node);
247
Austin Schuh2f8fd752020-09-01 22:38:28 -0700248 // Returns the (timestamp, queue_index, message_header) for the oldest message
249 // in a channel, or max_time if there is nothing in the channel.
Austin Schuhcde938c2020-02-02 17:30:07 -0800250 std::tuple<monotonic_clock::time_point, uint32_t, const MessageHeader *>
251 oldest_message(int channel) {
Austin Schuh6f3babe2020-01-26 20:34:50 -0800252 return channels_[channel].data.front_timestamp();
253 }
254
Austin Schuh2f8fd752020-09-01 22:38:28 -0700255 // Returns the (timestamp, queue_index, message_header) for the oldest
256 // delivery time in a channel, or max_time if there is nothing in the channel.
Austin Schuhcde938c2020-02-02 17:30:07 -0800257 std::tuple<monotonic_clock::time_point, uint32_t, const MessageHeader *>
258 oldest_message(int channel, int destination_node) {
Austin Schuh6f3babe2020-01-26 20:34:50 -0800259 return channels_[channel].timestamps[destination_node].front_timestamp();
260 }
261
262 // Returns the timestamp, queue_index, and message for the oldest data on a
263 // channel. Requeues data as needed.
264 std::tuple<monotonic_clock::time_point, uint32_t,
265 FlatbufferVector<MessageHeader>>
266 PopOldest(int channel_index);
267
268 // Returns the timestamp, queue_index, and message for the oldest timestamp on
269 // a channel delivered to a node. Requeues data as needed.
270 std::tuple<monotonic_clock::time_point, uint32_t,
271 FlatbufferVector<MessageHeader>>
Austin Schuh2f8fd752020-09-01 22:38:28 -0700272 PopOldestTimestamp(int channel, int node_index);
Austin Schuh6f3babe2020-01-26 20:34:50 -0800273
274 // Returns the header for the log files.
Austin Schuh05b70472020-01-01 17:11:17 -0800275 const LogFileHeader *log_file_header() const {
Austin Schuhfa895892020-01-07 20:07:41 -0800276 return &log_file_header_.message();
Austin Schuh05b70472020-01-01 17:11:17 -0800277 }
278
Austin Schuh97789fc2020-08-01 14:42:45 -0700279 const FlatbufferVector<LogFileHeader> &raw_log_file_header() const {
280 return log_file_header_;
281 }
282
Austin Schuh6f3babe2020-01-26 20:34:50 -0800283 // Returns the starting time for this set of log files.
Austin Schuh05b70472020-01-01 17:11:17 -0800284 monotonic_clock::time_point monotonic_start_time() {
285 return monotonic_clock::time_point(
286 std::chrono::nanoseconds(log_file_header()->monotonic_start_time()));
287 }
288 realtime_clock::time_point realtime_start_time() {
289 return realtime_clock::time_point(
290 std::chrono::nanoseconds(log_file_header()->realtime_start_time()));
291 }
292
Austin Schuh6f3babe2020-01-26 20:34:50 -0800293 // Returns the configuration from the log file header.
294 const Configuration *configuration() const {
295 return log_file_header()->configuration();
296 }
297
Austin Schuh05b70472020-01-01 17:11:17 -0800298 // Returns the node who's point of view this log file is from. Make sure this
299 // is a pointer in the configuration() nodes list so it can be consumed
300 // elsewhere.
301 const Node *node() const {
302 if (configuration()->has_nodes()) {
Austin Schuh6f3babe2020-01-26 20:34:50 -0800303 return configuration::GetNodeOrDie(configuration(),
304 log_file_header()->node());
Austin Schuh05b70472020-01-01 17:11:17 -0800305 } else {
306 CHECK(!log_file_header()->has_node());
307 return nullptr;
308 }
309 }
310
Austin Schuh6f3babe2020-01-26 20:34:50 -0800311 // Returns the timestamp of the newest message read from the log file, and the
312 // timestamp that we need to re-queue data.
313 monotonic_clock::time_point newest_timestamp() const {
Austin Schuhcde938c2020-02-02 17:30:07 -0800314 return newest_timestamp_;
Austin Schuh6f3babe2020-01-26 20:34:50 -0800315 }
316
Austin Schuhcde938c2020-02-02 17:30:07 -0800317 // Returns the next time to trigger a requeue.
318 monotonic_clock::time_point time_to_queue() const { return time_to_queue_; }
319
320 // Returns the minimum amount of data needed to queue up for sorting before
321 // ware guarenteed to not see data out of order.
322 std::chrono::nanoseconds max_out_of_order_duration() const {
323 return message_reader_->max_out_of_order_duration();
324 }
325
326 std::string_view filename() const { return message_reader_->filename(); }
Austin Schuh6f3babe2020-01-26 20:34:50 -0800327
328 // Adds more messages to the sorted list. This reads enough data such that
329 // oldest_message_time can be replayed safely. Returns false if the log file
330 // has all been read.
331 bool QueueMessages(monotonic_clock::time_point oldest_message_time);
Austin Schuh05b70472020-01-01 17:11:17 -0800332
Austin Schuhcde938c2020-02-02 17:30:07 -0800333 // Returns debug strings for a channel, and timestamps for a node.
334 std::string DebugString(int channel) const;
335 std::string DebugString(int channel, int node_index) const;
336
Austin Schuh8bd96322020-02-13 21:18:22 -0800337 // Returns true if all the messages have been queued from the last log file in
338 // the list of log files chunks.
339 bool at_end() const { return at_end_; }
340
Austin Schuh05b70472020-01-01 17:11:17 -0800341 private:
Austin Schuh6f3babe2020-01-26 20:34:50 -0800342 // TODO(austin): Need to copy or refcount the message instead of running
343 // multiple copies of the reader. Or maybe have a "as_node" index and hide it
344 // inside.
345
Austin Schuhfa895892020-01-07 20:07:41 -0800346 // Moves to the next log file in the list.
347 bool NextLogFile();
348
Austin Schuh6f3babe2020-01-26 20:34:50 -0800349 // Filenames of the log files.
350 std::vector<std::string> filenames_;
351 // And the index of the next file to open.
352 size_t next_filename_index_ = 0;
Austin Schuh05b70472020-01-01 17:11:17 -0800353
Austin Schuhee711052020-08-24 16:06:09 -0700354 // Node we are reading as.
355 const Node *target_node_ = nullptr;
356
Austin Schuh6f3babe2020-01-26 20:34:50 -0800357 // Log file header to report. This is a copy.
Austin Schuh97789fc2020-08-01 14:42:45 -0700358 FlatbufferVector<LogFileHeader> log_file_header_;
Austin Schuh6f3babe2020-01-26 20:34:50 -0800359 // Current log file being read.
360 std::unique_ptr<MessageReader> message_reader_;
Austin Schuh05b70472020-01-01 17:11:17 -0800361
362 // Datastructure to hold the list of messages, cached timestamp for the
363 // oldest message, and sender to send with.
Austin Schuh6f3babe2020-01-26 20:34:50 -0800364 struct MessageHeaderQueue {
365 // If true, this is a timestamp queue.
366 bool timestamps = false;
Austin Schuh05b70472020-01-01 17:11:17 -0800367
Austin Schuh6f3babe2020-01-26 20:34:50 -0800368 // Returns a reference to the the oldest message.
369 FlatbufferVector<MessageHeader> &front() {
370 CHECK_GT(data_.size(), 0u);
371 return data_.front();
Austin Schuh05b70472020-01-01 17:11:17 -0800372 }
Austin Schuh6f3babe2020-01-26 20:34:50 -0800373
Austin Schuhcde938c2020-02-02 17:30:07 -0800374 // Adds a message to the back of the queue. Returns true if it was actually
375 // emplaced.
376 bool emplace_back(FlatbufferVector<MessageHeader> &&msg);
Austin Schuh6f3babe2020-01-26 20:34:50 -0800377
378 // Drops the front message. Invalidates the front() reference.
Austin Schuh2f8fd752020-09-01 22:38:28 -0700379 void PopFront();
Austin Schuh6f3babe2020-01-26 20:34:50 -0800380
381 // The size of the queue.
382 size_t size() { return data_.size(); }
383
Austin Schuhcde938c2020-02-02 17:30:07 -0800384 // Returns a debug string with info about each message in the queue.
385 std::string DebugString() const;
386
Austin Schuh2f8fd752020-09-01 22:38:28 -0700387 // Returns the (timestamp, queue_index, message_header) for the oldest
388 // message.
Austin Schuhcde938c2020-02-02 17:30:07 -0800389 const std::tuple<monotonic_clock::time_point, uint32_t,
390 const MessageHeader *>
391 front_timestamp() {
Austin Schuh2f8fd752020-09-01 22:38:28 -0700392 const MessageHeader &message = front().message();
Austin Schuh6f3babe2020-01-26 20:34:50 -0800393 return std::make_tuple(
Austin Schuh2f8fd752020-09-01 22:38:28 -0700394 monotonic_clock::time_point(
395 std::chrono::nanoseconds(message.monotonic_sent_time())),
396 message.queue_index(), &message);
Austin Schuh6f3babe2020-01-26 20:34:50 -0800397 }
398
399 // Pointer to the timestamp merger for this queue if available.
400 TimestampMerger *timestamp_merger = nullptr;
401 // Pointer to the reader which feeds this queue.
402 SplitMessageReader *split_reader = nullptr;
403
404 private:
405 // The data.
406 std::deque<FlatbufferVector<MessageHeader>> data_;
Austin Schuh05b70472020-01-01 17:11:17 -0800407 };
408
Austin Schuh6f3babe2020-01-26 20:34:50 -0800409 // All the queues needed for a channel. There isn't going to be data in all
410 // of these.
411 struct ChannelData {
412 // The data queue for the channel.
413 MessageHeaderQueue data;
414 // Queues for timestamps for each node.
415 std::vector<MessageHeaderQueue> timestamps;
416 };
Austin Schuhfa895892020-01-07 20:07:41 -0800417
Austin Schuh6f3babe2020-01-26 20:34:50 -0800418 // Data for all the channels.
Austin Schuh05b70472020-01-01 17:11:17 -0800419 std::vector<ChannelData> channels_;
420
Austin Schuh6f3babe2020-01-26 20:34:50 -0800421 // Once we know the node that this SplitMessageReader will be writing as,
422 // there will be only one MessageHeaderQueue that a specific channel matches.
423 // Precompute this here for efficiency.
424 std::vector<MessageHeaderQueue *> channels_to_write_;
425
Austin Schuhcde938c2020-02-02 17:30:07 -0800426 monotonic_clock::time_point time_to_queue_ = monotonic_clock::min_time;
427
428 // Latches true when we hit the end of the last log file and there is no sense
429 // poking it further.
430 bool at_end_ = false;
431
432 // Timestamp of the newest message that was read and actually queued. We want
433 // to track this independently from the log file because we need the
434 // timestamps here to be timestamps of messages that are queued.
435 monotonic_clock::time_point newest_timestamp_ = monotonic_clock::min_time;
Austin Schuh6f3babe2020-01-26 20:34:50 -0800436};
437
class ChannelMerger;
439
440// Sorts channels (and timestamps) from multiple log files for a single channel.
441class TimestampMerger {
442 public:
443 TimestampMerger(const Configuration *configuration,
444 std::vector<SplitMessageReader *> split_message_readers,
445 int channel_index, const Node *target_node,
446 ChannelMerger *channel_merger);
447
448 // Metadata used to schedule the message.
449 struct DeliveryTimestamp {
450 monotonic_clock::time_point monotonic_event_time =
451 monotonic_clock::min_time;
452 realtime_clock::time_point realtime_event_time = realtime_clock::min_time;
453
454 monotonic_clock::time_point monotonic_remote_time =
455 monotonic_clock::min_time;
456 realtime_clock::time_point realtime_remote_time = realtime_clock::min_time;
457 uint32_t remote_queue_index = 0xffffffff;
458 };
459
460 // Pushes SplitMessageReader onto the timestamp heap. This should only be
461 // called when timestamps are placed in the channel this class is merging for
462 // the reader.
463 void UpdateTimestamp(
464 SplitMessageReader *split_message_reader,
Austin Schuhcde938c2020-02-02 17:30:07 -0800465 std::tuple<monotonic_clock::time_point, uint32_t, const MessageHeader *>
466 oldest_message_time) {
Austin Schuh6f3babe2020-01-26 20:34:50 -0800467 PushTimestampHeap(oldest_message_time, split_message_reader);
468 }
469 // Pushes SplitMessageReader onto the message heap. This should only be
470 // called when data is placed in the channel this class is merging for the
471 // reader.
472 void Update(
473 SplitMessageReader *split_message_reader,
Austin Schuhcde938c2020-02-02 17:30:07 -0800474 std::tuple<monotonic_clock::time_point, uint32_t, const MessageHeader *>
475 oldest_message_time) {
Austin Schuh6f3babe2020-01-26 20:34:50 -0800476 PushMessageHeap(oldest_message_time, split_message_reader);
477 }
478
Austin Schuhcde938c2020-02-02 17:30:07 -0800479 // Returns the oldest combined timestamp and data for this channel. If there
480 // isn't a matching piece of data, returns only the timestamp with no data.
481 // The caller can determine what the appropriate action is to recover.
Austin Schuh6f3babe2020-01-26 20:34:50 -0800482 std::tuple<DeliveryTimestamp, FlatbufferVector<MessageHeader>> PopOldest();
483
484 // Tracks if the channel merger has pushed this onto it's heap or not.
485 bool pushed() { return pushed_; }
486 // Sets if this has been pushed to the channel merger heap. Should only be
487 // called by the channel merger.
488 void set_pushed(bool pushed) { pushed_ = pushed; }
489
Austin Schuhcde938c2020-02-02 17:30:07 -0800490 // Returns a debug string with the heaps printed out.
491 std::string DebugString() const;
492
Austin Schuh8bd96322020-02-13 21:18:22 -0800493 // Returns true if we have timestamps.
494 bool has_timestamps() const { return has_timestamps_; }
495
496 // Records that one of the log files ran out of data. This should only be
497 // called by a SplitMessageReader.
498 void NoticeAtEnd();
499
Austin Schuh2f8fd752020-09-01 22:38:28 -0700500 aos::monotonic_clock::time_point channel_merger_time() {
501 if (has_timestamps_) {
502 return std::get<0>(timestamp_heap_[0]);
503 } else {
504 return std::get<0>(message_heap_[0]);
505 }
506 }
507
Austin Schuh6f3babe2020-01-26 20:34:50 -0800508 private:
509 // Pushes messages and timestamps to the corresponding heaps.
510 void PushMessageHeap(
Austin Schuhcde938c2020-02-02 17:30:07 -0800511 std::tuple<monotonic_clock::time_point, uint32_t, const MessageHeader *>
512 timestamp,
Austin Schuh6f3babe2020-01-26 20:34:50 -0800513 SplitMessageReader *split_message_reader);
514 void PushTimestampHeap(
Austin Schuhcde938c2020-02-02 17:30:07 -0800515 std::tuple<monotonic_clock::time_point, uint32_t, const MessageHeader *>
516 timestamp,
Austin Schuh6f3babe2020-01-26 20:34:50 -0800517 SplitMessageReader *split_message_reader);
518
519 // Pops a message from the message heap. This automatically triggers the
520 // split message reader to re-fetch any new data.
521 std::tuple<monotonic_clock::time_point, uint32_t,
522 FlatbufferVector<MessageHeader>>
523 PopMessageHeap();
Austin Schuhcde938c2020-02-02 17:30:07 -0800524
525 std::tuple<monotonic_clock::time_point, uint32_t, const MessageHeader *>
526 oldest_message() const;
527 std::tuple<monotonic_clock::time_point, uint32_t, const MessageHeader *>
528 oldest_timestamp() const;
Austin Schuh6f3babe2020-01-26 20:34:50 -0800529 // Pops a message from the timestamp heap. This automatically triggers the
530 // split message reader to re-fetch any new data.
531 std::tuple<monotonic_clock::time_point, uint32_t,
532 FlatbufferVector<MessageHeader>>
533 PopTimestampHeap();
534
535 const Configuration *configuration_;
536
537 // If true, this is a forwarded channel and timestamps should be matched.
538 bool has_timestamps_ = false;
539
540 // Tracks if the ChannelMerger has pushed this onto it's queue.
541 bool pushed_ = false;
542
543 // The split message readers used for source data.
544 std::vector<SplitMessageReader *> split_message_readers_;
545
546 // The channel to merge.
547 int channel_index_;
548
549 // Our node.
550 int node_index_;
551
552 // Heaps for messages and timestamps.
553 std::vector<
554 std::tuple<monotonic_clock::time_point, uint32_t, SplitMessageReader *>>
555 message_heap_;
556 std::vector<
557 std::tuple<monotonic_clock::time_point, uint32_t, SplitMessageReader *>>
558 timestamp_heap_;
559
560 // Parent channel merger.
561 ChannelMerger *channel_merger_;
562};
563
564// This class handles constructing all the split message readers, channel
565// mergers, and combining the results.
566class ChannelMerger {
567 public:
568 // Builds a ChannelMerger around a set of log files. These are of the format:
569 // {
570 // {log1_part0, log1_part1, ...},
571 // {log2}
572 // }
573 // The inner vector is a list of log file chunks which form up a log file.
574 // The outer vector is a list of log files with subsets of the messages, or
575 // messages from different nodes.
576 ChannelMerger(const std::vector<std::vector<std::string>> &filenames);
577
578 // Returns the nodes that we know how to merge.
579 const std::vector<const Node *> nodes() const;
580 // Sets the node that we will return messages as. Returns true if the node
581 // has log files and will produce data. This can only be called once, and
582 // will likely corrupt state if called a second time.
583 bool SetNode(const Node *target_node);
584
585 // Everything else needs the node set before it works.
586
587 // Returns a timestamp for the oldest message in this group of logfiles.
Austin Schuh858c9f32020-08-31 16:56:12 -0700588 monotonic_clock::time_point OldestMessageTime() const;
Austin Schuh6f3babe2020-01-26 20:34:50 -0800589 // Pops the oldest message.
590 std::tuple<TimestampMerger::DeliveryTimestamp, int,
591 FlatbufferVector<MessageHeader>>
592 PopOldest();
593
594 // Returns the config for this set of log files.
595 const Configuration *configuration() const {
596 return log_file_header()->configuration();
597 }
598
599 const LogFileHeader *log_file_header() const {
600 return &log_file_header_.message();
601 }
602
603 // Returns the start times for the configured node's log files.
Austin Schuhcde938c2020-02-02 17:30:07 -0800604 monotonic_clock::time_point monotonic_start_time() const {
Austin Schuh6f3babe2020-01-26 20:34:50 -0800605 return monotonic_clock::time_point(
606 std::chrono::nanoseconds(log_file_header()->monotonic_start_time()));
607 }
Austin Schuhcde938c2020-02-02 17:30:07 -0800608 realtime_clock::time_point realtime_start_time() const {
Austin Schuh6f3babe2020-01-26 20:34:50 -0800609 return realtime_clock::time_point(
610 std::chrono::nanoseconds(log_file_header()->realtime_start_time()));
611 }
612
613 // Returns the node set by SetNode above.
614 const Node *node() const { return node_; }
615
616 // Called by the TimestampMerger when new data is available with the provided
617 // timestamp and channel_index.
618 void Update(monotonic_clock::time_point timestamp, int channel_index) {
619 PushChannelHeap(timestamp, channel_index);
620 }
621
Austin Schuhcde938c2020-02-02 17:30:07 -0800622 // Returns a debug string with all the heaps in it. Generally only useful for
623 // debugging what went wrong.
624 std::string DebugString() const;
Austin Schuh6f3babe2020-01-26 20:34:50 -0800625
Austin Schuh8bd96322020-02-13 21:18:22 -0800626 // Returns true if one of the log files has finished reading everything. When
627 // log file chunks are involved, this means that the last chunk in a log file
628 // has been read. It is acceptable to be missing data at this point in time.
629 bool at_end() const { return at_end_; }
630
631 // Marks that one of the log files is at the end. This should only be called
632 // by timestamp mergers.
633 void NoticeAtEnd() { at_end_ = true; }
634
Austin Schuhcde938c2020-02-02 17:30:07 -0800635 private:
Austin Schuh6f3babe2020-01-26 20:34:50 -0800636 // Pushes the timestamp for new data on the provided channel.
637 void PushChannelHeap(monotonic_clock::time_point timestamp,
638 int channel_index);
639
Austin Schuh2f8fd752020-09-01 22:38:28 -0700640 // CHECKs that channel_heap_ and timestamp_heap_ are valid heaps.
641 void VerifyHeaps();
642
Austin Schuh6f3babe2020-01-26 20:34:50 -0800643 // All the message readers.
644 std::vector<std::unique_ptr<SplitMessageReader>> split_message_readers_;
645
646 // The log header we are claiming to be.
Austin Schuh97789fc2020-08-01 14:42:45 -0700647 FlatbufferVector<LogFileHeader> log_file_header_;
Austin Schuh6f3babe2020-01-26 20:34:50 -0800648
649 // The timestamp mergers which combine data from the split message readers.
650 std::vector<TimestampMerger> timestamp_mergers_;
651
652 // A heap of the channel readers and timestamps for the oldest data in each.
Austin Schuh05b70472020-01-01 17:11:17 -0800653 std::vector<std::pair<monotonic_clock::time_point, int>> channel_heap_;
Austin Schuh8bd96322020-02-13 21:18:22 -0800654 // A heap of just the timestamp channel readers and timestamps for the oldest
655 // data in each.
Austin Schuh2f8fd752020-09-01 22:38:28 -0700656 // TODO(austin): I think this is no longer used and can be removed (!)
Austin Schuh8bd96322020-02-13 21:18:22 -0800657 std::vector<std::pair<monotonic_clock::time_point, int>> timestamp_heap_;
Austin Schuh05b70472020-01-01 17:11:17 -0800658
Austin Schuh6f3babe2020-01-26 20:34:50 -0800659 // Configured node.
660 const Node *node_;
661
Austin Schuh8bd96322020-02-13 21:18:22 -0800662 bool at_end_ = false;
663
Austin Schuh6f3babe2020-01-26 20:34:50 -0800664 // Cached copy of the list of nodes.
665 std::vector<const Node *> nodes_;
Austin Schuh2f8fd752020-09-01 22:38:28 -0700666
667 // Last time popped. Used to detect events being returned out of order.
668 monotonic_clock::time_point last_popped_time_ = monotonic_clock::min_time;
Austin Schuh05b70472020-01-01 17:11:17 -0800669};
Austin Schuha36c8902019-12-30 18:07:15 -0800670
Austin Schuhee711052020-08-24 16:06:09 -0700671// Returns the node name with a trailing space, or an empty string if we are on
672// a single node.
673std::string MaybeNodeName(const Node *);
674
Austin Schuha36c8902019-12-30 18:07:15 -0800675} // namespace logger
676} // namespace aos
677
678#endif // AOS_EVENTS_LOGGING_LOGFILE_UTILS_H_