blob: f9a62b100b0811fbe75f7f170e44bf08a447e119 [file] [log] [blame]
Austin Schuha36c8902019-12-30 18:07:15 -08001#ifndef AOS_EVENTS_LOGGING_LOGFILE_UTILS_H_
2#define AOS_EVENTS_LOGGING_LOGFILE_UTILS_H_
3
4#include <sys/uio.h>
5
Austin Schuh97789fc2020-08-01 14:42:45 -07006#include <chrono>
Austin Schuh05b70472020-01-01 17:11:17 -08007#include <deque>
Austin Schuh97789fc2020-08-01 14:42:45 -07008#include <limits>
9#include <memory>
Austin Schuh05b70472020-01-01 17:11:17 -080010#include <optional>
Austin Schuhfa895892020-01-07 20:07:41 -080011#include <string>
Austin Schuha36c8902019-12-30 18:07:15 -080012#include <string_view>
Brian Silverman98360e22020-04-28 16:51:20 -070013#include <tuple>
Austin Schuh97789fc2020-08-01 14:42:45 -070014#include <utility>
Austin Schuha36c8902019-12-30 18:07:15 -080015#include <vector>
16
Austin Schuh4b5c22a2020-11-30 22:58:43 -080017#include "absl/container/btree_set.h"
Austin Schuh05b70472020-01-01 17:11:17 -080018#include "absl/types/span.h"
Philipp Schrader790cb542023-07-05 21:06:52 -070019#include "flatbuffers/flatbuffers.h"
20
Austin Schuh63097262023-08-16 17:04:29 -070021#include "aos/configuration.h"
Brian Silvermanf51499a2020-09-21 12:49:08 -070022#include "aos/containers/resizeable_buffer.h"
Austin Schuha36c8902019-12-30 18:07:15 -080023#include "aos/events/event_loop.h"
Austin Schuh2dc8c7d2021-07-01 17:41:28 -070024#include "aos/events/logging/boot_timestamp.h"
Brian Silvermanf51499a2020-09-21 12:49:08 -070025#include "aos/events/logging/buffer_encoder.h"
Alexei Strots01395492023-03-20 13:59:56 -070026#include "aos/events/logging/log_backend.h"
Austin Schuhc41603c2020-10-11 16:17:37 -070027#include "aos/events/logging/logfile_sorting.h"
Austin Schuha36c8902019-12-30 18:07:15 -080028#include "aos/events/logging/logger_generated.h"
Brian Silvermanf51499a2020-09-21 12:49:08 -070029#include "aos/flatbuffers.h"
Austin Schuhf2d0e682022-10-16 14:20:58 -070030#include "aos/network/remote_message_generated.h"
Austin Schuha36c8902019-12-30 18:07:15 -080031
Brian Silvermanf51499a2020-09-21 12:49:08 -070032namespace aos::logger {
Austin Schuha36c8902019-12-30 18:07:15 -080033
// Selects what the logger records for a message seen on this node.
enum class LogType : uint8_t {
  // The message originated on this node and should be logged here.
  kLogMessage,
  // The message originated on another node, but only the delivery times are
  // logged here.
  kLogDeliveryTimeOnly,
  // The message originated on the other node and should be logged on this node.
  kLogRemoteMessage
};
43
Austin Schuha36c8902019-12-30 18:07:15 -080044// This class manages efficiently writing a sequence of detached buffers to a
Brian Silvermanf51499a2020-09-21 12:49:08 -070045// file. It encodes them, queues them up, and batches the write operation.
Alexei Strots01395492023-03-20 13:59:56 -070046
Austin Schuha36c8902019-12-30 18:07:15 -080047class DetachedBufferWriter {
48 public:
Brian Silvermana9f2ec92020-10-06 18:00:53 -070049 // Marker struct for one of our constructor overloads.
50 struct already_out_of_space_t {};
51
Alexei Strotsbc082d82023-05-03 08:43:42 -070052 DetachedBufferWriter(std::unique_ptr<LogSink> log_sink,
Austin Schuh48d10d62022-10-16 22:19:23 -070053 std::unique_ptr<DataEncoder> encoder);
Brian Silvermana9f2ec92020-10-06 18:00:53 -070054 // Creates a dummy instance which won't even open a file. It will act as if
55 // opening the file ran out of space immediately.
Philipp Schrader10397952023-06-15 11:43:07 -070056 DetachedBufferWriter(already_out_of_space_t);
Austin Schuh2f8fd752020-09-01 22:38:28 -070057 DetachedBufferWriter(DetachedBufferWriter &&other);
58 DetachedBufferWriter(const DetachedBufferWriter &) = delete;
59
Philipp Schrader10397952023-06-15 11:43:07 -070060 virtual ~DetachedBufferWriter();
Austin Schuha36c8902019-12-30 18:07:15 -080061
Austin Schuh2f8fd752020-09-01 22:38:28 -070062 DetachedBufferWriter &operator=(DetachedBufferWriter &&other);
Brian Silverman98360e22020-04-28 16:51:20 -070063 DetachedBufferWriter &operator=(const DetachedBufferWriter &) = delete;
64
Alexei Strotsbc082d82023-05-03 08:43:42 -070065 std::string_view name() const { return log_sink_->name(); }
Austin Schuh6f3babe2020-01-26 20:34:50 -080066
Brian Silvermana9f2ec92020-10-06 18:00:53 -070067 // This will be true until Close() is called, unless the file couldn't be
68 // created due to running out of space.
Alexei Strotsbc082d82023-05-03 08:43:42 -070069 bool is_open() const { return log_sink_->is_open(); }
Brian Silvermana9f2ec92020-10-06 18:00:53 -070070
Brian Silvermanf51499a2020-09-21 12:49:08 -070071 // Queues up a finished FlatBufferBuilder to be encoded and written.
72 //
73 // Triggers a flush if there's enough data queued up.
74 //
75 // Steals the detached buffer from it.
Maxwell Gumleyd26e6292024-04-24 10:45:07 -060076 // Returns the duration of time spent on encoding the message.
77 std::chrono::nanoseconds CopyMessage(DataEncoder::Copier *copier,
78 aos::monotonic_clock::time_point now);
Austin Schuha36c8902019-12-30 18:07:15 -080079
Brian Silverman0465fcf2020-09-24 00:29:18 -070080 // Indicates we got ENOSPC when trying to write. After this returns true, no
81 // further data is written.
82 bool ran_out_of_space() const { return ran_out_of_space_; }
83
84 // To avoid silently failing to write logfiles, you must call this before
85 // destruction if ran_out_of_space() is true and the situation has been
86 // handled.
87 void acknowledge_out_of_space() {
88 CHECK(ran_out_of_space_);
89 acknowledge_ran_out_of_space_ = true;
90 }
91
92 // Fully flushes and closes the underlying file now. No additional data may be
93 // enqueued after calling this.
94 //
95 // This will be performed in the destructor automatically.
96 //
97 // Note that this may set ran_out_of_space().
98 void Close();
99
Brian Silvermanf51499a2020-09-21 12:49:08 -0700100 // Returns the total number of bytes written and currently queued.
Austin Schuha426f1f2021-03-31 22:27:41 -0700101 size_t total_bytes() const {
102 if (!encoder_) {
103 return 0;
104 }
105 return encoder_->total_bytes();
106 }
Austin Schuha36c8902019-12-30 18:07:15 -0800107
Alexei Strotsbc082d82023-05-03 08:43:42 -0700108 WriteStats *WriteStatistics() const { return log_sink_->WriteStatistics(); }
Brian Silverman98360e22020-04-28 16:51:20 -0700109
Austin Schuha36c8902019-12-30 18:07:15 -0800110 private:
Brian Silvermanf51499a2020-09-21 12:49:08 -0700111 // Performs a single writev call with as much of the data we have queued up as
Austin Schuh8bdfc492023-02-11 12:53:13 -0800112 // possible. now is the time we flushed at, to be recorded in
113 // last_flush_time_.
Brian Silvermanf51499a2020-09-21 12:49:08 -0700114 //
115 // This will normally take all of the data we have queued up, unless an
116 // encoder has spit out a big enough chunk all at once that we can't manage
117 // all of it.
Austin Schuh8bdfc492023-02-11 12:53:13 -0800118 void Flush(aos::monotonic_clock::time_point now);
Brian Silvermanf51499a2020-09-21 12:49:08 -0700119
Brian Silvermanf51499a2020-09-21 12:49:08 -0700120 // Flushes data if we've reached the threshold to do that as part of normal
Austin Schuhbd06ae42021-03-31 22:48:21 -0700121 // operation either due to the outstanding queued data, or because we have
122 // passed our flush period. now is the current time to save some CPU grabbing
123 // the current time. It just needs to be close.
124 void FlushAtThreshold(aos::monotonic_clock::time_point now);
Brian Silvermanf51499a2020-09-21 12:49:08 -0700125
Alexei Strotsbc082d82023-05-03 08:43:42 -0700126 std::unique_ptr<LogSink> log_sink_;
Austin Schuh48d10d62022-10-16 22:19:23 -0700127 std::unique_ptr<DataEncoder> encoder_;
Austin Schuh6f3babe2020-01-26 20:34:50 -0800128
Brian Silverman0465fcf2020-09-24 00:29:18 -0700129 bool ran_out_of_space_ = false;
130 bool acknowledge_ran_out_of_space_ = false;
Austin Schuha36c8902019-12-30 18:07:15 -0800131
Austin Schuhbd06ae42021-03-31 22:48:21 -0700132 aos::monotonic_clock::time_point last_flush_time_ =
133 aos::monotonic_clock::min_time;
Austin Schuha36c8902019-12-30 18:07:15 -0800134};
135
Austin Schuhf2d0e682022-10-16 14:20:58 -0700136// Repacks the provided RemoteMessage into fbb.
137flatbuffers::Offset<MessageHeader> PackRemoteMessage(
138 flatbuffers::FlatBufferBuilder *fbb,
139 const message_bridge::RemoteMessage *msg, int channel_index,
140 const aos::monotonic_clock::time_point monotonic_timestamp_time);
141
142constexpr flatbuffers::uoffset_t PackRemoteMessageSize() { return 96u; }
143size_t PackRemoteMessageInline(
144 uint8_t *data, const message_bridge::RemoteMessage *msg, int channel_index,
Austin Schuh71a40d42023-02-04 21:22:22 -0800145 const aos::monotonic_clock::time_point monotonic_timestamp_time,
146 size_t start_byte, size_t end_byte);
Austin Schuhf2d0e682022-10-16 14:20:58 -0700147
Austin Schuha36c8902019-12-30 18:07:15 -0800148// Packes a message pointed to by the context into a MessageHeader.
149flatbuffers::Offset<MessageHeader> PackMessage(
150 flatbuffers::FlatBufferBuilder *fbb, const Context &context,
151 int channel_index, LogType log_type);
152
Austin Schuhfa30c352022-10-16 11:12:02 -0700153// Returns the size that the packed message from PackMessage or
154// PackMessageInline will be.
Austin Schuh48d10d62022-10-16 22:19:23 -0700155flatbuffers::uoffset_t PackMessageSize(LogType log_type, size_t data_size);
Austin Schuhfa30c352022-10-16 11:12:02 -0700156
157// Packs the provided message pointed to by context into the provided buffer.
158// This is equivalent to PackMessage, but doesn't require allocating a
159// FlatBufferBuilder underneath.
160size_t PackMessageInline(uint8_t *data, const Context &contex,
Austin Schuh71a40d42023-02-04 21:22:22 -0800161 int channel_index, LogType log_type, size_t start_byte,
162 size_t end_byte);
Austin Schuhfa30c352022-10-16 11:12:02 -0700163
Austin Schuh05b70472020-01-01 17:11:17 -0800164// Class to read chunks out of a log file.
165class SpanReader {
166 public:
Alexei Strotscee7b372023-04-21 11:57:54 -0700167 // It creates a reader and makes proper decoder based on information encoded
168 // in the filename.
Austin Schuhcd368422021-11-22 21:23:29 -0800169 SpanReader(std::string_view filename, bool quiet = false);
Austin Schuha36c8902019-12-30 18:07:15 -0800170
Alexei Strotscee7b372023-04-21 11:57:54 -0700171 // Opens new reader from provided decoder.
172 SpanReader(std::string_view filename, std::unique_ptr<DataDecoder> decoder);
173
Austin Schuh6f3babe2020-01-26 20:34:50 -0800174 std::string_view filename() const { return filename_; }
175
Brian Smarttea913d42021-12-10 15:02:38 -0800176 size_t TotalRead() const { return total_read_; }
177 size_t TotalConsumed() const { return total_consumed_; }
Austin Schuh60e77942022-05-16 17:48:24 -0700178 bool IsIncomplete() const {
179 return is_finished_ && total_consumed_ < total_read_;
180 }
Brian Smarttea913d42021-12-10 15:02:38 -0800181
Austin Schuhcf5f6442021-07-06 10:43:28 -0700182 // Returns a span with the data for the next message from the log file,
183 // including the size. The result is only guarenteed to be valid until
184 // ReadMessage() or PeekMessage() is called again.
Austin Schuh05b70472020-01-01 17:11:17 -0800185 absl::Span<const uint8_t> ReadMessage();
186
Austin Schuhcf5f6442021-07-06 10:43:28 -0700187 // Returns a span with the data for the next message without consuming it.
188 // Multiple calls to PeekMessage return the same data. ReadMessage or
189 // ConsumeMessage must be called to get the next message.
190 absl::Span<const uint8_t> PeekMessage();
191 // Consumes the message so the next call to ReadMessage or PeekMessage returns
192 // new data. This does not invalidate the data.
193 void ConsumeMessage();
194
Austin Schuh05b70472020-01-01 17:11:17 -0800195 private:
196 // TODO(austin): Optimization:
197 // Allocate the 256k blocks like we do today. But, refcount them with
198 // shared_ptr pointed to by the messageheader that is returned. This avoids
199 // the copy. Need to do more benchmarking.
Brian Silvermanf51499a2020-09-21 12:49:08 -0700200 // And (Brian): Consider just mmapping the file and handing out refcounted
201 // pointers into that too.
Austin Schuh05b70472020-01-01 17:11:17 -0800202
203 // Reads a chunk of data into data_. Returns false if no data was read.
204 bool ReadBlock();
205
Austin Schuhc41603c2020-10-11 16:17:37 -0700206 std::string filename_;
Austin Schuh6f3babe2020-01-26 20:34:50 -0800207
Brian Silvermanf51499a2020-09-21 12:49:08 -0700208 // File reader and data decoder.
209 std::unique_ptr<DataDecoder> decoder_;
Austin Schuh05b70472020-01-01 17:11:17 -0800210
Brian Silvermanf51499a2020-09-21 12:49:08 -0700211 // Vector to read into.
212 ResizeableBuffer data_;
Austin Schuh05b70472020-01-01 17:11:17 -0800213
214 // Amount of data consumed already in data_.
215 size_t consumed_data_ = 0;
Brian Smarttea913d42021-12-10 15:02:38 -0800216
217 // Accumulates the total volume of bytes read from filename_
218 size_t total_read_ = 0;
219
220 // Accumulates the total volume of read bytes that were 'consumed' into
221 // messages. May be less than total_read_, if the last message (span) is
222 // either truncated or somehow corrupt.
223 size_t total_consumed_ = 0;
224
225 // Reached the end, no more readable messages.
226 bool is_finished_ = false;
Austin Schuh05b70472020-01-01 17:11:17 -0800227};
228
Alexei Strotsa3194712023-04-21 23:30:50 -0700229// Class to borrow log readers from pool based on their ids. This is used as a
230// factory and helps with performance when construction or descrution of
231// decoders are not free. For instance,, S3 fetchers are slow to destroy.
232class ReadersPool {
233 public:
234 virtual ~ReadersPool() = default;
235
236 // Borrow reader from pool based on the id.
237 virtual SpanReader *BorrowReader(std::string_view id) = 0;
238};
239
240class LogReadersPool : public ReadersPool {
241 public:
242 explicit LogReadersPool(const LogSource *log_source = nullptr,
243 size_t pool_size = 50);
244
245 SpanReader *BorrowReader(std::string_view id) override;
246
247 private:
248 const LogSource *log_source_;
249 std::vector<SpanReader> part_readers_;
250 const size_t pool_size_;
251};
252
Brian Silvermanfee16972021-09-14 12:06:38 -0700253// Reads the last header from a log file. This handles any duplicate headers
254// that were written.
255std::optional<SizePrefixedFlatbufferVector<LogFileHeader>> ReadHeader(
256 SpanReader *span_reader);
257std::optional<SizePrefixedFlatbufferVector<LogFileHeader>> ReadHeader(
258 std::string_view filename);
259// Reads the Nth message from a log file, excluding the header. Note: this
260// doesn't handle duplicate headers.
261std::optional<SizePrefixedFlatbufferVector<MessageHeader>> ReadNthMessage(
262 std::string_view filename, size_t n);
263
Tyler Chatowb7c6eba2021-07-28 14:43:23 -0700264class UnpackedMessageHeader;
265
Austin Schuh05b70472020-01-01 17:11:17 -0800266// Class which handles reading the header and messages from the log file. This
267// handles any per-file state left before merging below.
268class MessageReader {
269 public:
Alexei Strots58017402023-05-03 22:05:06 -0700270 // TODO (Alexei): it's deprecated and needs to be removed.
271 explicit MessageReader(std::string_view filename)
272 : MessageReader(SpanReader(filename)) {}
273
274 explicit MessageReader(SpanReader span_reader);
Austin Schuh05b70472020-01-01 17:11:17 -0800275
Austin Schuh6f3babe2020-01-26 20:34:50 -0800276 std::string_view filename() const { return span_reader_.filename(); }
277
Austin Schuh05b70472020-01-01 17:11:17 -0800278 // Returns the header from the log file.
279 const LogFileHeader *log_file_header() const {
Austin Schuh97789fc2020-08-01 14:42:45 -0700280 return &raw_log_file_header_.message();
281 }
282
283 // Returns the raw data of the header from the log file.
Austin Schuhadd6eb32020-11-09 21:24:26 -0800284 const SizePrefixedFlatbufferVector<LogFileHeader> &raw_log_file_header()
285 const {
Austin Schuh97789fc2020-08-01 14:42:45 -0700286 return raw_log_file_header_;
Austin Schuh05b70472020-01-01 17:11:17 -0800287 }
288
Mithun Bharadwaja5cb8e02023-08-02 16:10:40 -0700289 // Returns the minimum amount of data needed to queue up for sorting before
290 // we're guarenteed to not see data out of order.
Austin Schuh05b70472020-01-01 17:11:17 -0800291 std::chrono::nanoseconds max_out_of_order_duration() const {
292 return max_out_of_order_duration_;
293 }
294
Austin Schuhcde938c2020-02-02 17:30:07 -0800295 // Returns the newest timestamp read out of the log file.
Austin Schuh05b70472020-01-01 17:11:17 -0800296 monotonic_clock::time_point newest_timestamp() const {
297 return newest_timestamp_;
298 }
299
300 // Returns the next message if there is one.
Tyler Chatowb7c6eba2021-07-28 14:43:23 -0700301 std::shared_ptr<UnpackedMessageHeader> ReadMessage();
Austin Schuh05b70472020-01-01 17:11:17 -0800302
303 // The time at which we need to read another chunk from the logfile.
304 monotonic_clock::time_point queue_data_time() const {
305 return newest_timestamp() - max_out_of_order_duration();
306 }
307
Brian Smarttea913d42021-12-10 15:02:38 -0800308 // Flag value setters for testing
309 void set_crash_on_corrupt_message_flag(bool b) {
310 crash_on_corrupt_message_flag_ = b;
311 }
312 void set_ignore_corrupt_messages_flag(bool b) {
313 ignore_corrupt_messages_flag_ = b;
314 }
315
Austin Schuh05b70472020-01-01 17:11:17 -0800316 private:
317 // Log chunk reader.
318 SpanReader span_reader_;
319
Austin Schuh97789fc2020-08-01 14:42:45 -0700320 // Vector holding the raw data for the log file header.
Austin Schuhadd6eb32020-11-09 21:24:26 -0800321 SizePrefixedFlatbufferVector<LogFileHeader> raw_log_file_header_;
Austin Schuh05b70472020-01-01 17:11:17 -0800322
323 // Minimum amount of data to queue up for sorting before we are guarenteed
324 // to not see data out of order.
325 std::chrono::nanoseconds max_out_of_order_duration_;
326
327 // Timestamp of the newest message in a channel queue.
328 monotonic_clock::time_point newest_timestamp_ = monotonic_clock::min_time;
Brian Smarttea913d42021-12-10 15:02:38 -0800329
330 // Total volume of verifiable messages from the beginning of the file.
331 // TODO - are message counts also useful?
332 size_t total_verified_before_ = 0;
333
334 // Total volume of messages with corrupted flatbuffer formatting, if any.
335 // Excludes corrupted message content.
336 // TODO - if the layout included something as simple as a CRC (relatively
337 // fast and robust enough) for each span, then corrupted content could be
338 // included in this check.
339 size_t total_corrupted_ = 0;
340
341 // Total volume of verifiable messages intermixed with corrupted messages,
342 // if any. Will be == 0 if total_corrupted_ == 0.
343 size_t total_verified_during_ = 0;
344
345 // Total volume of verifiable messages found after the last corrupted one,
346 // if any. Will be == 0 if total_corrupted_ == 0.
347 size_t total_verified_after_ = 0;
348
349 bool is_corrupted() const { return total_corrupted_ > 0; }
350
351 bool crash_on_corrupt_message_flag_ = true;
352 bool ignore_corrupt_messages_flag_ = false;
Austin Schuh05b70472020-01-01 17:11:17 -0800353};
354
Austin Schuhc41603c2020-10-11 16:17:37 -0700355// A class to seamlessly read messages from a list of part files.
356class PartsMessageReader {
357 public:
Alexei Strots58017402023-05-03 22:05:06 -0700358 // TODO (Alexei): it's deprecated, need to removed.
359 explicit PartsMessageReader(LogParts log_parts)
360 : PartsMessageReader(LogPartsAccess(std::nullopt, std::move(log_parts))) {
361 }
362
363 explicit PartsMessageReader(LogPartsAccess log_parts_access);
Austin Schuhc41603c2020-10-11 16:17:37 -0700364
365 std::string_view filename() const { return message_reader_.filename(); }
366
Austin Schuhd2f96102020-12-01 20:27:29 -0800367 // Returns the LogParts that holds the filenames we are reading.
Alexei Strots58017402023-05-03 22:05:06 -0700368 const LogParts &parts() const { return log_parts_access_.parts(); }
Austin Schuhd2f96102020-12-01 20:27:29 -0800369
Austin Schuh4b5c22a2020-11-30 22:58:43 -0800370 const LogFileHeader *log_file_header() const {
371 return message_reader_.log_file_header();
372 }
373
Austin Schuhc41603c2020-10-11 16:17:37 -0700374 // Returns the minimum amount of data needed to queue up for sorting before
375 // we are guarenteed to not see data out of order.
376 std::chrono::nanoseconds max_out_of_order_duration() const {
Mithun Bharadwaja5cb8e02023-08-02 16:10:40 -0700377 return max_out_of_order_duration_;
Austin Schuhc41603c2020-10-11 16:17:37 -0700378 }
379
380 // Returns the newest timestamp read out of the log file.
381 monotonic_clock::time_point newest_timestamp() const {
382 return newest_timestamp_;
383 }
384
385 // Returns the next message if there is one, or nullopt if we have reached the
386 // end of all the files.
387 // Note: reading the next message may change the max_out_of_order_duration().
Tyler Chatowb7c6eba2021-07-28 14:43:23 -0700388 std::shared_ptr<UnpackedMessageHeader> ReadMessage();
Austin Schuhc41603c2020-10-11 16:17:37 -0700389
Austin Schuh48507722021-07-17 17:29:24 -0700390 // Returns the boot count for the requested node, or std::nullopt if we don't
391 // know.
392 std::optional<size_t> boot_count(size_t node_index) const {
393 CHECK_GE(node_index, 0u);
394 CHECK_LT(node_index, boot_counts_.size());
395 return boot_counts_[node_index];
396 }
397
Austin Schuhc41603c2020-10-11 16:17:37 -0700398 private:
Alexei Strots58017402023-05-03 22:05:06 -0700399 static SpanReader MakeSpanReader(const LogPartsAccess &log_parts_access,
400 size_t part_number);
401
Austin Schuhc41603c2020-10-11 16:17:37 -0700402 // Opens the next log and updates message_reader_. Sets done_ if there is
403 // nothing more to do.
404 void NextLog();
Austin Schuh48507722021-07-17 17:29:24 -0700405 void ComputeBootCounts();
Austin Schuhc41603c2020-10-11 16:17:37 -0700406
Alexei Strots58017402023-05-03 22:05:06 -0700407 const LogPartsAccess log_parts_access_;
Austin Schuhc41603c2020-10-11 16:17:37 -0700408 size_t next_part_index_ = 1u;
409 bool done_ = false;
Alexei Strots036d84e2023-05-03 16:05:12 -0700410
Austin Schuhc41603c2020-10-11 16:17:37 -0700411 MessageReader message_reader_;
Brian Silvermanfee16972021-09-14 12:06:38 -0700412 // We instantiate the next one early, to allow implementations to prefetch.
413 // TODO(Brian): To get optimal performance when downloading, this needs more
414 // communication with the implementation to prioritize the next part and add
415 // more parallelism when it helps. Maybe some kind of a queue of parts in
416 // order, and the implementation gets to pull however many make sense off the
417 // front?
418 std::optional<MessageReader> next_message_reader_;
Austin Schuhc41603c2020-10-11 16:17:37 -0700419
Austin Schuh315b96b2020-12-11 21:21:12 -0800420 // True after we have seen a message after the start of the log. The
421 // guarentees on logging essentially are that all data from before the
422 // starting time of the log may be arbitrarily out of order, but once we get
423 // max_out_of_order_duration past the start, everything will remain within
424 // max_out_of_order_duration. We shouldn't see anything before the start
425 // after we've seen a message that is at least max_out_of_order_duration after
426 // the start.
427 bool after_start_ = false;
428
Austin Schuhc41603c2020-10-11 16:17:37 -0700429 monotonic_clock::time_point newest_timestamp_ = monotonic_clock::min_time;
Austin Schuh48507722021-07-17 17:29:24 -0700430
431 // Per node boot counts.
432 std::vector<std::optional<size_t>> boot_counts_;
Mithun Bharadwaja5cb8e02023-08-02 16:10:40 -0700433
434 const std::chrono::nanoseconds max_out_of_order_duration_;
Austin Schuhc41603c2020-10-11 16:17:37 -0700435};
436
Tyler Chatowb7c6eba2021-07-28 14:43:23 -0700437// Stores MessageHeader as a flat header and inline, aligned block of data.
438class UnpackedMessageHeader {
439 public:
James Kuszmaul9776b392023-01-14 14:08:08 -0800440 UnpackedMessageHeader(
441 uint32_t channel_index, monotonic_clock::time_point monotonic_sent_time,
442 realtime_clock::time_point realtime_sent_time, uint32_t queue_index,
443 std::optional<monotonic_clock::time_point> monotonic_remote_time,
444 std::optional<realtime_clock::time_point> realtime_remote_time,
Austin Schuhb5224ec2024-03-27 15:20:09 -0700445 monotonic_clock::time_point monotonic_remote_transmit_time,
James Kuszmaul9776b392023-01-14 14:08:08 -0800446 std::optional<uint32_t> remote_queue_index,
447 monotonic_clock::time_point monotonic_timestamp_time,
448 bool has_monotonic_timestamp_time, absl::Span<const uint8_t> span)
449 : channel_index(channel_index),
450 monotonic_sent_time(monotonic_sent_time),
451 realtime_sent_time(realtime_sent_time),
452 queue_index(queue_index),
453 monotonic_remote_time(monotonic_remote_time),
454 realtime_remote_time(realtime_remote_time),
Austin Schuhb5224ec2024-03-27 15:20:09 -0700455 monotonic_remote_transmit_time(monotonic_remote_transmit_time),
James Kuszmaul9776b392023-01-14 14:08:08 -0800456 remote_queue_index(remote_queue_index),
457 monotonic_timestamp_time(monotonic_timestamp_time),
458 has_monotonic_timestamp_time(has_monotonic_timestamp_time),
459 span(span) {}
Tyler Chatowb7c6eba2021-07-28 14:43:23 -0700460 UnpackedMessageHeader(const UnpackedMessageHeader &) = delete;
461 UnpackedMessageHeader &operator=(const UnpackedMessageHeader &) = delete;
462
463 // The channel.
464 uint32_t channel_index = 0xffffffff;
465
466 monotonic_clock::time_point monotonic_sent_time;
467 realtime_clock::time_point realtime_sent_time;
468
469 // The local queue index.
470 uint32_t queue_index = 0xffffffff;
471
Austin Schuh826e6ce2021-11-18 20:33:10 -0800472 std::optional<aos::monotonic_clock::time_point> monotonic_remote_time;
Tyler Chatowb7c6eba2021-07-28 14:43:23 -0700473
474 std::optional<realtime_clock::time_point> realtime_remote_time;
Austin Schuhb5224ec2024-03-27 15:20:09 -0700475 aos::monotonic_clock::time_point monotonic_remote_transmit_time;
Tyler Chatowb7c6eba2021-07-28 14:43:23 -0700476 std::optional<uint32_t> remote_queue_index;
477
478 // This field is defaulted in the flatbuffer, so we need to store both the
479 // possibly defaulted value and whether it is defaulted.
480 monotonic_clock::time_point monotonic_timestamp_time;
481 bool has_monotonic_timestamp_time;
482
483 static std::shared_ptr<UnpackedMessageHeader> MakeMessage(
484 const MessageHeader &message);
485
486 // Note: we are storing a span here because we need something to put in the
487 // SharedSpan pointer that RawSender takes. We are using the aliasing
488 // constructor of shared_ptr to avoid the allocation, and it needs a nice
489 // pointer to track.
490 absl::Span<const uint8_t> span;
491
Eric Schmiedebergae00e732023-04-12 15:53:17 -0600492 // Used to be able to mutate the data in the span. This is only used for
493 // mutating the message inside of LogReader for the Before Send Callback. It
494 // is safe in this case since there is only one caller to Send, and the data
495 // is not mutated after Send is called.
496 uint8_t *mutable_data() { return const_cast<uint8_t *>(span.data()); }
497
Tyler Chatowb7c6eba2021-07-28 14:43:23 -0700498 char actual_data[];
499
500 private:
501 ~UnpackedMessageHeader() {}
502
503 static void DestroyAndFree(UnpackedMessageHeader *p) {
504 p->~UnpackedMessageHeader();
505 free(p);
506 }
507};
508
509std::ostream &operator<<(std::ostream &os,
510 const UnpackedMessageHeader &message);
511
Austin Schuh1be0ce42020-11-29 22:43:26 -0800512// Struct to hold a message as it gets sorted on a single node.
513struct Message {
514 // The channel.
515 uint32_t channel_index = 0xffffffff;
516 // The local queue index.
Austin Schuh58646e22021-08-23 23:51:46 -0700517 // TODO(austin): Technically the boot inside queue_index is redundant with
518 // timestamp. In practice, it is less error-prone to duplicate it. Maybe a
519 // function to return the combined struct?
520 BootQueueIndex queue_index;
Austin Schuh2dc8c7d2021-07-01 17:41:28 -0700521 // The local timestamp.
522 BootTimestamp timestamp;
Austin Schuhf16ef6a2021-06-30 21:48:17 -0700523
Austin Schuh48507722021-07-17 17:29:24 -0700524 // Remote boot when this is a timestamp.
525 size_t monotonic_remote_boot = 0xffffff;
526
527 size_t monotonic_timestamp_boot = 0xffffff;
528
Tyler Chatowb7c6eba2021-07-28 14:43:23 -0700529 std::shared_ptr<UnpackedMessageHeader> data;
Austin Schuh1be0ce42020-11-29 22:43:26 -0800530
531 bool operator<(const Message &m2) const;
Austin Schuh63097262023-08-16 17:04:29 -0700532 bool operator<=(const Message &m2) const;
Austin Schuh1be0ce42020-11-29 22:43:26 -0800533 bool operator>=(const Message &m2) const;
Austin Schuh8f52ed52020-11-30 23:12:39 -0800534 bool operator==(const Message &m2) const;
Austin Schuh1be0ce42020-11-29 22:43:26 -0800535};
536
537std::ostream &operator<<(std::ostream &os, const Message &m);
538
Austin Schuhd2f96102020-12-01 20:27:29 -0800539// Structure to hold a full message and all the timestamps, which may or may not
540// have been sent from a remote node. The remote_queue_index will be invalid if
541// this message is from the point of view of the node which sent it.
542struct TimestampedMessage {
543 uint32_t channel_index = 0xffffffff;
544
Austin Schuh58646e22021-08-23 23:51:46 -0700545 BootQueueIndex queue_index;
Austin Schuh2dc8c7d2021-07-01 17:41:28 -0700546 BootTimestamp monotonic_event_time;
Austin Schuhd2f96102020-12-01 20:27:29 -0800547 realtime_clock::time_point realtime_event_time = realtime_clock::min_time;
548
Austin Schuh58646e22021-08-23 23:51:46 -0700549 BootQueueIndex remote_queue_index;
Austin Schuh2dc8c7d2021-07-01 17:41:28 -0700550 BootTimestamp monotonic_remote_time;
Austin Schuhd2f96102020-12-01 20:27:29 -0800551 realtime_clock::time_point realtime_remote_time = realtime_clock::min_time;
552
Austin Schuhb5224ec2024-03-27 15:20:09 -0700553 BootTimestamp monotonic_remote_transmit_time;
554
Austin Schuh2dc8c7d2021-07-01 17:41:28 -0700555 BootTimestamp monotonic_timestamp_time;
Austin Schuh8bf1e632021-01-02 22:41:04 -0800556
Tyler Chatowb7c6eba2021-07-28 14:43:23 -0700557 std::shared_ptr<UnpackedMessageHeader> data;
Austin Schuhd2f96102020-12-01 20:27:29 -0800558};
559
560std::ostream &operator<<(std::ostream &os, const TimestampedMessage &m);
561
Austin Schuh4b5c22a2020-11-30 22:58:43 -0800562// Class to sort the resulting messages from a PartsMessageReader.
Alexei Strotsa8dadd12023-04-28 15:19:23 -0700563class MessageSorter {
Austin Schuh4b5c22a2020-11-30 22:58:43 -0800564 public:
Alexei Strots58017402023-05-03 22:05:06 -0700565 // TODO (Alexei): it's deperecated and need to be removed.
566 explicit MessageSorter(LogParts log_parts)
567 : MessageSorter(LogPartsAccess(std::nullopt, std::move(log_parts))) {}
568
569 explicit MessageSorter(const LogPartsAccess log_parts_access);
Austin Schuh4b5c22a2020-11-30 22:58:43 -0800570
Austin Schuh0ca51f32020-12-25 21:51:45 -0800571 // Returns the parts that this is sorting messages from.
572 const LogParts &parts() const { return parts_message_reader_.parts(); }
Austin Schuh4b5c22a2020-11-30 22:58:43 -0800573
Austin Schuhd2f96102020-12-01 20:27:29 -0800574 monotonic_clock::time_point monotonic_start_time() const {
Austin Schuh0ca51f32020-12-25 21:51:45 -0800575 return parts().monotonic_start_time;
Austin Schuhd2f96102020-12-01 20:27:29 -0800576 }
577 realtime_clock::time_point realtime_start_time() const {
Austin Schuh0ca51f32020-12-25 21:51:45 -0800578 return parts().realtime_start_time;
Austin Schuhd2f96102020-12-01 20:27:29 -0800579 }
580
Austin Schuh4b5c22a2020-11-30 22:58:43 -0800581 // The time this data is sorted until.
582 monotonic_clock::time_point sorted_until() const { return sorted_until_; }
583
Adam Snaider13d48d92023-08-03 12:20:15 -0700584 // Returns the next sorted message from the log file.
585 const Message *Front();
Austin Schuh4b5c22a2020-11-30 22:58:43 -0800586 // Pops the front message. This should only be called after a call to
587 // Front().
588 void PopFront();
589
590 // Returns a debug string representing the contents of this sorter.
591 std::string DebugString() const;
592
593 private:
594 // Log parts reader we are wrapping.
595 PartsMessageReader parts_message_reader_;
596 // Cache of the time we are sorted until.
597 aos::monotonic_clock::time_point sorted_until_ = monotonic_clock::min_time;
598
Austin Schuhb000de62020-12-03 22:00:40 -0800599 // Timestamp of the last message returned. Used to make sure nothing goes
600 // backwards.
601 monotonic_clock::time_point last_message_time_ = monotonic_clock::min_time;
602
Austin Schuh4b5c22a2020-11-30 22:58:43 -0800603 // Set used for efficient sorting of messages. We can benchmark and evaluate
604 // other data structures if this proves to be the bottleneck.
605 absl::btree_set<Message> messages_;
Austin Schuh48507722021-07-17 17:29:24 -0700606
607 // Mapping from channel to source node.
608 // TODO(austin): Should we put this in Boots so it can be cached for everyone?
609 std::vector<size_t> source_node_index_;
Austin Schuh4b5c22a2020-11-30 22:58:43 -0800610};
611
Alexei Strotsa8dadd12023-04-28 15:19:23 -0700612// Class to run merge sort on the messages associated with specific node and
613// boot.
614class PartsMerger {
Austin Schuh8f52ed52020-11-30 23:12:39 -0800615 public:
Austin Schuh63097262023-08-16 17:04:29 -0700616 PartsMerger(SelectedLogParts &&selected_parts);
Austin Schuhd2f96102020-12-01 20:27:29 -0800617
Austin Schuhf16ef6a2021-06-30 21:48:17 -0700618 // Copying and moving will mess up the internal raw pointers. Just don't do
619 // it.
Alexei Strotsa8dadd12023-04-28 15:19:23 -0700620 PartsMerger(PartsMerger const &) = delete;
621 PartsMerger(PartsMerger &&) = delete;
622 void operator=(PartsMerger const &) = delete;
623 void operator=(PartsMerger &&) = delete;
Austin Schuhf16ef6a2021-06-30 21:48:17 -0700624
Austin Schuhd2f96102020-12-01 20:27:29 -0800625 // Node index in the configuration of this node.
626 int node() const { return node_; }
Austin Schuh8f52ed52020-11-30 23:12:39 -0800627
Austin Schuh63097262023-08-16 17:04:29 -0700628 std::string_view node_name() const {
629 return configuration::NodeName(configuration().get(), node());
630 }
631
Austin Schuh0ca51f32020-12-25 21:51:45 -0800632 // List of parts being sorted together.
633 std::vector<const LogParts *> Parts() const;
634
Austin Schuh63097262023-08-16 17:04:29 -0700635 const std::shared_ptr<const Configuration> configuration() const {
636 return message_sorters_[0].parts().config;
Austin Schuhd2f96102020-12-01 20:27:29 -0800637 }
638
639 monotonic_clock::time_point monotonic_start_time() const {
640 return monotonic_start_time_;
641 }
642 realtime_clock::time_point realtime_start_time() const {
643 return realtime_start_time_;
Austin Schuh8f52ed52020-11-30 23:12:39 -0800644 }
Austin Schuh63097262023-08-16 17:04:29 -0700645
646 // Returns the oldest message observed in this set of parts. This could be
647 // before the start time if we fetched it at the start of logging from long
648 // ago.
649 monotonic_clock::time_point monotonic_oldest_time() {
650 if (monotonic_oldest_time_ == monotonic_clock::max_time) {
651 VLOG(1) << "No oldest message time, fetching " << node_name();
652 (void)Front();
653 }
Austin Schuh5dd22842021-11-17 16:09:39 -0800654 return monotonic_oldest_time_;
655 }
Austin Schuh8f52ed52020-11-30 23:12:39 -0800656
657 // The time this data is sorted until.
658 monotonic_clock::time_point sorted_until() const { return sorted_until_; }
659
Adam Snaider13d48d92023-08-03 12:20:15 -0700660 // Returns the next sorted message from the set of log files.
661 const Message *Front();
Austin Schuh8f52ed52020-11-30 23:12:39 -0800662 // Pops the front message. This should only be called after a call to
663 // Front().
664 void PopFront();
665
666 private:
667 // Unsorted list of all parts sorters.
Alexei Strotsa8dadd12023-04-28 15:19:23 -0700668 std::vector<MessageSorter> message_sorters_;
Alexei Strots58017402023-05-03 22:05:06 -0700669
Austin Schuh8f52ed52020-11-30 23:12:39 -0800670 // Pointer to the parts sorter holding the current Front message if one
671 // exists, or nullptr if a new one needs to be found.
Alexei Strotsa8dadd12023-04-28 15:19:23 -0700672 MessageSorter *current_ = nullptr;
Austin Schuh8f52ed52020-11-30 23:12:39 -0800673 // Cached sorted_until value.
674 aos::monotonic_clock::time_point sorted_until_ = monotonic_clock::min_time;
Austin Schuhd2f96102020-12-01 20:27:29 -0800675
676 // Cached node.
677 int node_;
678
Austin Schuhb000de62020-12-03 22:00:40 -0800679 // Timestamp of the last message returned. Used to make sure nothing goes
680 // backwards.
681 monotonic_clock::time_point last_message_time_ = monotonic_clock::min_time;
682
Austin Schuhd2f96102020-12-01 20:27:29 -0800683 realtime_clock::time_point realtime_start_time_ = realtime_clock::max_time;
684 monotonic_clock::time_point monotonic_start_time_ = monotonic_clock::max_time;
Austin Schuh60e77942022-05-16 17:48:24 -0700685 monotonic_clock::time_point monotonic_oldest_time_ =
686 monotonic_clock::max_time;
Austin Schuhd2f96102020-12-01 20:27:29 -0800687};
688
Austin Schuhf16ef6a2021-06-30 21:48:17 -0700689// Class to concatenate multiple boots worth of logs into a single per-node
690// stream.
691class BootMerger {
692 public:
Austin Schuh63097262023-08-16 17:04:29 -0700693 BootMerger(std::string_view node_name, const LogFilesContainer &log_files,
694 const std::vector<StoredDataType> &types);
Austin Schuhf16ef6a2021-06-30 21:48:17 -0700695
696 // Copying and moving will mess up the internal raw pointers. Just don't do
697 // it.
698 BootMerger(BootMerger const &) = delete;
699 BootMerger(BootMerger &&) = delete;
700 void operator=(BootMerger const &) = delete;
701 void operator=(BootMerger &&) = delete;
702
703 // Node index in the configuration of this node.
Austin Schuh63097262023-08-16 17:04:29 -0700704 int node() const { return node_; }
705 std::string_view node_name() const;
Austin Schuhf16ef6a2021-06-30 21:48:17 -0700706
707 // List of parts being sorted together.
708 std::vector<const LogParts *> Parts() const;
709
Austin Schuh63097262023-08-16 17:04:29 -0700710 const std::shared_ptr<const Configuration> configuration() const {
711 return configuration_;
Austin Schuhf16ef6a2021-06-30 21:48:17 -0700712 }
713
Austin Schuh63097262023-08-16 17:04:29 -0700714 monotonic_clock::time_point monotonic_start_time(size_t boot) const;
715 realtime_clock::time_point realtime_start_time(size_t boot) const;
716 monotonic_clock::time_point monotonic_oldest_time(size_t boot) const;
Austin Schuhf16ef6a2021-06-30 21:48:17 -0700717
Austin Schuh63097262023-08-16 17:04:29 -0700718 bool started() const;
Austin Schuhf16ef6a2021-06-30 21:48:17 -0700719
Adam Snaider13d48d92023-08-03 12:20:15 -0700720 // Returns the next sorted message from the set of log files.
721 const Message *Front();
Austin Schuhf16ef6a2021-06-30 21:48:17 -0700722 // Pops the front message. This should only be called after a call to
723 // Front().
724 void PopFront();
725
726 private:
727 int index_ = 0;
728
729 // TODO(austin): Sanjay points out this is pretty inefficient. Don't keep so
730 // many things open.
Austin Schuh63097262023-08-16 17:04:29 -0700731 // A list of all the parts mergers. Only the boots with something to sort are
732 // instantiated.
Alexei Strotsa8dadd12023-04-28 15:19:23 -0700733 std::vector<std::unique_ptr<PartsMerger>> parts_mergers_;
Austin Schuh63097262023-08-16 17:04:29 -0700734
735 std::shared_ptr<const Configuration> configuration_;
736 int node_;
737};
738
// Selects when timestamps are read relative to the rest of the data.
enum class TimestampQueueStrategy {
  // Timestamps are read alongside all the other data.
  kQueueTogether,
  // Timestamps are read first.
  kQueueTimestampsAtStartup,
};
745
746// Class to manage queueing up timestamps from BootMerger and notifying
747// TimestampMapper of them.
748class SplitTimestampBootMerger {
749 public:
750 SplitTimestampBootMerger(std::string_view node_name,
751 const LogFilesContainer &log_files,
752 TimestampQueueStrategy timestamp_queue_strategy);
753
754 // Copying and moving will mess up the internal raw pointers. Just don't do
755 // it.
756 SplitTimestampBootMerger(SplitTimestampBootMerger const &) = delete;
757 SplitTimestampBootMerger(SplitTimestampBootMerger &&) = delete;
758 void operator=(SplitTimestampBootMerger const &) = delete;
759 void operator=(SplitTimestampBootMerger &&) = delete;
760
761 // Reads all timestamps into a member variable queue, and calls the function
762 // on each timestamp. This only saves timestamps, which are defined as
763 // messages sent on this node, but not originally from this node. To make
764 // that distinction, source_node is provided which has a list of which node
765 // index is the source node for each channel, where the channel index is the
766 // array index.
767 void QueueTimestamps(std::function<void(TimestampedMessage *)> fn,
768 const std::vector<size_t> &source_node);
769
770 // Node index in the configuration of this node.
771 int node() const { return boot_merger_.node(); }
772 // Returns the name of the node this class is sorting for.
773 std::string_view node_name() const;
774
775 std::shared_ptr<const Configuration> configuration() const {
776 return boot_merger_.configuration();
777 }
778
779 monotonic_clock::time_point monotonic_start_time(size_t boot) const;
780 realtime_clock::time_point realtime_start_time(size_t boot) const;
781 monotonic_clock::time_point monotonic_oldest_time(size_t boot) const;
782
783 // Returns true if the log has been started.
784 bool started() const {
785 // Timestamps don't count, so only track boot_merger_.
786 return boot_merger_.started();
787 }
788
Adam Snaider13d48d92023-08-03 12:20:15 -0700789 // Returns the next sorted message from the set of log files.
790 const Message *Front();
Austin Schuh63097262023-08-16 17:04:29 -0700791
792 // Pops the front message. This should only be called after a call to
793 // Front().
794 void PopFront();
795
796 private:
797 enum class MessageSource {
798 kTimestampMessage,
799 kBootMerger,
800 };
801
802 MessageSource message_source_ = MessageSource::kBootMerger;
803
804 // Boot merger for data and potentially timestamps.
805 BootMerger boot_merger_;
806
807 // Boot merger for just timestamps. Any data read from here is to be ignored.
808 std::unique_ptr<BootMerger> timestamp_boot_merger_;
809
810 // The callback requires us to convert each message to a TimestampedMessage.
811 std::deque<TimestampedMessage> timestamp_messages_;
812
813 // Storage for the next timestamp message to return. This is separate so we
814 // can convert them back to a Message.
815 //
816 // TODO(austin): It would be nice to not have to convert...
817 std::optional<Message> next_timestamp_;
818
819 // Start times for each boot.
820 std::vector<monotonic_clock::time_point> monotonic_start_time_;
821 std::vector<realtime_clock::time_point> realtime_start_time_;
822
823 // Tracks if QueueTimestamps loaded any timestamps.
824 bool queue_timestamps_ran_ = false;
Austin Schuhf16ef6a2021-06-30 21:48:17 -0700825};
826
Austin Schuhd2f96102020-12-01 20:27:29 -0800827// Class to match timestamps with the corresponding data from other nodes.
Austin Schuh79b30942021-01-24 22:32:21 -0800828//
829// This class also buffers data for the node it represents, and supports
830// notifying when new data is queued as well as queueing until a point in time.
Austin Schuhd2f96102020-12-01 20:27:29 -0800831class TimestampMapper {
832 public:
Alexei Strots1f51ac72023-05-15 10:14:54 -0700833 TimestampMapper(std::string_view node_name,
Austin Schuh63097262023-08-16 17:04:29 -0700834 const LogFilesContainer &log_files,
835 TimestampQueueStrategy timestamp_queue_strategy);
Austin Schuhd2f96102020-12-01 20:27:29 -0800836
837 // Copying and moving will mess up the internal raw pointers. Just don't do
838 // it.
839 TimestampMapper(TimestampMapper const &) = delete;
840 TimestampMapper(TimestampMapper &&) = delete;
841 void operator=(TimestampMapper const &) = delete;
842 void operator=(TimestampMapper &&) = delete;
843
844 // TODO(austin): It would be super helpful to provide a way to queue up to
845 // time X without matching timestamps, and to then be able to pull the
846 // timestamps out of this queue. This lets us bootstrap time estimation
847 // without exploding memory usage worst case.
848
Austin Schuh0ca51f32020-12-25 21:51:45 -0800849 const Configuration *configuration() const { return configuration_.get(); }
Austin Schuhd2f96102020-12-01 20:27:29 -0800850
851 // Returns which node this is sorting for.
Austin Schuh2dc8c7d2021-07-01 17:41:28 -0700852 size_t node() const { return boot_merger_.node(); }
Austin Schuhd2f96102020-12-01 20:27:29 -0800853
854 // The start time of this log.
Austin Schuh2dc8c7d2021-07-01 17:41:28 -0700855 monotonic_clock::time_point monotonic_start_time(size_t boot) const {
856 return boot_merger_.monotonic_start_time(boot);
Austin Schuhd2f96102020-12-01 20:27:29 -0800857 }
Austin Schuh2dc8c7d2021-07-01 17:41:28 -0700858 realtime_clock::time_point realtime_start_time(size_t boot) const {
859 return boot_merger_.realtime_start_time(boot);
Austin Schuhd2f96102020-12-01 20:27:29 -0800860 }
Austin Schuh5dd22842021-11-17 16:09:39 -0800861 // Returns the oldest timestamp on a message on this boot.
862 monotonic_clock::time_point monotonic_oldest_time(size_t boot) const {
863 return boot_merger_.monotonic_oldest_time(boot);
864 }
Austin Schuhd2f96102020-12-01 20:27:29 -0800865
866 // Uses timestamp_mapper as the peer for its node. Only one mapper may be set
867 // for each node. Peers are used to look up the data for timestamps on this
868 // node.
869 void AddPeer(TimestampMapper *timestamp_mapper);
870
Austin Schuh24bf4972021-06-29 22:09:08 -0700871 // Returns true if anything has been queued up.
Austin Schuh2dc8c7d2021-07-01 17:41:28 -0700872 bool started() const { return boot_merger_.started(); }
Austin Schuhd2f96102020-12-01 20:27:29 -0800873
874 // Returns the next message for this node.
875 TimestampedMessage *Front();
876 // Pops the next message. Front must be called first.
877 void PopFront();
878
879 // Returns debug information about this node.
880 std::string DebugString() const;
881
Austin Schuh63097262023-08-16 17:04:29 -0700882 // Queues just the timestamps so that the timestamp callback gets called.
883 // Note, the timestamp callback will get called when they get returned too, so
884 // make sure to unset it if you don't want to be called twice.
885 void QueueTimestamps();
886
Austin Schuh79b30942021-01-24 22:32:21 -0800887 // Queues data the provided time.
Austin Schuh2dc8c7d2021-07-01 17:41:28 -0700888 void QueueUntil(BootTimestamp queue_time);
Austin Schuhe639ea12021-01-25 13:00:22 -0800889 // Queues until we have time_estimation_buffer of data in the queue.
890 void QueueFor(std::chrono::nanoseconds time_estimation_buffer);
Austin Schuh79b30942021-01-24 22:32:21 -0800891
Austin Schuh06601222021-01-26 17:02:50 -0800892 // Queues until the condition is met.
893 template <typename T>
894 void QueueUntilCondition(T fn) {
895 while (true) {
896 if (fn()) {
897 break;
898 }
899 if (!QueueMatched()) {
900 break;
901 }
902 }
903 }
904
Eric Schmiedebergb38477e2022-12-02 16:08:04 -0700905 // Sets the callback that can be used to skip messages.
906 void set_replay_channels_callback(
907 std::function<bool(const TimestampedMessage &)> fn) {
908 replay_channels_callback_ = fn;
909 }
910
Austin Schuh79b30942021-01-24 22:32:21 -0800911 // Sets a callback to be called whenever a full message is queued.
912 void set_timestamp_callback(std::function<void(TimestampedMessage *)> fn) {
913 timestamp_callback_ = fn;
914 }
915
Austin Schuhd2f96102020-12-01 20:27:29 -0800916 private:
Eric Schmiedebergb38477e2022-12-02 16:08:04 -0700917 // Result of MaybeQueueMatched
918 enum class MatchResult : uint8_t {
919 kEndOfFile, // End of the log file being read
920 kQueued, // Message was queued
921 kSkipped // Message was skipped over
922 };
923
Austin Schuhd2f96102020-12-01 20:27:29 -0800924 // The state for a remote node. This holds the data that needs to be matched
925 // with the remote node's timestamps.
926 struct NodeData {
927 // True if we should save data here. This should be true if any of the
928 // bools in delivered below are true.
929 bool any_delivered = false;
930
Austin Schuh36c00932021-07-19 18:13:21 -0700931 // True if we have a peer and therefore should be saving data for it.
932 bool save_for_peer = false;
933
Austin Schuhd2f96102020-12-01 20:27:29 -0800934 // Peer pointer. This node is only to be considered if a peer is set.
935 TimestampMapper *peer = nullptr;
936
937 struct ChannelData {
938 // Deque per channel. This contains the data from the outside
939 // TimestampMapper node which is relevant for the node this NodeData
940 // points to.
941 std::deque<Message> messages;
942 // Bool tracking per channel if a message is delivered to the node this
943 // NodeData represents.
944 bool delivered = false;
Austin Schuh6a7358f2021-11-18 22:40:40 -0800945 // The TTL for delivery.
946 std::chrono::nanoseconds time_to_live = std::chrono::nanoseconds(0);
Austin Schuhd2f96102020-12-01 20:27:29 -0800947 };
948
949 // Vector with per channel data.
950 std::vector<ChannelData> channels;
951 };
952
953 // Returns (and forgets about) the data for the provided timestamp message
954 // showing when it was delivered to this node.
955 Message MatchingMessageFor(const Message &message);
956
957 // Queues up a single message into our message queue, and any nodes that this
958 // message is delivered to. Returns true if one was available, false
959 // otherwise.
960 bool Queue();
961
Austin Schuh79b30942021-01-24 22:32:21 -0800962 // Queues up a single matched message into our matched message queue. Returns
963 // true if one was queued, and false otherwise.
964 bool QueueMatched();
965
Eric Schmiedebergb38477e2022-12-02 16:08:04 -0700966 // Queues a message if the replay_channels_callback is passed and the end of
967 // the log file has not been reached.
968 MatchResult MaybeQueueMatched();
969
Austin Schuhd2f96102020-12-01 20:27:29 -0800970 // Queues up data until we have at least one message >= to time t.
971 // Useful for triggering a remote node to read enough data to have the
972 // timestamp you care about available.
Austin Schuh2dc8c7d2021-07-01 17:41:28 -0700973 void QueueUnmatchedUntil(BootTimestamp t);
Austin Schuhd2f96102020-12-01 20:27:29 -0800974
Austin Schuh79b30942021-01-24 22:32:21 -0800975 // Queues m into matched_messages_.
Adam Snaider13d48d92023-08-03 12:20:15 -0700976 void QueueMessage(const Message *m);
Austin Schuhd2f96102020-12-01 20:27:29 -0800977
Eric Schmiedebergb38477e2022-12-02 16:08:04 -0700978 // If a replay_channels_callback was set and the callback returns false, a
979 // matched message is popped and true is returned. Otherwise false is
980 // returned.
981 bool CheckReplayChannelsAndMaybePop(const TimestampedMessage &message);
982
Austin Schuh58646e22021-08-23 23:51:46 -0700983 // Returns the name of the node this class is sorting for.
984 std::string_view node_name() const {
Austin Schuh63097262023-08-16 17:04:29 -0700985 return configuration::NodeName(configuration(), node());
Austin Schuh58646e22021-08-23 23:51:46 -0700986 }
987
Austin Schuhd2f96102020-12-01 20:27:29 -0800988 // The node merger to source messages from.
Austin Schuh63097262023-08-16 17:04:29 -0700989 SplitTimestampBootMerger boot_merger_;
Austin Schuh0ca51f32020-12-25 21:51:45 -0800990
991 std::shared_ptr<const Configuration> configuration_;
992
Austin Schuhd2f96102020-12-01 20:27:29 -0800993 // The buffer of messages for this node. These are not matched with any
994 // remote data.
995 std::deque<Message> messages_;
996 // The node index for the source node for each channel.
997 std::vector<size_t> source_node_;
998
999 // Vector per node. Not all nodes will have anything.
1000 std::vector<NodeData> nodes_data_;
1001
1002 // Latest message to return.
Austin Schuh79b30942021-01-24 22:32:21 -08001003 std::deque<TimestampedMessage> matched_messages_;
Austin Schuhd2f96102020-12-01 20:27:29 -08001004
Austin Schuh79b30942021-01-24 22:32:21 -08001005 // Tracks the state of the first message in matched_messages_. Do we need to
1006 // update it, is it valid, or should we return nullptr?
Austin Schuhd2f96102020-12-01 20:27:29 -08001007 enum class FirstMessage {
1008 kNeedsUpdate,
1009 kInMessage,
1010 kNullptr,
1011 };
1012 FirstMessage first_message_ = FirstMessage::kNeedsUpdate;
1013
1014 // Timestamp of the last message returned. Used to make sure nothing goes
1015 // backwards.
Austin Schuh2dc8c7d2021-07-01 17:41:28 -07001016 BootTimestamp last_message_time_ = BootTimestamp::min_time();
Austin Schuh6a7358f2021-11-18 22:40:40 -08001017 BootTimestamp last_popped_message_time_ = BootTimestamp::min_time();
Austin Schuhd2f96102020-12-01 20:27:29 -08001018 // Time this node is queued up until. Used for caching.
Austin Schuh2dc8c7d2021-07-01 17:41:28 -07001019 BootTimestamp queued_until_ = BootTimestamp::min_time();
Austin Schuh79b30942021-01-24 22:32:21 -08001020
1021 std::function<void(TimestampedMessage *)> timestamp_callback_;
Eric Schmiedebergb38477e2022-12-02 16:08:04 -07001022 std::function<bool(TimestampedMessage &)> replay_channels_callback_;
Austin Schuh8f52ed52020-11-30 23:12:39 -08001023};
1024
Alexei Strots036d84e2023-05-03 16:05:12 -07001025// Returns the node name, or an empty string if we are a single node.
1026inline std::string_view MaybeNodeName(const Node *node) {
1027 if (node != nullptr) {
1028 return node->name()->string_view();
1029 }
1030 return "";
1031}
Austin Schuhee711052020-08-24 16:06:09 -07001032
Austin Schuh71a40d42023-02-04 21:22:22 -08001033// Class to copy a RemoteMessage into the provided buffer.
1034class RemoteMessageCopier : public DataEncoder::Copier {
1035 public:
1036 RemoteMessageCopier(const message_bridge::RemoteMessage *message,
1037 int channel_index,
1038 aos::monotonic_clock::time_point monotonic_timestamp_time,
1039 EventLoop *event_loop)
1040 : DataEncoder::Copier(PackRemoteMessageSize()),
1041 message_(message),
1042 channel_index_(channel_index),
1043 monotonic_timestamp_time_(monotonic_timestamp_time),
1044 event_loop_(event_loop) {}
1045
1046 monotonic_clock::time_point end_time() const { return end_time_; }
1047
1048 size_t Copy(uint8_t *data, size_t start_byte, size_t end_byte) final {
1049 size_t result = PackRemoteMessageInline(data, message_, channel_index_,
1050 monotonic_timestamp_time_,
1051 start_byte, end_byte);
1052 end_time_ = event_loop_->monotonic_now();
1053 return result;
1054 }
1055
1056 private:
1057 const message_bridge::RemoteMessage *message_;
1058 int channel_index_;
1059 aos::monotonic_clock::time_point monotonic_timestamp_time_;
1060 EventLoop *event_loop_;
1061 monotonic_clock::time_point end_time_;
1062};
1063
1064// Class to copy a context into the provided buffer.
1065class ContextDataCopier : public DataEncoder::Copier {
1066 public:
1067 ContextDataCopier(const Context &context, int channel_index, LogType log_type,
1068 EventLoop *event_loop)
1069 : DataEncoder::Copier(PackMessageSize(log_type, context.size)),
1070 context_(context),
1071 channel_index_(channel_index),
1072 log_type_(log_type),
1073 event_loop_(event_loop) {}
1074
1075 monotonic_clock::time_point end_time() const { return end_time_; }
1076
1077 size_t Copy(uint8_t *data, size_t start_byte, size_t end_byte) final {
1078 size_t result = PackMessageInline(data, context_, channel_index_, log_type_,
1079 start_byte, end_byte);
1080 end_time_ = event_loop_->monotonic_now();
1081 return result;
1082 }
1083
1084 private:
1085 const Context &context_;
1086 const int channel_index_;
1087 const LogType log_type_;
1088 EventLoop *event_loop_;
1089 monotonic_clock::time_point end_time_;
1090};
1091
Brian Silvermanf51499a2020-09-21 12:49:08 -07001092} // namespace aos::logger
Austin Schuha36c8902019-12-30 18:07:15 -08001093
1094#endif // AOS_EVENTS_LOGGING_LOGFILE_UTILS_H_