blob: a117b5af7420fb1ee589758351d4392925f91daf [file] [log] [blame]
Austin Schuhe309d2a2019-11-29 13:25:21 -08001#ifndef AOS_EVENTS_LOGGER_H_
2#define AOS_EVENTS_LOGGER_H_
3
Austin Schuh8bd96322020-02-13 21:18:22 -08004#include <chrono>
Austin Schuhe309d2a2019-11-29 13:25:21 -08005#include <deque>
Austin Schuh05b70472020-01-01 17:11:17 -08006#include <string_view>
Austin Schuh2f8fd752020-09-01 22:38:28 -07007#include <tuple>
Austin Schuh6f3babe2020-01-26 20:34:50 -08008#include <vector>
Austin Schuhe309d2a2019-11-29 13:25:21 -08009
Austin Schuh8bd96322020-02-13 21:18:22 -080010#include "Eigen/Dense"
11#include "absl/strings/str_cat.h"
Austin Schuhe309d2a2019-11-29 13:25:21 -080012#include "absl/types/span.h"
13#include "aos/events/event_loop.h"
Austin Schuh2f8fd752020-09-01 22:38:28 -070014#include "aos/events/logging/eigen_mpq.h"
Austin Schuhcb5601b2020-09-10 15:29:59 -070015#include "aos/events/logging/log_namer.h"
Austin Schuhf6f9bf32020-10-11 14:37:43 -070016#include "aos/events/logging/logfile_sorting.h"
Austin Schuha36c8902019-12-30 18:07:15 -080017#include "aos/events/logging/logfile_utils.h"
James Kuszmaul38735e82019-12-07 16:42:06 -080018#include "aos/events/logging/logger_generated.h"
Austin Schuh64fab802020-09-09 22:47:47 -070019#include "aos/events/logging/uuid.h"
Austin Schuh92547522019-12-28 14:33:43 -080020#include "aos/events/simulated_event_loop.h"
Austin Schuh2f8fd752020-09-01 22:38:28 -070021#include "aos/network/message_bridge_server_generated.h"
Austin Schuh0ca1fd32020-12-18 22:53:05 -080022#include "aos/network/multinode_timestamp_filter.h"
Austin Schuh0de30f32020-12-06 12:44:28 -080023#include "aos/network/remote_message_generated.h"
Austin Schuh8bd96322020-02-13 21:18:22 -080024#include "aos/network/timestamp_filter.h"
Austin Schuhe309d2a2019-11-29 13:25:21 -080025#include "aos/time/time.h"
26#include "flatbuffers/flatbuffers.h"
Austin Schuh2f8fd752020-09-01 22:38:28 -070027#include "third_party/gmp/gmpxx.h"
Austin Schuhe309d2a2019-11-29 13:25:21 -080028
29namespace aos {
30namespace logger {
31
Austin Schuhe309d2a2019-11-29 13:25:21 -080032// Logs all channels available in the event loop to disk every 100 ms.
33// Start by logging one message per channel to capture any state and
34// configuration that is sent rately on a channel and would affect execution.
35class Logger {
36 public:
Austin Schuh0c297012020-09-16 18:41:59 -070037 // Constructs a logger.
Austin Schuh0c297012020-09-16 18:41:59 -070038 // event_loop: The event loop used to read the messages.
Austin Schuh0c297012020-09-16 18:41:59 -070039 // configuration: When provided, this is the configuration to log, and the
40 // configuration to use for the channel list to log. If not provided,
41 // this becomes the configuration from the event loop.
Brian Silverman1f345222020-09-24 21:14:48 -070042 // should_log: When provided, a filter for channels to log. If not provided,
43 // all available channels are logged.
44 Logger(EventLoop *event_loop)
45 : Logger(event_loop, event_loop->configuration()) {}
46 Logger(EventLoop *event_loop, const Configuration *configuration)
47 : Logger(event_loop, configuration,
48 [](const Channel *) { return true; }) {}
49 Logger(EventLoop *event_loop, const Configuration *configuration,
50 std::function<bool(const Channel *)> should_log);
Austin Schuh0c297012020-09-16 18:41:59 -070051 ~Logger();
52
53 // Overrides the name in the log file header.
54 void set_name(std::string_view name) { name_ = name; }
Austin Schuhe309d2a2019-11-29 13:25:21 -080055
Brian Silverman1f345222020-09-24 21:14:48 -070056 // Sets the callback to run after each period of data is logged. Defaults to
57 // doing nothing.
58 //
59 // This callback may safely do things like call Rotate().
60 void set_on_logged_period(std::function<void()> on_logged_period) {
61 on_logged_period_ = std::move(on_logged_period);
62 }
63
Austin Schuh8c399962020-12-25 21:51:45 -080064 void set_separate_config(bool separate_config) {
65 separate_config_ = separate_config;
66 }
67
Brian Silverman1f345222020-09-24 21:14:48 -070068 // Sets the period between polling the data. Defaults to 100ms.
69 //
70 // Changing this while a set of files is being written may result in
71 // unreadable files.
72 void set_polling_period(std::chrono::nanoseconds polling_period) {
73 polling_period_ = polling_period;
74 }
75
Brian Silvermanae7c0332020-09-30 16:58:23 -070076 std::string_view log_start_uuid() const { return log_start_uuid_; }
Brian Silverman035e4182020-10-06 17:13:00 -070077 UUID logger_instance_uuid() const { return logger_instance_uuid_; }
Brian Silvermanae7c0332020-09-30 16:58:23 -070078
Brian Silvermancb805822020-10-06 17:43:35 -070079 // The maximum time for a single fetch which returned a message, or 0 if none
80 // of those have happened.
81 std::chrono::nanoseconds max_message_fetch_time() const {
82 return max_message_fetch_time_;
83 }
84 // The channel for that longest fetch which returned a message, or -1 if none
85 // of those have happened.
86 int max_message_fetch_time_channel() const {
87 return max_message_fetch_time_channel_;
88 }
89 // The size of the message returned by that longest fetch, or -1 if none of
90 // those have happened.
91 int max_message_fetch_time_size() const {
92 return max_message_fetch_time_size_;
93 }
94 // The total time spent fetching messages.
95 std::chrono::nanoseconds total_message_fetch_time() const {
96 return total_message_fetch_time_;
97 }
98 // The total number of fetch calls which returned messages.
99 int total_message_fetch_count() const { return total_message_fetch_count_; }
100 // The total number of bytes fetched.
101 int64_t total_message_fetch_bytes() const {
102 return total_message_fetch_bytes_;
103 }
104
105 // The total time spent in fetches which did not return a message.
106 std::chrono::nanoseconds total_nop_fetch_time() const {
107 return total_nop_fetch_time_;
108 }
109 // The total number of fetches which did not return a message.
110 int total_nop_fetch_count() const { return total_nop_fetch_count_; }
111
112 // The maximum time for a single copy, or 0 if none of those have happened.
113 std::chrono::nanoseconds max_copy_time() const { return max_copy_time_; }
114 // The channel for that longest copy, or -1 if none of those have happened.
115 int max_copy_time_channel() const { return max_copy_time_channel_; }
116 // The size of the message for that longest copy, or -1 if none of those have
117 // happened.
118 int max_copy_time_size() const { return max_copy_time_size_; }
119 // The total time spent copying messages.
120 std::chrono::nanoseconds total_copy_time() const { return total_copy_time_; }
121 // The total number of messages copied.
122 int total_copy_count() const { return total_copy_count_; }
123 // The total number of bytes copied.
124 int64_t total_copy_bytes() const { return total_copy_bytes_; }
125
126 void ResetStatisics();
127
Austin Schuh2f8fd752020-09-01 22:38:28 -0700128 // Rotates the log file(s), triggering new part files to be written for each
129 // log file.
130 void Rotate();
Austin Schuhfa895892020-01-07 20:07:41 -0800131
Brian Silverman1f345222020-09-24 21:14:48 -0700132 // Starts logging to files with the given naming scheme.
Brian Silvermanae7c0332020-09-30 16:58:23 -0700133 //
134 // log_start_uuid may be used to tie this log event to other log events across
135 // multiple nodes. The default (empty string) indicates there isn't one
136 // available.
137 void StartLogging(std::unique_ptr<LogNamer> log_namer,
138 std::string_view log_start_uuid = "");
Brian Silverman1f345222020-09-24 21:14:48 -0700139
140 // Stops logging. Ensures any messages through end_time make it into the log.
141 //
142 // If you want to stop ASAP, pass min_time to avoid reading any more messages.
143 //
144 // Returns the LogNamer in case the caller wants to do anything else with it
145 // before destroying it.
146 std::unique_ptr<LogNamer> StopLogging(
147 aos::monotonic_clock::time_point end_time);
148
149 // Returns whether a log is currently being written.
150 bool is_started() const { return static_cast<bool>(log_namer_); }
151
152 // Shortcut to call StartLogging with a LocalLogNamer when event processing
153 // starts.
154 void StartLoggingLocalNamerOnRun(std::string base_name) {
155 event_loop_->OnRun([this, base_name]() {
156 StartLogging(
157 std::make_unique<LocalLogNamer>(base_name, event_loop_->node()));
158 });
159 }
160
Austin Schuhe309d2a2019-11-29 13:25:21 -0800161 private:
Austin Schuhe309d2a2019-11-29 13:25:21 -0800162 // Structure to track both a fetcher, and if the data fetched has been
163 // written. We may want to delay writing data to disk so that we don't let
164 // data get too far out of order when written to disk so we can avoid making
165 // it too hard to sort when reading.
166 struct FetcherStruct {
167 std::unique_ptr<RawFetcher> fetcher;
168 bool written = false;
Austin Schuh15649d62019-12-28 16:36:38 -0800169
Austin Schuh8d7e0bb2020-10-02 17:57:00 -0700170 // Channel index to log to.
Austin Schuh6f3babe2020-01-26 20:34:50 -0800171 int channel_index = -1;
Brian Silverman1f345222020-09-24 21:14:48 -0700172 const Channel *channel = nullptr;
173 const Node *timestamp_node = nullptr;
Austin Schuh6f3babe2020-01-26 20:34:50 -0800174
175 LogType log_type = LogType::kLogMessage;
176
Brian Silverman1f345222020-09-24 21:14:48 -0700177 // We fill out the metadata at construction, but the actual writers have to
178 // be updated each time we start logging. To avoid duplicating the complex
179 // logic determining whether each writer should be initialized, we just
180 // stash the answer in separate member variables.
181 bool wants_writer = false;
Austin Schuh6f3babe2020-01-26 20:34:50 -0800182 DetachedBufferWriter *writer = nullptr;
Brian Silverman1f345222020-09-24 21:14:48 -0700183 bool wants_timestamp_writer = false;
Austin Schuh6f3babe2020-01-26 20:34:50 -0800184 DetachedBufferWriter *timestamp_writer = nullptr;
Brian Silverman1f345222020-09-24 21:14:48 -0700185 bool wants_contents_writer = false;
Austin Schuh2f8fd752020-09-01 22:38:28 -0700186 DetachedBufferWriter *contents_writer = nullptr;
Brian Silverman1f345222020-09-24 21:14:48 -0700187
Austin Schuh315b96b2020-12-11 21:21:12 -0800188 // Node which this data is from, or -1 if it is unknown.
189 int data_node_index = -1;
190 // Node that this timestamp is for, or -1 if it is known.
191 int timestamp_node_index = -1;
192 // Node that the contents this contents_writer will log are from.
193 int contents_node_index = -1;
Austin Schuhe309d2a2019-11-29 13:25:21 -0800194 };
195
Austin Schuh8d7e0bb2020-10-02 17:57:00 -0700196 // Vector mapping from the channel index from the event loop to the logged
197 // channel index.
198 std::vector<int> event_loop_to_logged_channel_index_;
199
Austin Schuh2f8fd752020-09-01 22:38:28 -0700200 struct NodeState {
201 aos::monotonic_clock::time_point monotonic_start_time =
202 aos::monotonic_clock::min_time;
203 aos::realtime_clock::time_point realtime_start_time =
204 aos::realtime_clock::min_time;
205
Austin Schuh315b96b2020-12-11 21:21:12 -0800206 bool has_source_node_boot_uuid = false;
207
208 // This is an initial UUID that is a valid UUID4 and is pretty obvious that
209 // it isn't valid.
210 std::string source_node_boot_uuid = "00000000-0000-4000-8000-000000000000";
211
Austin Schuh2f8fd752020-09-01 22:38:28 -0700212 aos::SizePrefixedFlatbufferDetachedBuffer<LogFileHeader> log_file_header =
213 aos::SizePrefixedFlatbufferDetachedBuffer<LogFileHeader>::Empty();
Austin Schuh315b96b2020-12-11 21:21:12 -0800214
215 // True if a header has been written to the start of a log file.
216 bool header_written = false;
217 // True if the current written header represents the contents which will
218 // follow. This is cleared when boot_uuid is known to not match anymore.
219 bool header_valid = false;
220
221 // Sets the source_node_boot_uuid, properly updating everything.
222 void SetBootUUID(std::string_view new_source_node_boot_uuid) {
223 source_node_boot_uuid = new_source_node_boot_uuid;
224 header_valid = false;
225 has_source_node_boot_uuid = true;
226
227 flatbuffers::String *source_node_boot_uuid_string =
228 log_file_header.mutable_message()->mutable_source_node_boot_uuid();
229 CHECK_EQ(source_node_boot_uuid.size(),
230 source_node_boot_uuid_string->size());
231 memcpy(source_node_boot_uuid_string->data(), source_node_boot_uuid.data(),
232 source_node_boot_uuid.size());
233 }
Austin Schuh2f8fd752020-09-01 22:38:28 -0700234 };
Brian Silverman1f345222020-09-24 21:14:48 -0700235
236 void WriteHeader();
Austin Schuh315b96b2020-12-11 21:21:12 -0800237
Brian Silverman1f345222020-09-24 21:14:48 -0700238 aos::SizePrefixedFlatbufferDetachedBuffer<LogFileHeader> MakeHeader(
Austin Schuh8c399962020-12-25 21:51:45 -0800239 const Node *node, std::string_view config_sha256);
Brian Silverman1f345222020-09-24 21:14:48 -0700240
Austin Schuh315b96b2020-12-11 21:21:12 -0800241 // Writes the header for the provided node if enough information is valid.
242 void MaybeWriteHeader(int node_index);
243 // Overload for when we already know node as well.
244 void MaybeWriteHeader(int node_index, const Node *node);
245
Brian Silverman1f345222020-09-24 21:14:48 -0700246 bool MaybeUpdateTimestamp(
247 const Node *node, int node_index,
248 aos::monotonic_clock::time_point monotonic_start_time,
249 aos::realtime_clock::time_point realtime_start_time);
250
251 void DoLogData(const monotonic_clock::time_point end_time);
252
253 void WriteMissingTimestamps();
254
255 // Fetches from each channel until all the data is logged.
256 void LogUntil(monotonic_clock::time_point t);
257
Brian Silvermancb805822020-10-06 17:43:35 -0700258 void RecordFetchResult(aos::monotonic_clock::time_point start,
259 aos::monotonic_clock::time_point end, bool got_new,
260 FetcherStruct *fetcher);
261
262 void RecordCreateMessageTime(aos::monotonic_clock::time_point start,
263 aos::monotonic_clock::time_point end,
264 FetcherStruct *fetcher);
265
Brian Silverman1f345222020-09-24 21:14:48 -0700266 // Sets the start time for a specific node.
Austin Schuh315b96b2020-12-11 21:21:12 -0800267 void SetStartTime(
268 size_t node_index, aos::monotonic_clock::time_point monotonic_start_time,
269 aos::realtime_clock::time_point realtime_start_time,
270 aos::monotonic_clock::time_point logger_monotonic_start_time,
271 aos::realtime_clock::time_point logger_realtime_start_time);
Brian Silverman1f345222020-09-24 21:14:48 -0700272
Brian Silvermanae7c0332020-09-30 16:58:23 -0700273 EventLoop *const event_loop_;
Brian Silverman1f345222020-09-24 21:14:48 -0700274 // The configuration to place at the top of the log file.
275 const Configuration *const configuration_;
276
Brian Silvermanae7c0332020-09-30 16:58:23 -0700277 UUID log_event_uuid_ = UUID::Zero();
278 const UUID logger_instance_uuid_ = UUID::Random();
279 std::unique_ptr<LogNamer> log_namer_;
280 // Empty indicates there isn't one.
281 std::string log_start_uuid_;
Brian Silvermanae7c0332020-09-30 16:58:23 -0700282
Brian Silverman1f345222020-09-24 21:14:48 -0700283 // Name to save in the log file. Defaults to hostname.
284 std::string name_;
285
286 std::function<void()> on_logged_period_ = []() {};
287
Brian Silvermancb805822020-10-06 17:43:35 -0700288 std::chrono::nanoseconds max_message_fetch_time_ =
289 std::chrono::nanoseconds::zero();
290 int max_message_fetch_time_channel_ = -1;
291 int max_message_fetch_time_size_ = -1;
292 std::chrono::nanoseconds total_message_fetch_time_ =
293 std::chrono::nanoseconds::zero();
294 int total_message_fetch_count_ = 0;
295 int64_t total_message_fetch_bytes_ = 0;
296
297 std::chrono::nanoseconds total_nop_fetch_time_ =
298 std::chrono::nanoseconds::zero();
299 int total_nop_fetch_count_ = 0;
300
301 std::chrono::nanoseconds max_copy_time_ = std::chrono::nanoseconds::zero();
302 int max_copy_time_channel_ = -1;
303 int max_copy_time_size_ = -1;
304 std::chrono::nanoseconds total_copy_time_ = std::chrono::nanoseconds::zero();
305 int total_copy_count_ = 0;
306 int64_t total_copy_bytes_ = 0;
307
Brian Silverman1f345222020-09-24 21:14:48 -0700308 std::vector<FetcherStruct> fetchers_;
309 TimerHandler *timer_handler_;
310
311 // Period to poll the channels.
312 std::chrono::nanoseconds polling_period_ = std::chrono::milliseconds(100);
313
314 // Last time that data was written for all channels to disk.
315 monotonic_clock::time_point last_synchronized_time_;
316
317 // Max size that the header has consumed. This much extra data will be
318 // reserved in the builder to avoid reallocating.
319 size_t max_header_size_ = 0;
320
Austin Schuh8c399962020-12-25 21:51:45 -0800321 // If true, write the message header into a separate file.
322 bool separate_config_ = true;
323
Brian Silverman1f345222020-09-24 21:14:48 -0700324 // Fetcher for all the statistics from all the nodes.
325 aos::Fetcher<message_bridge::ServerStatistics> server_statistics_fetcher_;
326
Austin Schuh2f8fd752020-09-01 22:38:28 -0700327 std::vector<NodeState> node_state_;
Austin Schuhe309d2a2019-11-29 13:25:21 -0800328};
329
Austin Schuh11d43732020-09-21 17:28:30 -0700330std::vector<std::vector<std::string>> ToLogReaderVector(
331 const std::vector<LogFile> &log_files);
Austin Schuh5212cad2020-09-09 23:12:09 -0700332
Austin Schuh6f3babe2020-01-26 20:34:50 -0800333// We end up with one of the following 3 log file types.
334//
335// Single node logged as the source node.
336// -> Replayed just on the source node.
337//
338// Forwarding timestamps only logged from the perspective of the destination
339// node.
340// -> Matched with data on source node and logged.
341//
342// Forwarding timestamps with data logged as the destination node.
343// -> Replayed just as the destination
344// -> Replayed as the source (Much harder, ordering is not defined)
345//
346// Duplicate data logged. -> CHECK that it matches and explode otherwise.
347//
348// This can be boiled down to a set of constraints and tools.
349//
350// 1) Forwarding timestamps and data need to be logged separately.
351// 2) Any forwarded data logged on the destination node needs to be logged
352// separately such that it can be sorted.
353//
354// 1) Log reader needs to be able to sort a list of log files.
355// 2) Log reader needs to be able to merge sorted lists of log files.
356// 3) Log reader needs to be able to match timestamps with messages.
357//
358// We also need to be able to generate multiple views of a log file depending on
359// the target.
360
Austin Schuhe309d2a2019-11-29 13:25:21 -0800361// Replays all the channels in the logfile to the event loop.
362class LogReader {
363 public:
James Kuszmaulc7bbb3e2020-01-03 20:01:00 -0800364 // If you want to supply a new configuration that will be used for replay
365 // (e.g., to change message rates, or to populate an updated schema), then
366 // pass it in here. It must provide all the channels that the original logged
367 // config did.
Austin Schuh6f3babe2020-01-26 20:34:50 -0800368 //
Austin Schuh287d43d2020-12-04 20:19:33 -0800369 // The single file constructor calls SortParts internally.
James Kuszmaulc7bbb3e2020-01-03 20:01:00 -0800370 LogReader(std::string_view filename,
371 const Configuration *replay_configuration = nullptr);
Austin Schuh287d43d2020-12-04 20:19:33 -0800372 LogReader(std::vector<LogFile> log_files,
Austin Schuh11d43732020-09-21 17:28:30 -0700373 const Configuration *replay_configuration = nullptr);
James Kuszmaul7daef362019-12-31 18:28:17 -0800374 ~LogReader();
Austin Schuhe309d2a2019-11-29 13:25:21 -0800375
Austin Schuh6331ef92020-01-07 18:28:09 -0800376 // Registers all the callbacks to send the log file data out on an event loop
377 // created in event_loop_factory. This also updates time to be at the start
378 // of the log file by running until the log file starts.
379 // Note: the configuration used in the factory should be configuration()
380 // below, but can be anything as long as the locations needed to send
381 // everything are available.
James Kuszmaul84ff3e52020-01-03 19:48:53 -0800382 void Register(SimulatedEventLoopFactory *event_loop_factory);
  // Creates a SimulatedEventLoopFactory accessible via event_loop_factory(),
  // and then calls Register.
385 void Register();
386 // Registers callbacks for all the events after the log file starts. This is
387 // only useful when replaying live.
Austin Schuhe309d2a2019-11-29 13:25:21 -0800388 void Register(EventLoop *event_loop);
Austin Schuh6331ef92020-01-07 18:28:09 -0800389
James Kuszmaul84ff3e52020-01-03 19:48:53 -0800390 // Unregisters the senders. You only need to call this if you separately
391 // supplied an event loop or event loop factory and the lifetimes are such
392 // that they need to be explicitly destroyed before the LogReader destructor
393 // gets called.
Austin Schuhe309d2a2019-11-29 13:25:21 -0800394 void Deregister();
395
Austin Schuh0c297012020-09-16 18:41:59 -0700396 // Returns the configuration being used for replay from the log file.
397 // Note that this may be different from the configuration actually used for
398 // handling events. You should generally only use this to create a
399 // SimulatedEventLoopFactory, and then get the configuration from there for
400 // everything else.
James Kuszmaulc7bbb3e2020-01-03 20:01:00 -0800401 const Configuration *logged_configuration() const;
Austin Schuh11d43732020-09-21 17:28:30 -0700402 // Returns the configuration being used for replay from the log file.
403 // Note that this may be different from the configuration actually used for
404 // handling events. You should generally only use this to create a
405 // SimulatedEventLoopFactory, and then get the configuration from there for
406 // everything else.
Austin Schuh6f3babe2020-01-26 20:34:50 -0800407 // The pointer is invalidated whenever RemapLoggedChannel is called.
Austin Schuh15649d62019-12-28 16:36:38 -0800408 const Configuration *configuration() const;
409
Austin Schuh6f3babe2020-01-26 20:34:50 -0800410 // Returns the nodes that this log file was created on. This is a list of
411 // pointers to a node in the nodes() list inside configuration(). The
412 // pointers here are invalidated whenever RemapLoggedChannel is called.
413 std::vector<const Node *> Nodes() const;
Austin Schuhe309d2a2019-11-29 13:25:21 -0800414
415 // Returns the starting timestamp for the log file.
Austin Schuh11d43732020-09-21 17:28:30 -0700416 monotonic_clock::time_point monotonic_start_time(
417 const Node *node = nullptr) const;
418 realtime_clock::time_point realtime_start_time(
419 const Node *node = nullptr) const;
Austin Schuhe309d2a2019-11-29 13:25:21 -0800420
James Kuszmaulc7bbb3e2020-01-03 20:01:00 -0800421 // Causes the logger to publish the provided channel on a different name so
422 // that replayed applications can publish on the proper channel name without
423 // interference. This operates on raw channel names, without any node or
424 // application specific mappings.
425 void RemapLoggedChannel(std::string_view name, std::string_view type,
Austin Schuh0de30f32020-12-06 12:44:28 -0800426 std::string_view add_prefix = "/original",
427 std::string_view new_type = "");
James Kuszmaulc7bbb3e2020-01-03 20:01:00 -0800428 template <typename T>
429 void RemapLoggedChannel(std::string_view name,
Austin Schuh0de30f32020-12-06 12:44:28 -0800430 std::string_view add_prefix = "/original",
431 std::string_view new_type = "") {
432 RemapLoggedChannel(name, T::GetFullyQualifiedName(), add_prefix, new_type);
James Kuszmaulc7bbb3e2020-01-03 20:01:00 -0800433 }
434
Austin Schuh01b4c352020-09-21 23:09:39 -0700435 // Remaps the provided channel, though this respects node mappings, and
436 // preserves them too. This makes it so if /aos -> /pi1/aos on one node,
437 // /original/aos -> /original/pi1/aos on the same node after renaming, just
Austin Schuh0de30f32020-12-06 12:44:28 -0800438 // like you would hope. If new_type is not empty, the new channel will use
439 // the provided type instead. This allows for renaming messages.
Austin Schuh01b4c352020-09-21 23:09:39 -0700440 //
441 // TODO(austin): If you have 2 nodes remapping something to the same channel,
442 // this doesn't handle that. No use cases exist yet for that, so it isn't
443 // being done yet.
444 void RemapLoggedChannel(std::string_view name, std::string_view type,
445 const Node *node,
Austin Schuh0de30f32020-12-06 12:44:28 -0800446 std::string_view add_prefix = "/original",
447 std::string_view new_type = "");
Brian Silvermande9f3ff2020-04-28 16:56:58 -0700448 template <typename T>
Austin Schuh01b4c352020-09-21 23:09:39 -0700449 void RemapLoggedChannel(std::string_view name, const Node *node,
Austin Schuh0de30f32020-12-06 12:44:28 -0800450 std::string_view add_prefix = "/original",
451 std::string_view new_type = "") {
452 RemapLoggedChannel(name, T::GetFullyQualifiedName(), node, add_prefix,
453 new_type);
Austin Schuh01b4c352020-09-21 23:09:39 -0700454 }
455
456 template <typename T>
457 bool HasChannel(std::string_view name, const Node *node = nullptr) {
Austin Schuh0ca51f32020-12-25 21:51:45 -0800458 return configuration::GetChannel(logged_configuration(), name,
Austin Schuh0de30f32020-12-06 12:44:28 -0800459 T::GetFullyQualifiedName(), "", node,
460 true) != nullptr;
Brian Silvermande9f3ff2020-04-28 16:56:58 -0700461 }
462
James Kuszmaul84ff3e52020-01-03 19:48:53 -0800463 SimulatedEventLoopFactory *event_loop_factory() {
464 return event_loop_factory_;
465 }
466
Austin Schuh0ca51f32020-12-25 21:51:45 -0800467 std::string_view name() const { return log_files_[0].name; }
Austin Schuh0c297012020-09-16 18:41:59 -0700468
James Kuszmaul71a81932020-12-15 21:08:01 -0800469 // Set whether to exit the SimulatedEventLoopFactory when we finish reading
470 // the logfile.
471 void set_exit_on_finish(bool exit_on_finish) {
472 exit_on_finish_ = exit_on_finish;
473 }
474
Austin Schuhe309d2a2019-11-29 13:25:21 -0800475 private:
Austin Schuh6f3babe2020-01-26 20:34:50 -0800476 const Channel *RemapChannel(const EventLoop *event_loop,
477 const Channel *channel);
478
Austin Schuhe309d2a2019-11-29 13:25:21 -0800479 // Queues at least max_out_of_order_duration_ messages into channels_.
480 void QueueMessages();
James Kuszmaulc7bbb3e2020-01-03 20:01:00 -0800481 // Handle constructing a configuration with all the additional remapped
482 // channels from calls to RemapLoggedChannel.
483 void MakeRemappedConfig();
Austin Schuhe309d2a2019-11-29 13:25:21 -0800484
Austin Schuh2f8fd752020-09-01 22:38:28 -0700485 // Returns the number of nodes.
486 size_t nodes_count() const {
487 return !configuration::MultiNode(logged_configuration())
488 ? 1u
489 : logged_configuration()->nodes()->size();
490 }
491
Austin Schuh287d43d2020-12-04 20:19:33 -0800492 const std::vector<LogFile> log_files_;
Austin Schuh6f3babe2020-01-26 20:34:50 -0800493
Austin Schuh6f3babe2020-01-26 20:34:50 -0800494 // State per node.
Austin Schuh858c9f32020-08-31 16:56:12 -0700495 class State {
496 public:
Austin Schuh287d43d2020-12-04 20:19:33 -0800497 State(std::unique_ptr<TimestampMapper> timestamp_mapper);
498
499 // Connects up the timestamp mappers.
500 void AddPeer(State *peer);
Austin Schuh6f3babe2020-01-26 20:34:50 -0800501
Austin Schuh858c9f32020-08-31 16:56:12 -0700502 // Returns the timestamps, channel_index, and message from a channel.
503 // update_time (will be) set to true when popping this message causes the
504 // filter to change the time offset estimation function.
Austin Schuh287d43d2020-12-04 20:19:33 -0800505 TimestampedMessage PopOldest(bool *update_time);
Austin Schuh858c9f32020-08-31 16:56:12 -0700506
507 // Returns the monotonic time of the oldest message.
508 monotonic_clock::time_point OldestMessageTime() const;
509
510 // Primes the queues inside State. Should be called before calling
511 // OldestMessageTime.
512 void SeedSortedMessages();
Austin Schuh8bd96322020-02-13 21:18:22 -0800513
Austin Schuh858c9f32020-08-31 16:56:12 -0700514 // Returns the starting time for this node.
515 monotonic_clock::time_point monotonic_start_time() const {
Austin Schuh287d43d2020-12-04 20:19:33 -0800516 return timestamp_mapper_ ? timestamp_mapper_->monotonic_start_time()
517 : monotonic_clock::min_time;
Austin Schuh858c9f32020-08-31 16:56:12 -0700518 }
519 realtime_clock::time_point realtime_start_time() const {
Austin Schuh287d43d2020-12-04 20:19:33 -0800520 return timestamp_mapper_ ? timestamp_mapper_->realtime_start_time()
521 : realtime_clock::min_time;
Austin Schuh858c9f32020-08-31 16:56:12 -0700522 }
523
524 // Sets the node event loop factory for replaying into a
525 // SimulatedEventLoopFactory. Returns the EventLoop to use.
526 EventLoop *SetNodeEventLoopFactory(
527 NodeEventLoopFactory *node_event_loop_factory);
528
529 // Sets and gets the event loop to use.
530 void set_event_loop(EventLoop *event_loop) { event_loop_ = event_loop; }
531 EventLoop *event_loop() { return event_loop_; }
532
Austin Schuh858c9f32020-08-31 16:56:12 -0700533 // Sets the current realtime offset from the monotonic clock for this node
534 // (if we are on a simulated event loop).
535 void SetRealtimeOffset(monotonic_clock::time_point monotonic_time,
536 realtime_clock::time_point realtime_time) {
537 if (node_event_loop_factory_ != nullptr) {
538 node_event_loop_factory_->SetRealtimeOffset(monotonic_time,
539 realtime_time);
540 }
541 }
542
Austin Schuh8d7e0bb2020-10-02 17:57:00 -0700543 // Returns the MessageHeader sender to log delivery timestamps to for the
544 // provided remote node.
Austin Schuh0de30f32020-12-06 12:44:28 -0800545 aos::Sender<message_bridge::RemoteMessage> *RemoteTimestampSender(
Austin Schuh8d7e0bb2020-10-02 17:57:00 -0700546 const Node *delivered_node);
547
Austin Schuh858c9f32020-08-31 16:56:12 -0700548 // Converts a timestamp from the monotonic clock on this node to the
549 // distributed clock.
550 distributed_clock::time_point ToDistributedClock(
551 monotonic_clock::time_point time) {
552 return node_event_loop_factory_->ToDistributedClock(time);
553 }
554
Austin Schuh858c9f32020-08-31 16:56:12 -0700555 // Returns the current time on the remote node which sends messages on
556 // channel_index.
557 monotonic_clock::time_point monotonic_remote_now(size_t channel_index) {
Austin Schuh8d7e0bb2020-10-02 17:57:00 -0700558 return channel_source_state_[channel_index]
559 ->node_event_loop_factory_->monotonic_now();
Austin Schuh858c9f32020-08-31 16:56:12 -0700560 }
561
Austin Schuh2f8fd752020-09-01 22:38:28 -0700562 distributed_clock::time_point RemoteToDistributedClock(
563 size_t channel_index, monotonic_clock::time_point time) {
Austin Schuh8d7e0bb2020-10-02 17:57:00 -0700564 return channel_source_state_[channel_index]
565 ->node_event_loop_factory_->ToDistributedClock(time);
Austin Schuh2f8fd752020-09-01 22:38:28 -0700566 }
567
568 const Node *remote_node(size_t channel_index) {
Austin Schuh8d7e0bb2020-10-02 17:57:00 -0700569 return channel_source_state_[channel_index]
570 ->node_event_loop_factory_->node();
Austin Schuh2f8fd752020-09-01 22:38:28 -0700571 }
572
573 monotonic_clock::time_point monotonic_now() {
574 return node_event_loop_factory_->monotonic_now();
575 }
576
Austin Schuh858c9f32020-08-31 16:56:12 -0700577 // Sets the number of channels.
578 void SetChannelCount(size_t count);
579
580 // Sets the sender, filter, and target factory for a channel.
Austin Schuh0de30f32020-12-06 12:44:28 -0800581 void SetChannel(
582 size_t logged_channel_index, size_t factory_channel_index,
583 std::unique_ptr<RawSender> sender,
584 message_bridge::NoncausalOffsetEstimator *filter,
585 aos::Sender<message_bridge::RemoteMessage> *remote_timestamp_sender,
586 State *source_state);
Austin Schuh858c9f32020-08-31 16:56:12 -0700587
588 // Returns if we have read all the messages from all the logs.
Austin Schuh287d43d2020-12-04 20:19:33 -0800589 bool at_end() const {
590 return timestamp_mapper_ ? timestamp_mapper_->Front() == nullptr : true;
591 }
Austin Schuh858c9f32020-08-31 16:56:12 -0700592
    // Unregisters everything so we can destroy the event loop.
594 void Deregister();
595
596 // Sets the current TimerHandle for the replay callback.
597 void set_timer_handler(TimerHandler *timer_handler) {
598 timer_handler_ = timer_handler;
599 }
600
601 // Sets the next wakeup time on the replay callback.
602 void Setup(monotonic_clock::time_point next_time) {
603 timer_handler_->Setup(next_time);
604 }
605
606 // Sends a buffer on the provided channel index.
Austin Schuh287d43d2020-12-04 20:19:33 -0800607 bool Send(const TimestampedMessage &timestamped_message);
Austin Schuh858c9f32020-08-31 16:56:12 -0700608
609 // Returns a debug string for the channel merger.
Austin Schuh2f8fd752020-09-01 22:38:28 -0700610 std::string DebugString() const {
611 std::stringstream messages;
612 size_t i = 0;
613 for (const auto &message : sorted_messages_) {
614 if (i < 7 || i + 7 > sorted_messages_.size()) {
615 messages << "sorted_messages[" << i
616 << "]: " << std::get<0>(message).monotonic_event_time << " "
617 << configuration::StrippedChannelToString(
618 event_loop_->configuration()->channels()->Get(
Austin Schuh287d43d2020-12-04 20:19:33 -0800619 std::get<0>(message).channel_index))
Austin Schuh2f8fd752020-09-01 22:38:28 -0700620 << "\n";
621 } else if (i == 7) {
622 messages << "...\n";
623 }
624 ++i;
625 }
Austin Schuh287d43d2020-12-04 20:19:33 -0800626 if (!timestamp_mapper_) {
627 return messages.str();
628 }
629 return messages.str() + timestamp_mapper_->DebugString();
Austin Schuh2f8fd752020-09-01 22:38:28 -0700630 }
Austin Schuh858c9f32020-08-31 16:56:12 -0700631
632 private:
633 // Log file.
Austin Schuh287d43d2020-12-04 20:19:33 -0800634 std::unique_ptr<TimestampMapper> timestamp_mapper_;
Austin Schuh858c9f32020-08-31 16:56:12 -0700635
Austin Schuh287d43d2020-12-04 20:19:33 -0800636 std::deque<std::tuple<TimestampedMessage,
Austin Schuh2f8fd752020-09-01 22:38:28 -0700637 message_bridge::NoncausalOffsetEstimator *>>
Austin Schuh858c9f32020-08-31 16:56:12 -0700638 sorted_messages_;
639
640 // Senders.
641 std::vector<std::unique_ptr<RawSender>> channels_;
Austin Schuh0de30f32020-12-06 12:44:28 -0800642 std::vector<aos::Sender<message_bridge::RemoteMessage> *>
643 remote_timestamp_senders_;
Austin Schuh8d7e0bb2020-10-02 17:57:00 -0700644 // The mapping from logged channel index to sent channel index. Needed for
645 // sending out MessageHeaders.
646 std::vector<int> factory_channel_index_;
647
648 struct SentTimestamp {
649 monotonic_clock::time_point monotonic_event_time =
650 monotonic_clock::min_time;
651 realtime_clock::time_point realtime_event_time = realtime_clock::min_time;
652 uint32_t queue_index = 0xffffffff;
653
654 // The queue index that this message *actually* was sent with.
655 uint32_t actual_queue_index = 0xffffffff;
656 };
657
658 // Stores all the timestamps that have been sent on this channel. This is
659 // only done for channels which are forwarded and on the node which
660 // initially sends the message.
661 //
662 // TODO(austin): This whole concept is a hack. We should be able to
663 // associate state with the message as it gets sorted and recover it.
664 std::vector<std::unique_ptr<std::vector<SentTimestamp>>> queue_index_map_;
Austin Schuh858c9f32020-08-31 16:56:12 -0700665
666 // Factory (if we are in sim) that this loop was created on.
667 NodeEventLoopFactory *node_event_loop_factory_ = nullptr;
668 std::unique_ptr<EventLoop> event_loop_unique_ptr_;
669 // Event loop.
670 EventLoop *event_loop_ = nullptr;
671 // And timer used to send messages.
672 TimerHandler *timer_handler_;
673
Austin Schuh8bd96322020-02-13 21:18:22 -0800674 // Filters (or nullptr if it isn't a forwarded channel) for each channel.
675 // This corresponds to the object which is shared among all the channels
676 // going between 2 nodes. The second element in the tuple indicates if this
677 // is the primary direction or not.
Austin Schuh2f8fd752020-09-01 22:38:28 -0700678 std::vector<message_bridge::NoncausalOffsetEstimator *> filters_;
Austin Schuh8bd96322020-02-13 21:18:22 -0800679
680 // List of NodeEventLoopFactorys (or nullptr if it isn't a forwarded
681 // channel) which correspond to the originating node.
Austin Schuh8d7e0bb2020-10-02 17:57:00 -0700682 std::vector<State *> channel_source_state_;
683
Austin Schuh0de30f32020-12-06 12:44:28 -0800684 std::map<const Node *, aos::Sender<message_bridge::RemoteMessage>>
Austin Schuh8d7e0bb2020-10-02 17:57:00 -0700685 remote_timestamp_senders_map_;
Austin Schuh6f3babe2020-01-26 20:34:50 -0800686 };
687
Austin Schuh8bd96322020-02-13 21:18:22 -0800688 // Node index -> State.
689 std::vector<std::unique_ptr<State>> states_;
690
691 // Creates the requested filter if it doesn't exist, regardless of whether
692 // these nodes can actually communicate directly. The second return value
693 // reports if this is the primary direction or not.
Austin Schuh2f8fd752020-09-01 22:38:28 -0700694 message_bridge::NoncausalOffsetEstimator *GetFilter(const Node *node_a,
695 const Node *node_b);
Austin Schuh8bd96322020-02-13 21:18:22 -0800696
Austin Schuh8bd96322020-02-13 21:18:22 -0800697 // List of filters for a connection. The pointer to the first node will be
698 // less than the second node.
Austin Schuh0ca1fd32020-12-18 22:53:05 -0800699 std::unique_ptr<message_bridge::MultiNodeNoncausalOffsetEstimator> filters_;
Austin Schuh8bd96322020-02-13 21:18:22 -0800700
// Updates the offset matrix solution and sets the per-node distributed
// offsets in the factory.  Only the declaration lives here; the definition is
// out of line.
void UpdateOffsets();
704
James Kuszmaul84ff3e52020-01-03 19:48:53 -0800705 std::unique_ptr<FlatbufferDetachedBuffer<Configuration>>
706 remapped_configuration_buffer_;
707
James Kuszmaul84ff3e52020-01-03 19:48:53 -0800708 std::unique_ptr<SimulatedEventLoopFactory> event_loop_factory_unique_ptr_;
709 SimulatedEventLoopFactory *event_loop_factory_ = nullptr;
James Kuszmaulc7bbb3e2020-01-03 20:01:00 -0800710
// Replacement description for one remapped channel.  remapped_channels_ maps
// a channel index (an index into logged_configuration()) to one of these.
struct RemappedChannel {
  // Name of the channel to send on instead of the logged channel name.
  std::string remapped_name;
  // NOTE(review): presumably the type name to use for the remapped channel --
  // confirm against the remapping code.
  std::string new_type;
};
718 std::map<size_t, RemappedChannel> remapped_channels_;
Austin Schuh01b4c352020-09-21 23:09:39 -0700719 std::vector<MapT> maps_;
James Kuszmaulc7bbb3e2020-01-03 20:01:00 -0800720
Austin Schuh6f3babe2020-01-26 20:34:50 -0800721 // Number of nodes which still have data to send. This is used to figure out
722 // when to exit.
723 size_t live_nodes_ = 0;
724
James Kuszmaulc7bbb3e2020-01-03 20:01:00 -0800725 const Configuration *remapped_configuration_ = nullptr;
726 const Configuration *replay_configuration_ = nullptr;
Austin Schuhcde938c2020-02-02 17:30:07 -0800727
728 // If true, the replay timer will ignore any missing data. This is used
729 // during startup when we are bootstrapping everything and trying to get to
730 // the start of all the log files.
731 bool ignore_missing_data_ = false;
James Kuszmaul71a81932020-12-15 21:08:01 -0800732
733 // Whether to exit the SimulatedEventLoop when we finish reading the logs.
734 bool exit_on_finish_ = true;
Austin Schuhe309d2a2019-11-29 13:25:21 -0800735};
736
737} // namespace logger
738} // namespace aos
739
740#endif // AOS_EVENTS_LOGGER_H_