blob: e2216957a1a9a1d578f57c3a24c0b920131b70b0 [file] [log] [blame]
Austin Schuhe309d2a2019-11-29 13:25:21 -08001#ifndef AOS_EVENTS_LOGGER_H_
2#define AOS_EVENTS_LOGGER_H_
3
Austin Schuh8bd96322020-02-13 21:18:22 -08004#include <chrono>
Austin Schuhe309d2a2019-11-29 13:25:21 -08005#include <deque>
Austin Schuh05b70472020-01-01 17:11:17 -08006#include <string_view>
Austin Schuh2f8fd752020-09-01 22:38:28 -07007#include <tuple>
Austin Schuh6f3babe2020-01-26 20:34:50 -08008#include <vector>
Austin Schuhe309d2a2019-11-29 13:25:21 -08009
Austin Schuhe309d2a2019-11-29 13:25:21 -080010#include "aos/events/event_loop.h"
Austin Schuhcb5601b2020-09-10 15:29:59 -070011#include "aos/events/logging/log_namer.h"
Austin Schuhf6f9bf32020-10-11 14:37:43 -070012#include "aos/events/logging/logfile_sorting.h"
Austin Schuha36c8902019-12-30 18:07:15 -080013#include "aos/events/logging/logfile_utils.h"
James Kuszmaul38735e82019-12-07 16:42:06 -080014#include "aos/events/logging/logger_generated.h"
Austin Schuh64fab802020-09-09 22:47:47 -070015#include "aos/events/logging/uuid.h"
Austin Schuh92547522019-12-28 14:33:43 -080016#include "aos/events/simulated_event_loop.h"
Austin Schuh2f8fd752020-09-01 22:38:28 -070017#include "aos/network/message_bridge_server_generated.h"
Austin Schuh0ca1fd32020-12-18 22:53:05 -080018#include "aos/network/multinode_timestamp_filter.h"
Austin Schuh0de30f32020-12-06 12:44:28 -080019#include "aos/network/remote_message_generated.h"
Austin Schuh8bd96322020-02-13 21:18:22 -080020#include "aos/network/timestamp_filter.h"
Austin Schuhe309d2a2019-11-29 13:25:21 -080021#include "aos/time/time.h"
22#include "flatbuffers/flatbuffers.h"
23
24namespace aos {
25namespace logger {
26
Austin Schuhe309d2a2019-11-29 13:25:21 -080027// Logs all channels available in the event loop to disk every 100 ms.
28// Start by logging one message per channel to capture any state and
29// configuration that is sent rarely on a channel and would affect execution.
30class Logger {
31 public:
Austin Schuh0c297012020-09-16 18:41:59 -070032 // Constructs a logger.
Austin Schuh0c297012020-09-16 18:41:59 -070033 // event_loop: The event loop used to read the messages.
Austin Schuh0c297012020-09-16 18:41:59 -070034 // configuration: When provided, this is the configuration to log, and the
35 // configuration to use for the channel list to log. If not provided,
36 // this becomes the configuration from the event loop.
Brian Silverman1f345222020-09-24 21:14:48 -070037 // should_log: When provided, a filter for channels to log. If not provided,
38 // all available channels are logged.
39 Logger(EventLoop *event_loop)
40 : Logger(event_loop, event_loop->configuration()) {}
41 Logger(EventLoop *event_loop, const Configuration *configuration)
42 : Logger(event_loop, configuration,
43 [](const Channel *) { return true; }) {}
44 Logger(EventLoop *event_loop, const Configuration *configuration,
45 std::function<bool(const Channel *)> should_log);
Austin Schuh0c297012020-09-16 18:41:59 -070046 ~Logger();
47
48 // Overrides the name in the log file header.
49 void set_name(std::string_view name) { name_ = name; }
Austin Schuhe309d2a2019-11-29 13:25:21 -080050
Brian Silverman1f345222020-09-24 21:14:48 -070051 // Sets the callback to run after each period of data is logged. Defaults to
52 // doing nothing.
53 //
54 // This callback may safely do things like call Rotate().
55 void set_on_logged_period(std::function<void()> on_logged_period) {
56 on_logged_period_ = std::move(on_logged_period);
57 }
58
Austin Schuh8c399962020-12-25 21:51:45 -080059 void set_separate_config(bool separate_config) {
60 separate_config_ = separate_config;
61 }
62
Brian Silverman1f345222020-09-24 21:14:48 -070063 // Sets the period between polling the data. Defaults to 100ms.
64 //
65 // Changing this while a set of files is being written may result in
66 // unreadable files.
67 void set_polling_period(std::chrono::nanoseconds polling_period) {
68 polling_period_ = polling_period;
69 }
70
Brian Silvermanae7c0332020-09-30 16:58:23 -070071 std::string_view log_start_uuid() const { return log_start_uuid_; }
Brian Silverman035e4182020-10-06 17:13:00 -070072 UUID logger_instance_uuid() const { return logger_instance_uuid_; }
Brian Silvermanae7c0332020-09-30 16:58:23 -070073
Brian Silvermancb805822020-10-06 17:43:35 -070074 // The maximum time for a single fetch which returned a message, or 0 if none
75 // of those have happened.
76 std::chrono::nanoseconds max_message_fetch_time() const {
77 return max_message_fetch_time_;
78 }
79 // The channel for that longest fetch which returned a message, or -1 if none
80 // of those have happened.
81 int max_message_fetch_time_channel() const {
82 return max_message_fetch_time_channel_;
83 }
84 // The size of the message returned by that longest fetch, or -1 if none of
85 // those have happened.
86 int max_message_fetch_time_size() const {
87 return max_message_fetch_time_size_;
88 }
89 // The total time spent fetching messages.
90 std::chrono::nanoseconds total_message_fetch_time() const {
91 return total_message_fetch_time_;
92 }
93 // The total number of fetch calls which returned messages.
94 int total_message_fetch_count() const { return total_message_fetch_count_; }
95 // The total number of bytes fetched.
96 int64_t total_message_fetch_bytes() const {
97 return total_message_fetch_bytes_;
98 }
99
100 // The total time spent in fetches which did not return a message.
101 std::chrono::nanoseconds total_nop_fetch_time() const {
102 return total_nop_fetch_time_;
103 }
104 // The total number of fetches which did not return a message.
105 int total_nop_fetch_count() const { return total_nop_fetch_count_; }
106
107 // The maximum time for a single copy, or 0 if none of those have happened.
108 std::chrono::nanoseconds max_copy_time() const { return max_copy_time_; }
109 // The channel for that longest copy, or -1 if none of those have happened.
110 int max_copy_time_channel() const { return max_copy_time_channel_; }
111 // The size of the message for that longest copy, or -1 if none of those have
112 // happened.
113 int max_copy_time_size() const { return max_copy_time_size_; }
114 // The total time spent copying messages.
115 std::chrono::nanoseconds total_copy_time() const { return total_copy_time_; }
116 // The total number of messages copied.
117 int total_copy_count() const { return total_copy_count_; }
118 // The total number of bytes copied.
119 int64_t total_copy_bytes() const { return total_copy_bytes_; }
120
121 void ResetStatisics();
122
Austin Schuh2f8fd752020-09-01 22:38:28 -0700123 // Rotates the log file(s), triggering new part files to be written for each
124 // log file.
125 void Rotate();
Austin Schuhfa895892020-01-07 20:07:41 -0800126
Brian Silverman1f345222020-09-24 21:14:48 -0700127 // Starts logging to files with the given naming scheme.
Brian Silvermanae7c0332020-09-30 16:58:23 -0700128 //
129 // log_start_uuid may be used to tie this log event to other log events across
130 // multiple nodes. The default (empty string) indicates there isn't one
131 // available.
132 void StartLogging(std::unique_ptr<LogNamer> log_namer,
133 std::string_view log_start_uuid = "");
Brian Silverman1f345222020-09-24 21:14:48 -0700134
135 // Stops logging. Ensures any messages through end_time make it into the log.
136 //
137 // If you want to stop ASAP, pass min_time to avoid reading any more messages.
138 //
139 // Returns the LogNamer in case the caller wants to do anything else with it
140 // before destroying it.
141 std::unique_ptr<LogNamer> StopLogging(
142 aos::monotonic_clock::time_point end_time);
143
144 // Returns whether a log is currently being written.
145 bool is_started() const { return static_cast<bool>(log_namer_); }
146
147 // Shortcut to call StartLogging with a LocalLogNamer when event processing
148 // starts.
149 void StartLoggingLocalNamerOnRun(std::string base_name) {
150 event_loop_->OnRun([this, base_name]() {
151 StartLogging(
152 std::make_unique<LocalLogNamer>(base_name, event_loop_->node()));
153 });
154 }
155
Austin Schuhe309d2a2019-11-29 13:25:21 -0800156 private:
Austin Schuhe309d2a2019-11-29 13:25:21 -0800157 // Structure to track both a fetcher, and if the data fetched has been
158 // written. We may want to delay writing data to disk so that we don't let
159 // data get too far out of order when written to disk so we can avoid making
160 // it too hard to sort when reading.
161 struct FetcherStruct {
162 std::unique_ptr<RawFetcher> fetcher;
163 bool written = false;
Austin Schuh15649d62019-12-28 16:36:38 -0800164
Austin Schuh8d7e0bb2020-10-02 17:57:00 -0700165 // Channel index to log to.
Austin Schuh6f3babe2020-01-26 20:34:50 -0800166 int channel_index = -1;
Brian Silverman1f345222020-09-24 21:14:48 -0700167 const Channel *channel = nullptr;
168 const Node *timestamp_node = nullptr;
Austin Schuh6f3babe2020-01-26 20:34:50 -0800169
170 LogType log_type = LogType::kLogMessage;
171
Brian Silverman1f345222020-09-24 21:14:48 -0700172 // We fill out the metadata at construction, but the actual writers have to
173 // be updated each time we start logging. To avoid duplicating the complex
174 // logic determining whether each writer should be initialized, we just
175 // stash the answer in separate member variables.
176 bool wants_writer = false;
Austin Schuh6f3babe2020-01-26 20:34:50 -0800177 DetachedBufferWriter *writer = nullptr;
Brian Silverman1f345222020-09-24 21:14:48 -0700178 bool wants_timestamp_writer = false;
Austin Schuh6f3babe2020-01-26 20:34:50 -0800179 DetachedBufferWriter *timestamp_writer = nullptr;
Brian Silverman1f345222020-09-24 21:14:48 -0700180 bool wants_contents_writer = false;
Austin Schuh2f8fd752020-09-01 22:38:28 -0700181 DetachedBufferWriter *contents_writer = nullptr;
Brian Silverman1f345222020-09-24 21:14:48 -0700182
Austin Schuh315b96b2020-12-11 21:21:12 -0800183 // Node which this data is from, or -1 if it is unknown.
184 int data_node_index = -1;
185 // Node that this timestamp is for, or -1 if it is known.
186 int timestamp_node_index = -1;
187 // Node that the contents this contents_writer will log are from.
188 int contents_node_index = -1;
Austin Schuhe309d2a2019-11-29 13:25:21 -0800189 };
190
Austin Schuh8d7e0bb2020-10-02 17:57:00 -0700191 // Vector mapping from the channel index from the event loop to the logged
192 // channel index.
193 std::vector<int> event_loop_to_logged_channel_index_;
194
Austin Schuh2f8fd752020-09-01 22:38:28 -0700195 struct NodeState {
196 aos::monotonic_clock::time_point monotonic_start_time =
197 aos::monotonic_clock::min_time;
198 aos::realtime_clock::time_point realtime_start_time =
199 aos::realtime_clock::min_time;
200
Austin Schuh315b96b2020-12-11 21:21:12 -0800201 bool has_source_node_boot_uuid = false;
202
203 // This is an initial UUID that is a valid UUID4 and is pretty obvious that
204 // it isn't valid.
205 std::string source_node_boot_uuid = "00000000-0000-4000-8000-000000000000";
206
Austin Schuh2f8fd752020-09-01 22:38:28 -0700207 aos::SizePrefixedFlatbufferDetachedBuffer<LogFileHeader> log_file_header =
208 aos::SizePrefixedFlatbufferDetachedBuffer<LogFileHeader>::Empty();
Austin Schuh315b96b2020-12-11 21:21:12 -0800209
210 // True if a header has been written to the start of a log file.
211 bool header_written = false;
212 // True if the current written header represents the contents which will
213 // follow. This is cleared when boot_uuid is known to not match anymore.
214 bool header_valid = false;
215
216 // Sets the source_node_boot_uuid, properly updating everything.
217 void SetBootUUID(std::string_view new_source_node_boot_uuid) {
218 source_node_boot_uuid = new_source_node_boot_uuid;
219 header_valid = false;
220 has_source_node_boot_uuid = true;
221
222 flatbuffers::String *source_node_boot_uuid_string =
223 log_file_header.mutable_message()->mutable_source_node_boot_uuid();
224 CHECK_EQ(source_node_boot_uuid.size(),
225 source_node_boot_uuid_string->size());
226 memcpy(source_node_boot_uuid_string->data(), source_node_boot_uuid.data(),
227 source_node_boot_uuid.size());
228 }
Austin Schuh2f8fd752020-09-01 22:38:28 -0700229 };
Brian Silverman1f345222020-09-24 21:14:48 -0700230
231 void WriteHeader();
Austin Schuh315b96b2020-12-11 21:21:12 -0800232
Brian Silverman1f345222020-09-24 21:14:48 -0700233 aos::SizePrefixedFlatbufferDetachedBuffer<LogFileHeader> MakeHeader(
Austin Schuh8c399962020-12-25 21:51:45 -0800234 const Node *node, std::string_view config_sha256);
Brian Silverman1f345222020-09-24 21:14:48 -0700235
Austin Schuh315b96b2020-12-11 21:21:12 -0800236 // Writes the header for the provided node if enough information is valid.
237 void MaybeWriteHeader(int node_index);
238 // Overload for when we already know node as well.
239 void MaybeWriteHeader(int node_index, const Node *node);
240
Brian Silverman1f345222020-09-24 21:14:48 -0700241 bool MaybeUpdateTimestamp(
242 const Node *node, int node_index,
243 aos::monotonic_clock::time_point monotonic_start_time,
244 aos::realtime_clock::time_point realtime_start_time);
245
246 void DoLogData(const monotonic_clock::time_point end_time);
247
248 void WriteMissingTimestamps();
249
250 // Fetches from each channel until all the data is logged.
251 void LogUntil(monotonic_clock::time_point t);
252
Brian Silvermancb805822020-10-06 17:43:35 -0700253 void RecordFetchResult(aos::monotonic_clock::time_point start,
254 aos::monotonic_clock::time_point end, bool got_new,
255 FetcherStruct *fetcher);
256
257 void RecordCreateMessageTime(aos::monotonic_clock::time_point start,
258 aos::monotonic_clock::time_point end,
259 FetcherStruct *fetcher);
260
Brian Silverman1f345222020-09-24 21:14:48 -0700261 // Sets the start time for a specific node.
Austin Schuh315b96b2020-12-11 21:21:12 -0800262 void SetStartTime(
263 size_t node_index, aos::monotonic_clock::time_point monotonic_start_time,
264 aos::realtime_clock::time_point realtime_start_time,
265 aos::monotonic_clock::time_point logger_monotonic_start_time,
266 aos::realtime_clock::time_point logger_realtime_start_time);
Brian Silverman1f345222020-09-24 21:14:48 -0700267
Brian Silvermanae7c0332020-09-30 16:58:23 -0700268 EventLoop *const event_loop_;
Brian Silverman1f345222020-09-24 21:14:48 -0700269 // The configuration to place at the top of the log file.
270 const Configuration *const configuration_;
271
Brian Silvermanae7c0332020-09-30 16:58:23 -0700272 UUID log_event_uuid_ = UUID::Zero();
273 const UUID logger_instance_uuid_ = UUID::Random();
274 std::unique_ptr<LogNamer> log_namer_;
275 // Empty indicates there isn't one.
276 std::string log_start_uuid_;
Brian Silvermanae7c0332020-09-30 16:58:23 -0700277
Brian Silverman1f345222020-09-24 21:14:48 -0700278 // Name to save in the log file. Defaults to hostname.
279 std::string name_;
280
281 std::function<void()> on_logged_period_ = []() {};
282
Brian Silvermancb805822020-10-06 17:43:35 -0700283 std::chrono::nanoseconds max_message_fetch_time_ =
284 std::chrono::nanoseconds::zero();
285 int max_message_fetch_time_channel_ = -1;
286 int max_message_fetch_time_size_ = -1;
287 std::chrono::nanoseconds total_message_fetch_time_ =
288 std::chrono::nanoseconds::zero();
289 int total_message_fetch_count_ = 0;
290 int64_t total_message_fetch_bytes_ = 0;
291
292 std::chrono::nanoseconds total_nop_fetch_time_ =
293 std::chrono::nanoseconds::zero();
294 int total_nop_fetch_count_ = 0;
295
296 std::chrono::nanoseconds max_copy_time_ = std::chrono::nanoseconds::zero();
297 int max_copy_time_channel_ = -1;
298 int max_copy_time_size_ = -1;
299 std::chrono::nanoseconds total_copy_time_ = std::chrono::nanoseconds::zero();
300 int total_copy_count_ = 0;
301 int64_t total_copy_bytes_ = 0;
302
Brian Silverman1f345222020-09-24 21:14:48 -0700303 std::vector<FetcherStruct> fetchers_;
304 TimerHandler *timer_handler_;
305
306 // Period to poll the channels.
307 std::chrono::nanoseconds polling_period_ = std::chrono::milliseconds(100);
308
309 // Last time that data was written for all channels to disk.
310 monotonic_clock::time_point last_synchronized_time_;
311
312 // Max size that the header has consumed. This much extra data will be
313 // reserved in the builder to avoid reallocating.
314 size_t max_header_size_ = 0;
315
Austin Schuh8c399962020-12-25 21:51:45 -0800316 // If true, write the message header into a separate file.
317 bool separate_config_ = true;
318
Brian Silverman1f345222020-09-24 21:14:48 -0700319 // Fetcher for all the statistics from all the nodes.
320 aos::Fetcher<message_bridge::ServerStatistics> server_statistics_fetcher_;
321
Austin Schuh2f8fd752020-09-01 22:38:28 -0700322 std::vector<NodeState> node_state_;
Austin Schuhe309d2a2019-11-29 13:25:21 -0800323};
324
Austin Schuh11d43732020-09-21 17:28:30 -0700325std::vector<std::vector<std::string>> ToLogReaderVector(
326 const std::vector<LogFile> &log_files);
Austin Schuh5212cad2020-09-09 23:12:09 -0700327
Austin Schuh6f3babe2020-01-26 20:34:50 -0800328// We end up with one of the following 3 log file types.
329//
330// Single node logged as the source node.
331// -> Replayed just on the source node.
332//
333// Forwarding timestamps only logged from the perspective of the destination
334// node.
335// -> Matched with data on source node and logged.
336//
337// Forwarding timestamps with data logged as the destination node.
338// -> Replayed just as the destination
339// -> Replayed as the source (Much harder, ordering is not defined)
340//
341// Duplicate data logged. -> CHECK that it matches and explode otherwise.
342//
343// This can be boiled down to a set of constraints and tools.
344//
345// 1) Forwarding timestamps and data need to be logged separately.
346// 2) Any forwarded data logged on the destination node needs to be logged
347// separately such that it can be sorted.
348//
349// 1) Log reader needs to be able to sort a list of log files.
350// 2) Log reader needs to be able to merge sorted lists of log files.
351// 3) Log reader needs to be able to match timestamps with messages.
352//
353// We also need to be able to generate multiple views of a log file depending on
354// the target.
355
Austin Schuhe309d2a2019-11-29 13:25:21 -0800356// Replays all the channels in the logfile to the event loop.
357class LogReader {
358 public:
James Kuszmaulc7bbb3e2020-01-03 20:01:00 -0800359 // If you want to supply a new configuration that will be used for replay
360 // (e.g., to change message rates, or to populate an updated schema), then
361 // pass it in here. It must provide all the channels that the original logged
362 // config did.
Austin Schuh6f3babe2020-01-26 20:34:50 -0800363 //
Austin Schuh287d43d2020-12-04 20:19:33 -0800364 // The single file constructor calls SortParts internally.
James Kuszmaulc7bbb3e2020-01-03 20:01:00 -0800365 LogReader(std::string_view filename,
366 const Configuration *replay_configuration = nullptr);
Austin Schuh287d43d2020-12-04 20:19:33 -0800367 LogReader(std::vector<LogFile> log_files,
Austin Schuh11d43732020-09-21 17:28:30 -0700368 const Configuration *replay_configuration = nullptr);
James Kuszmaul7daef362019-12-31 18:28:17 -0800369 ~LogReader();
Austin Schuhe309d2a2019-11-29 13:25:21 -0800370
Austin Schuh6331ef92020-01-07 18:28:09 -0800371 // Registers all the callbacks to send the log file data out on an event loop
372 // created in event_loop_factory. This also updates time to be at the start
373 // of the log file by running until the log file starts.
374 // Note: the configuration used in the factory should be configuration()
375 // below, but can be anything as long as the locations needed to send
376 // everything are available.
James Kuszmaul84ff3e52020-01-03 19:48:53 -0800377 void Register(SimulatedEventLoopFactory *event_loop_factory);
Austin Schuh6331ef92020-01-07 18:28:09 -0800378 // Creates a SimulatedEventLoopFactory accessible via event_loop_factory(),
379 // and then calls Register.
380 void Register();
381 // Registers callbacks for all the events after the log file starts. This is
382 // only useful when replaying live.
Austin Schuhe309d2a2019-11-29 13:25:21 -0800383 void Register(EventLoop *event_loop);
Austin Schuh6331ef92020-01-07 18:28:09 -0800384
James Kuszmaul84ff3e52020-01-03 19:48:53 -0800385 // Unregisters the senders. You only need to call this if you separately
386 // supplied an event loop or event loop factory and the lifetimes are such
387 // that they need to be explicitly destroyed before the LogReader destructor
388 // gets called.
Austin Schuhe309d2a2019-11-29 13:25:21 -0800389 void Deregister();
390
Austin Schuh0c297012020-09-16 18:41:59 -0700391 // Returns the configuration being used for replay from the log file.
392 // Note that this may be different from the configuration actually used for
393 // handling events. You should generally only use this to create a
394 // SimulatedEventLoopFactory, and then get the configuration from there for
395 // everything else.
James Kuszmaulc7bbb3e2020-01-03 20:01:00 -0800396 const Configuration *logged_configuration() const;
Austin Schuh11d43732020-09-21 17:28:30 -0700397 // Returns the configuration being used for replay from the log file.
398 // Note that this may be different from the configuration actually used for
399 // handling events. You should generally only use this to create a
400 // SimulatedEventLoopFactory, and then get the configuration from there for
401 // everything else.
Austin Schuh6f3babe2020-01-26 20:34:50 -0800402 // The pointer is invalidated whenever RemapLoggedChannel is called.
Austin Schuh15649d62019-12-28 16:36:38 -0800403 const Configuration *configuration() const;
404
Austin Schuh6f3babe2020-01-26 20:34:50 -0800405 // Returns the nodes that this log file was created on. This is a list of
Austin Schuh07676622021-01-21 18:59:17 -0800406 // pointers to a node in the nodes() list inside logged_configuration().
407 std::vector<const Node *> LoggedNodes() const;
Austin Schuhe309d2a2019-11-29 13:25:21 -0800408
409 // Returns the starting timestamp for the log file.
Austin Schuh11d43732020-09-21 17:28:30 -0700410 monotonic_clock::time_point monotonic_start_time(
411 const Node *node = nullptr) const;
412 realtime_clock::time_point realtime_start_time(
413 const Node *node = nullptr) const;
Austin Schuhe309d2a2019-11-29 13:25:21 -0800414
James Kuszmaulc7bbb3e2020-01-03 20:01:00 -0800415 // Causes the logger to publish the provided channel on a different name so
416 // that replayed applications can publish on the proper channel name without
417 // interference. This operates on raw channel names, without any node or
418 // application specific mappings.
419 void RemapLoggedChannel(std::string_view name, std::string_view type,
Austin Schuh0de30f32020-12-06 12:44:28 -0800420 std::string_view add_prefix = "/original",
421 std::string_view new_type = "");
James Kuszmaulc7bbb3e2020-01-03 20:01:00 -0800422 template <typename T>
423 void RemapLoggedChannel(std::string_view name,
Austin Schuh0de30f32020-12-06 12:44:28 -0800424 std::string_view add_prefix = "/original",
425 std::string_view new_type = "") {
426 RemapLoggedChannel(name, T::GetFullyQualifiedName(), add_prefix, new_type);
James Kuszmaulc7bbb3e2020-01-03 20:01:00 -0800427 }
428
Austin Schuh01b4c352020-09-21 23:09:39 -0700429 // Remaps the provided channel, though this respects node mappings, and
430 // preserves them too. This makes it so if /aos -> /pi1/aos on one node,
431 // /original/aos -> /original/pi1/aos on the same node after renaming, just
Austin Schuh0de30f32020-12-06 12:44:28 -0800432 // like you would hope. If new_type is not empty, the new channel will use
433 // the provided type instead. This allows for renaming messages.
Austin Schuh01b4c352020-09-21 23:09:39 -0700434 //
435 // TODO(austin): If you have 2 nodes remapping something to the same channel,
436 // this doesn't handle that. No use cases exist yet for that, so it isn't
437 // being done yet.
438 void RemapLoggedChannel(std::string_view name, std::string_view type,
439 const Node *node,
Austin Schuh0de30f32020-12-06 12:44:28 -0800440 std::string_view add_prefix = "/original",
441 std::string_view new_type = "");
Brian Silvermande9f3ff2020-04-28 16:56:58 -0700442 template <typename T>
Austin Schuh01b4c352020-09-21 23:09:39 -0700443 void RemapLoggedChannel(std::string_view name, const Node *node,
Austin Schuh0de30f32020-12-06 12:44:28 -0800444 std::string_view add_prefix = "/original",
445 std::string_view new_type = "") {
446 RemapLoggedChannel(name, T::GetFullyQualifiedName(), node, add_prefix,
447 new_type);
Austin Schuh01b4c352020-09-21 23:09:39 -0700448 }
449
450 template <typename T>
451 bool HasChannel(std::string_view name, const Node *node = nullptr) {
Austin Schuh0ca51f32020-12-25 21:51:45 -0800452 return configuration::GetChannel(logged_configuration(), name,
Austin Schuh0de30f32020-12-06 12:44:28 -0800453 T::GetFullyQualifiedName(), "", node,
454 true) != nullptr;
Brian Silvermande9f3ff2020-04-28 16:56:58 -0700455 }
456
James Kuszmaul4f106fb2021-01-05 20:53:02 -0800457 // Returns true if the channel exists on the node and was logged.
458 template <typename T>
459 bool HasLoggedChannel(std::string_view name, const Node *node = nullptr) {
460 const Channel *channel = configuration::GetChannel(logged_configuration(), name,
461 T::GetFullyQualifiedName(), "", node,
462 true);
463 if (channel == nullptr) return false;
464 return channel->logger() != LoggerConfig::NOT_LOGGED;
465 }
466
James Kuszmaul84ff3e52020-01-03 19:48:53 -0800467 SimulatedEventLoopFactory *event_loop_factory() {
468 return event_loop_factory_;
469 }
470
Austin Schuh0ca51f32020-12-25 21:51:45 -0800471 std::string_view name() const { return log_files_[0].name; }
Austin Schuh0c297012020-09-16 18:41:59 -0700472
James Kuszmaul71a81932020-12-15 21:08:01 -0800473 // Set whether to exit the SimulatedEventLoopFactory when we finish reading
474 // the logfile.
475 void set_exit_on_finish(bool exit_on_finish) {
476 exit_on_finish_ = exit_on_finish;
477 }
478
Austin Schuhe309d2a2019-11-29 13:25:21 -0800479 private:
Austin Schuh6f3babe2020-01-26 20:34:50 -0800480 const Channel *RemapChannel(const EventLoop *event_loop,
481 const Channel *channel);
482
Austin Schuhe309d2a2019-11-29 13:25:21 -0800483 // Queues at least max_out_of_order_duration_ messages into channels_.
484 void QueueMessages();
James Kuszmaulc7bbb3e2020-01-03 20:01:00 -0800485 // Handle constructing a configuration with all the additional remapped
486 // channels from calls to RemapLoggedChannel.
487 void MakeRemappedConfig();
Austin Schuhe309d2a2019-11-29 13:25:21 -0800488
Austin Schuh2f8fd752020-09-01 22:38:28 -0700489 // Returns the number of nodes.
490 size_t nodes_count() const {
491 return !configuration::MultiNode(logged_configuration())
492 ? 1u
493 : logged_configuration()->nodes()->size();
494 }
495
Austin Schuh287d43d2020-12-04 20:19:33 -0800496 const std::vector<LogFile> log_files_;
Austin Schuh6f3babe2020-01-26 20:34:50 -0800497
Austin Schuh969cd602021-01-03 00:09:45 -0800498 // Class to manage sending RemoteMessages on the provided node after the
499 // correct delay.
500 class RemoteMessageSender{
501 public:
502 RemoteMessageSender(aos::Sender<message_bridge::RemoteMessage> sender,
503 EventLoop *event_loop);
504 RemoteMessageSender(RemoteMessageSender const &) = delete;
505 RemoteMessageSender &operator=(RemoteMessageSender const &) = delete;
506
507 // Sends the provided message. If monotonic_timestamp_time is min_time,
508 // send it immediately.
509 void Send(
510 FlatbufferDetachedBuffer<message_bridge::RemoteMessage> remote_message,
511 monotonic_clock::time_point monotonic_timestamp_time);
512
513 private:
514 // Handles actually sending the timestamp if we were delayed.
515 void SendTimestamp();
516 // Handles scheduling the timer to send at the correct time.
517 void ScheduleTimestamp();
518
519 EventLoop *event_loop_;
520 aos::Sender<message_bridge::RemoteMessage> sender_;
521 aos::TimerHandler *timer_;
522
523 // Time we are scheduled for, or min_time if we aren't scheduled.
524 monotonic_clock::time_point scheduled_time_ = monotonic_clock::min_time;
525
526 struct Timestamp {
527 Timestamp(FlatbufferDetachedBuffer<message_bridge::RemoteMessage>
528 new_remote_message,
529 monotonic_clock::time_point new_monotonic_timestamp_time)
530 : remote_message(std::move(new_remote_message)),
531 monotonic_timestamp_time(new_monotonic_timestamp_time) {}
532 FlatbufferDetachedBuffer<message_bridge::RemoteMessage> remote_message;
533 monotonic_clock::time_point monotonic_timestamp_time;
534 };
535
536 // List of messages to send. The timer works through them and then disables
537 // itself automatically.
538 std::deque<Timestamp> remote_timestamps_;
539 };
540
Austin Schuh6f3babe2020-01-26 20:34:50 -0800541 // State per node.
Austin Schuh858c9f32020-08-31 16:56:12 -0700542 class State {
543 public:
Austin Schuh287d43d2020-12-04 20:19:33 -0800544 State(std::unique_ptr<TimestampMapper> timestamp_mapper);
545
546 // Connects up the timestamp mappers.
547 void AddPeer(State *peer);
Austin Schuh6f3babe2020-01-26 20:34:50 -0800548
Austin Schuhdda74ec2021-01-03 19:30:37 -0800549 // Returns the next sorted message with all the timestamps extracted and
550 // matched.
551 TimestampedMessage PopOldest();
Austin Schuh188eabe2020-12-29 23:41:13 -0800552
Austin Schuh858c9f32020-08-31 16:56:12 -0700553 // Returns the monotonic time of the oldest message.
554 monotonic_clock::time_point OldestMessageTime() const;
555
556 // Primes the queues inside State. Should be called before calling
557 // OldestMessageTime.
558 void SeedSortedMessages();
Austin Schuh8bd96322020-02-13 21:18:22 -0800559
Austin Schuh858c9f32020-08-31 16:56:12 -0700560 // Returns the starting time for this node.
561 monotonic_clock::time_point monotonic_start_time() const {
Austin Schuh287d43d2020-12-04 20:19:33 -0800562 return timestamp_mapper_ ? timestamp_mapper_->monotonic_start_time()
563 : monotonic_clock::min_time;
Austin Schuh858c9f32020-08-31 16:56:12 -0700564 }
565 realtime_clock::time_point realtime_start_time() const {
Austin Schuh287d43d2020-12-04 20:19:33 -0800566 return timestamp_mapper_ ? timestamp_mapper_->realtime_start_time()
567 : realtime_clock::min_time;
Austin Schuh858c9f32020-08-31 16:56:12 -0700568 }
569
570 // Sets the node event loop factory for replaying into a
571 // SimulatedEventLoopFactory. Returns the EventLoop to use.
572 EventLoop *SetNodeEventLoopFactory(
573 NodeEventLoopFactory *node_event_loop_factory);
574
575 // Sets and gets the event loop to use.
576 void set_event_loop(EventLoop *event_loop) { event_loop_ = event_loop; }
577 EventLoop *event_loop() { return event_loop_; }
578
Austin Schuh858c9f32020-08-31 16:56:12 -0700579 // Sets the current realtime offset from the monotonic clock for this node
580 // (if we are on a simulated event loop).
581 void SetRealtimeOffset(monotonic_clock::time_point monotonic_time,
582 realtime_clock::time_point realtime_time) {
583 if (node_event_loop_factory_ != nullptr) {
584 node_event_loop_factory_->SetRealtimeOffset(monotonic_time,
585 realtime_time);
586 }
587 }
588
Austin Schuh8d7e0bb2020-10-02 17:57:00 -0700589 // Returns the MessageHeader sender to log delivery timestamps to for the
590 // provided remote node.
Austin Schuh969cd602021-01-03 00:09:45 -0800591 RemoteMessageSender *RemoteTimestampSender(const Node *delivered_node);
Austin Schuh8d7e0bb2020-10-02 17:57:00 -0700592
Austin Schuh858c9f32020-08-31 16:56:12 -0700593 // Converts a timestamp from the monotonic clock on this node to the
594 // distributed clock.
595 distributed_clock::time_point ToDistributedClock(
596 monotonic_clock::time_point time) {
597 return node_event_loop_factory_->ToDistributedClock(time);
598 }
599
Austin Schuh858c9f32020-08-31 16:56:12 -0700600 // Returns the current time on the remote node which sends messages on
601 // channel_index.
602 monotonic_clock::time_point monotonic_remote_now(size_t channel_index) {
Austin Schuh8d7e0bb2020-10-02 17:57:00 -0700603 return channel_source_state_[channel_index]
604 ->node_event_loop_factory_->monotonic_now();
Austin Schuh858c9f32020-08-31 16:56:12 -0700605 }
606
Austin Schuh2f8fd752020-09-01 22:38:28 -0700607 distributed_clock::time_point RemoteToDistributedClock(
608 size_t channel_index, monotonic_clock::time_point time) {
Austin Schuh8d7e0bb2020-10-02 17:57:00 -0700609 return channel_source_state_[channel_index]
610 ->node_event_loop_factory_->ToDistributedClock(time);
Austin Schuh2f8fd752020-09-01 22:38:28 -0700611 }
612
613 const Node *remote_node(size_t channel_index) {
Austin Schuh8d7e0bb2020-10-02 17:57:00 -0700614 return channel_source_state_[channel_index]
615 ->node_event_loop_factory_->node();
Austin Schuh2f8fd752020-09-01 22:38:28 -0700616 }
617
618 monotonic_clock::time_point monotonic_now() {
619 return node_event_loop_factory_->monotonic_now();
620 }
621
Austin Schuh858c9f32020-08-31 16:56:12 -0700622 // Sets the number of channels.
// NOTE(review): presumably sizes the per-channel vectors (channels_,
// filters_, channel_source_state_, ...) to `count` -- confirm against the
// definition.
623 void SetChannelCount(size_t count);
624
625 // Sets the sender, filter, remote timestamp sender, and source State for a
// channel.  `sender` is handed over by unique_ptr; the remaining pointers are
// passed raw.
// NOTE(review): logged_channel_index presumably indexes the logged
// configuration and factory_channel_index the configuration being replayed
// into (see factory_channel_index_) -- confirm against the definition.
Austin Schuh969cd602021-01-03 00:09:45 -0800626 void SetChannel(size_t logged_channel_index, size_t factory_channel_index,
627 std::unique_ptr<RawSender> sender,
628 message_bridge::NoncausalOffsetEstimator *filter,
629 RemoteMessageSender *remote_timestamp_sender,
630 State *source_state);
Austin Schuh858c9f32020-08-31 16:56:12 -0700631
Austin Schuh858c9f32020-08-31 16:56:12 -0700632 // Unregisters everything so we can destroy the event loop.
633 void Deregister();
634
635 // Sets the current TimerHandler for the replay callback.
636 void set_timer_handler(TimerHandler *timer_handler) {
637 timer_handler_ = timer_handler;
638 }
639
640 // Sets the next wakeup time on the replay callback.
641 void Setup(monotonic_clock::time_point next_time) {
642 timer_handler_->Setup(next_time);
643 }
644
645 // Sends the provided timestamped message.
// NOTE(review): presumably sent on the channel named by
// timestamped_message.channel_index; the meaning of the bool result is not
// visible from this declaration -- confirm against the definition.
Austin Schuh287d43d2020-12-04 20:19:33 -0800646 bool Send(const TimestampedMessage &timestamped_message);
Austin Schuh858c9f32020-08-31 16:56:12 -0700647
648 // Returns a debug string for the channel merger.
Austin Schuh2f8fd752020-09-01 22:38:28 -0700649 std::string DebugString() const {
650 std::stringstream messages;
651 size_t i = 0;
652 for (const auto &message : sorted_messages_) {
653 if (i < 7 || i + 7 > sorted_messages_.size()) {
654 messages << "sorted_messages[" << i
655 << "]: " << std::get<0>(message).monotonic_event_time << " "
656 << configuration::StrippedChannelToString(
657 event_loop_->configuration()->channels()->Get(
Austin Schuh287d43d2020-12-04 20:19:33 -0800658 std::get<0>(message).channel_index))
Austin Schuhdda74ec2021-01-03 19:30:37 -0800659 << (std::get<0>(message).data.span().size() == 0 ? " null"
660 : " data")
Austin Schuh2f8fd752020-09-01 22:38:28 -0700661 << "\n";
662 } else if (i == 7) {
663 messages << "...\n";
664 }
665 ++i;
666 }
Austin Schuh287d43d2020-12-04 20:19:33 -0800667 if (!timestamp_mapper_) {
668 return messages.str();
669 }
670 return messages.str() + timestamp_mapper_->DebugString();
Austin Schuh2f8fd752020-09-01 22:38:28 -0700671 }
Austin Schuh858c9f32020-08-31 16:56:12 -0700672
673 private:
674 // Log file, accessed through a TimestampMapper.  May be null:
// realtime_start_time() and DebugString() both check before using it.
Austin Schuh287d43d2020-12-04 20:19:33 -0800675 std::unique_ptr<TimestampMapper> timestamp_mapper_;
Austin Schuh858c9f32020-08-31 16:56:12 -0700676
// Queue of messages, each paired with a NoncausalOffsetEstimator pointer.
// DebugString() prints the entries near both ends of this queue.
// NOTE(review): presumably kept in time order, with a nullptr estimator for
// channels that are not forwarded -- confirm against the code that fills it.
Austin Schuh287d43d2020-12-04 20:19:33 -0800677 std::deque<std::tuple<TimestampedMessage,
Austin Schuh2f8fd752020-09-01 22:38:28 -0700678 message_bridge::NoncausalOffsetEstimator *>>
Austin Schuh858c9f32020-08-31 16:56:12 -0700679 sorted_messages_;
680
681 // Senders.  Each RawSender is owned here through a unique_ptr.
682 std::vector<std::unique_ptr<RawSender>> channels_;
// Raw (non-unique_ptr) RemoteMessageSender pointers.
// NOTE(review): presumably one entry per channel, pointing at objects owned
// by remote_timestamp_senders_map_ -- confirm.
Austin Schuh969cd602021-01-03 00:09:45 -0800683 std::vector<RemoteMessageSender *> remote_timestamp_senders_;
Austin Schuh8d7e0bb2020-10-02 17:57:00 -0700684 // The mapping from logged channel index to sent channel index. Needed for
685 // sending out MessageHeaders.
// NOTE(review): stored as int although SetChannel() takes the index as
// size_t -- confirm whether a negative sentinel is intended.
686 std::vector<int> factory_channel_index_;
687
Austin Schuh9942bae2021-01-07 22:06:44 -0800688 struct ContiguousSentTimestamp {
689 // Most timestamps make it through the network, so it saves a ton of
690 // memory and CPU to store the start and end, and search for valid ranges.
691 // For one of the logs I looked at, we had 2 ranges for 4 days.
692 //
693 // Save monotonic times as well to help if a queue index ever wraps. Odds
694 // are very low, but doesn't hurt.
695 //
696 // The starting time and matching queue index.
697 monotonic_clock::time_point starting_monotonic_event_time =
Austin Schuh8d7e0bb2020-10-02 17:57:00 -0700698 monotonic_clock::min_time;
Austin Schuh9942bae2021-01-07 22:06:44 -0800699 uint32_t starting_queue_index = 0xffffffff;
Austin Schuh8d7e0bb2020-10-02 17:57:00 -0700700
Austin Schuh9942bae2021-01-07 22:06:44 -0800701 // Ending time and queue index.
702 monotonic_clock::time_point ending_monotonic_event_time =
703 monotonic_clock::max_time;
704 uint32_t ending_queue_index = 0xffffffff;
705
706 // The queue index that the first message was *actually* sent with. The
707 // queue indices are assumed to be contiguous through this range.
Austin Schuh8d7e0bb2020-10-02 17:57:00 -0700708 uint32_t actual_queue_index = 0xffffffff;
709 };
710
711 // Stores all the timestamps that have been sent on each channel. This is
712 // only done for channels which are forwarded and on the node which
Austin Schuh9942bae2021-01-07 22:06:44 -0800713 // initially sends the message. Compress using ranges and offsets.
// NOTE(review): presumably indexed by channel index, with a null unique_ptr
// for channels that are not tracked -- confirm.
714 std::vector<std::unique_ptr<std::vector<ContiguousSentTimestamp>>>
715 queue_index_map_;
Austin Schuh858c9f32020-08-31 16:56:12 -0700716
717 // Factory (if we are in sim) that this loop was created on.
718 NodeEventLoopFactory *node_event_loop_factory_ = nullptr;
719 std::unique_ptr<EventLoop> event_loop_unique_ptr_;
720 // Event loop.
721 EventLoop *event_loop_ = nullptr;
722 // And timer used to send messages.
723 TimerHandler *timer_handler_;
724
Austin Schuh8bd96322020-02-13 21:18:22 -0800725 // Filters (or nullptr if it isn't a forwarded channel) for each channel.
726 // This corresponds to the object which is shared among all the channels
727 // going between 2 nodes. Each element is a bare pointer; no tuple or
728 // primary-direction flag is stored alongside it.
Austin Schuh2f8fd752020-09-01 22:38:28 -0700729 std::vector<message_bridge::NoncausalOffsetEstimator *> filters_;
Austin Schuh8bd96322020-02-13 21:18:22 -0800730
731 // List of States (or nullptr if it isn't a forwarded channel) which
732 // correspond to the originating node.  monotonic_remote_now(),
// RemoteToDistributedClock() and remote_node() index this by channel index and
// use the State's node_event_loop_factory_ without a null check.
Austin Schuh8d7e0bb2020-10-02 17:57:00 -0700733 std::vector<State *> channel_source_state_;
734
// RemoteMessageSenders owned by this State, keyed by Node pointer.
// NOTE(review): presumably the backing store for RemoteTimestampSender() --
// confirm against the definition.
Austin Schuh969cd602021-01-03 00:09:45 -0800735 std::map<const Node *, std::unique_ptr<RemoteMessageSender>>
Austin Schuh8d7e0bb2020-10-02 17:57:00 -0700736 remote_timestamp_senders_map_;
Austin Schuh6f3babe2020-01-26 20:34:50 -0800737 };
738
Austin Schuh8bd96322020-02-13 21:18:22 -0800739 // Node index -> State.  Each State is owned here through a unique_ptr.
740 std::vector<std::unique_ptr<State>> states_;
741
742 // Creates the requested filter if it doesn't exist, regardless of whether
743 // these nodes can actually communicate directly. Returns a pointer to the
744 // filter for the (node_a, node_b) pair; only that single pointer is returned.
Austin Schuh2f8fd752020-09-01 22:38:28 -0700745 message_bridge::NoncausalOffsetEstimator *GetFilter(const Node *node_a,
746 const Node *node_b);
Austin Schuh8bd96322020-02-13 21:18:22 -0800747
Austin Schuh8bd96322020-02-13 21:18:22 -0800748 // Owns the MultiNodeNoncausalOffsetEstimator.
749 // NOTE(review): this used to be described as a list of per-connection
// filters ordered by node pointer, which no longer matches the type;
// presumably the per-connection filters handed out by GetFilter() now live
// inside this estimator -- confirm.
Austin Schuh0ca1fd32020-12-18 22:53:05 -0800750 std::unique_ptr<message_bridge::MultiNodeNoncausalOffsetEstimator> filters_;
Austin Schuh8bd96322020-02-13 21:18:22 -0800751
// Owned flatbuffer holding a Configuration.
// NOTE(review): presumably the storage that remapped_configuration_ points
// into once channels have been remapped -- confirm.
James Kuszmaul84ff3e52020-01-03 19:48:53 -0800752 std::unique_ptr<FlatbufferDetachedBuffer<Configuration>>
753 remapped_configuration_buffer_;
754
// Owning handle and raw pointer for the SimulatedEventLoopFactory; the raw
// pointer defaults to nullptr.
// NOTE(review): presumably the unique_ptr is only populated when this class
// creates the factory itself, and event_loop_factory_ is the one actually
// used -- confirm.
James Kuszmaul84ff3e52020-01-03 19:48:53 -0800755 std::unique_ptr<SimulatedEventLoopFactory> event_loop_factory_unique_ptr_;
756 SimulatedEventLoopFactory *event_loop_factory_ = nullptr;
James Kuszmaulc7bbb3e2020-01-03 20:01:00 -0800757
758 // Map of channel indices to new name. The channel index will be an index into
759 // logged_configuration(), and remapped_name will be the name of the channel
760 // to send on instead of the logged channel name.
// NOTE(review): new_type presumably holds a replacement type name for the
// remapped channel (possibly empty when unchanged) -- confirm.
Austin Schuh0de30f32020-12-06 12:44:28 -0800761 struct RemappedChannel {
762 std::string remapped_name;
763 std::string new_type;
764 };
765 std::map<size_t, RemappedChannel> remapped_channels_;
// NOTE(review): presumably the channel maps added to the configuration as
// part of remapping -- confirm.
Austin Schuh01b4c352020-09-21 23:09:39 -0700766 std::vector<MapT> maps_;
James Kuszmaulc7bbb3e2020-01-03 20:01:00 -0800767
Austin Schuh6f3babe2020-01-26 20:34:50 -0800768 // Number of nodes which still have data to send. This is used to figure out
769 // when to exit.  Starts at 0.
770 size_t live_nodes_ = 0;
771
// Raw pointers to configurations; both default to nullptr.
// NOTE(review): presumably remapped_configuration_ is the logged
// configuration after channel remapping and replay_configuration_ is an
// optional caller-supplied configuration to replay into -- confirm.
James Kuszmaulc7bbb3e2020-01-03 20:01:00 -0800772 const Configuration *remapped_configuration_ = nullptr;
773 const Configuration *replay_configuration_ = nullptr;
Austin Schuhcde938c2020-02-02 17:30:07 -0800774
775 // If true, the replay timer will ignore any missing data. This is used
776 // during startup when we are bootstrapping everything and trying to get to
777 // the start of all the log files.  Defaults to false.
778 bool ignore_missing_data_ = false;
James Kuszmaul71a81932020-12-15 21:08:01 -0800779
780 // Whether to exit the SimulatedEventLoop when we finish reading the logs.
// Defaults to true.
781 bool exit_on_finish_ = true;
Austin Schuhe309d2a2019-11-29 13:25:21 -0800782};
783
784} // namespace logger
785} // namespace aos
786
787#endif // AOS_EVENTS_LOGGER_H_