blob: d36db70d1ad5224cebb60b0496e623f43459412b [file] [log] [blame]
Austin Schuhb06f03b2021-02-17 22:00:37 -08001#ifndef AOS_EVENTS_LOGGING_LOG_READER_H_
2#define AOS_EVENTS_LOGGING_LOG_READER_H_
Austin Schuhe309d2a2019-11-29 13:25:21 -08003
Austin Schuh8bd96322020-02-13 21:18:22 -08004#include <chrono>
Austin Schuhe309d2a2019-11-29 13:25:21 -08005#include <deque>
James Kuszmaula16a7912022-06-17 10:58:12 -07006#include <queue>
James Kuszmaulc3f34d12022-08-15 15:57:55 -07007#include <string_view>
Austin Schuh2f8fd752020-09-01 22:38:28 -07008#include <tuple>
Austin Schuh6f3babe2020-01-26 20:34:50 -08009#include <vector>
Austin Schuhe309d2a2019-11-29 13:25:21 -080010
Philipp Schrader790cb542023-07-05 21:06:52 -070011#include "flatbuffers/flatbuffers.h"
Alexei Strots1f51ac72023-05-15 10:14:54 -070012#include "gflags/gflags.h"
13#include "glog/logging.h"
Philipp Schrader790cb542023-07-05 21:06:52 -070014
James Kuszmaulc3f34d12022-08-15 15:57:55 -070015#include "aos/condition.h"
Austin Schuhe309d2a2019-11-29 13:25:21 -080016#include "aos/events/event_loop.h"
Eric Schmiedebergae00e732023-04-12 15:53:17 -060017#include "aos/events/event_loop_tmpl.h"
Eric Schmiedeberge279b532023-04-19 16:36:02 -060018#include "aos/events/logging/config_remapper.h"
Austin Schuhf6f9bf32020-10-11 14:37:43 -070019#include "aos/events/logging/logfile_sorting.h"
Austin Schuha36c8902019-12-30 18:07:15 -080020#include "aos/events/logging/logfile_utils.h"
James Kuszmaul38735e82019-12-07 16:42:06 -080021#include "aos/events/logging/logger_generated.h"
Eric Schmiedeberge279b532023-04-19 16:36:02 -060022#include "aos/events/logging/replay_channels.h"
James Kuszmaula16a7912022-06-17 10:58:12 -070023#include "aos/events/logging/replay_timing_generated.h"
James Kuszmaul09632422022-05-25 15:56:19 -070024#include "aos/events/shm_event_loop.h"
Austin Schuh92547522019-12-28 14:33:43 -080025#include "aos/events/simulated_event_loop.h"
James Kuszmaulc3f34d12022-08-15 15:57:55 -070026#include "aos/mutex/mutex.h"
Austin Schuh2f8fd752020-09-01 22:38:28 -070027#include "aos/network/message_bridge_server_generated.h"
Austin Schuh0ca1fd32020-12-18 22:53:05 -080028#include "aos/network/multinode_timestamp_filter.h"
Austin Schuh0de30f32020-12-06 12:44:28 -080029#include "aos/network/remote_message_generated.h"
Austin Schuh8bd96322020-02-13 21:18:22 -080030#include "aos/network/timestamp_filter.h"
Austin Schuhe309d2a2019-11-29 13:25:21 -080031#include "aos/time/time.h"
James Kuszmaula16a7912022-06-17 10:58:12 -070032#include "aos/util/threaded_queue.h"
James Kuszmaulc3f34d12022-08-15 15:57:55 -070033#include "aos/uuid.h"
Austin Schuhe309d2a2019-11-29 13:25:21 -080034
35namespace aos {
36namespace logger {
37
Austin Schuhe33c08d2022-02-03 18:15:21 -080038class EventNotifier;
39
Austin Schuh6f3babe2020-01-26 20:34:50 -080040// We end up with one of the following 3 log file types.
41//
42// Single node logged as the source node.
43// -> Replayed just on the source node.
44//
45// Forwarding timestamps only logged from the perspective of the destination
46// node.
47// -> Matched with data on source node and logged.
48//
49// Forwarding timestamps with data logged as the destination node.
50// -> Replayed just as the destination
51// -> Replayed as the source (Much harder, ordering is not defined)
52//
53// Duplicate data logged. -> CHECK that it matches and explode otherwise.
54//
55// This can be boiled down to a set of constraints and tools.
56//
57// 1) Forwarding timestamps and data need to be logged separately.
58// 2) Any forwarded data logged on the destination node needs to be logged
59// separately such that it can be sorted.
60//
61// 1) Log reader needs to be able to sort a list of log files.
62// 2) Log reader needs to be able to merge sorted lists of log files.
63// 3) Log reader needs to be able to match timestamps with messages.
64//
65// We also need to be able to generate multiple views of a log file depending on
66// the target.
James Kuszmaul298b4a22023-06-28 20:01:03 -070067//
68// In general, we aim to guarantee that if you are using the LogReader
69// "normally" you should be able to observe all the messages that existed on the
70// live system between the start time and the end of the logfile, and that
71// CHECK-failures will be generated if the LogReader cannot satisfy that
72// guarantee. There are currently a few deliberate exceptions to this:
73// * Any channel marked NOT_LOGGED in the configuration is known not to
74// have been logged and thus will be silently absent in log replay.
75// * If an incomplete set of log files is provided to the reader (e.g.,
76// only logs logged on a single node on a multi-node system), then
77// any *individual* channel as observed on a given node will be
78// consistent, but similarly to a NOT_LOGGED channel, some data may
79// not be available.
80// * At the end of a log, data for some channels/nodes may end before
81// others; during this time period, you may observe silently dropped
82// messages. This will be most obvious on uncleanly terminated logs or
83// when merging logfiles across nodes (as the logs on different nodes
84// will not finish at identical times).
Austin Schuh6f3babe2020-01-26 20:34:50 -080085
Austin Schuhe309d2a2019-11-29 13:25:21 -080086// Replays all the channels in the logfile to the event loop.
87class LogReader {
88 public:
James Kuszmaulc7bbb3e2020-01-03 20:01:00 -080089 // If you want to supply a new configuration that will be used for replay
90 // (e.g., to change message rates, or to populate an updated schema), then
91 // pass it in here. It must provide all the channels that the original logged
92 // config did.
Austin Schuh6f3babe2020-01-26 20:34:50 -080093 //
Eric Schmiedebergb38477e2022-12-02 16:08:04 -070094 // If certain messages should not be replayed, the replay_channels param can
95 // be used as an inclusive list of channels for messages to be replayed.
96 //
Austin Schuh287d43d2020-12-04 20:19:33 -080097 // The single file constructor calls SortParts internally.
James Kuszmaulc7bbb3e2020-01-03 20:01:00 -080098 LogReader(std::string_view filename,
Eric Schmiedebergb38477e2022-12-02 16:08:04 -070099 const Configuration *replay_configuration = nullptr,
100 const ReplayChannels *replay_channels = nullptr);
Austin Schuh287d43d2020-12-04 20:19:33 -0800101 LogReader(std::vector<LogFile> log_files,
Eric Schmiedebergb38477e2022-12-02 16:08:04 -0700102 const Configuration *replay_configuration = nullptr,
103 const ReplayChannels *replay_channels = nullptr);
Alexei Strots1f51ac72023-05-15 10:14:54 -0700104 LogReader(LogFilesContainer log_files,
105 const Configuration *replay_configuration = nullptr,
106 const ReplayChannels *replay_channels = nullptr);
James Kuszmaul7daef362019-12-31 18:28:17 -0800107 ~LogReader();
Austin Schuhe309d2a2019-11-29 13:25:21 -0800108
Austin Schuh6331ef92020-01-07 18:28:09 -0800109 // Registers all the callbacks to send the log file data out on an event loop
110 // created in event_loop_factory. This also updates time to be at the start
111 // of the log file by running until the log file starts.
112 // Note: the configuration used in the factory should be configuration()
113 // below, but can be anything as long as the locations needed to send
114 // everything are available.
James Kuszmaul84ff3e52020-01-03 19:48:53 -0800115 void Register(SimulatedEventLoopFactory *event_loop_factory);
Austin Schuhe33c08d2022-02-03 18:15:21 -0800116
Austin Schuh58646e22021-08-23 23:51:46 -0700117 // Registers all the callbacks to send the log file data out to an event loop
118 // factory. This does not start replaying or change the current distributed
119 // time of the factory. It does change the monotonic clocks to be right.
120 void RegisterWithoutStarting(SimulatedEventLoopFactory *event_loop_factory);
Austin Schuhe33c08d2022-02-03 18:15:21 -0800121 // Runs the log until the last start time. Register above is defined as:
122 // Register(...) {
123 // RegisterWithoutStarting
124 // StartAfterRegister
125 // }
126 // This should generally be considered as a stepping stone to convert from
127 // Register() to RegisterWithoutStarting() incrementally.
128 void StartAfterRegister(SimulatedEventLoopFactory *event_loop_factory);
129
Austin Schuh6331ef92020-01-07 18:28:09 -0800130 // Creates a SimulatedEventLoopFactory accessible via event_loop_factory(),
131 // and then calls Register.
132 void Register();
James Kuszmaul09632422022-05-25 15:56:19 -0700133
Austin Schuh6331ef92020-01-07 18:28:09 -0800134 // Registers callbacks for all the events after the log file starts. This is
135 // only useful when replaying live.
Austin Schuhe309d2a2019-11-29 13:25:21 -0800136 void Register(EventLoop *event_loop);
Austin Schuh6331ef92020-01-07 18:28:09 -0800137
James Kuszmaula16a7912022-06-17 10:58:12 -0700138 // Sets a sender that should be used for tracking timing statistics. If not
139 // set, no statistics will be recorded.
140 void set_timing_accuracy_sender(
141 const Node *node, aos::Sender<timing::ReplayTiming> timing_sender) {
142 states_[configuration::GetNodeIndex(configuration(), node)]
143 ->set_timing_accuracy_sender(std::move(timing_sender));
144 }
145
Austin Schuh58646e22021-08-23 23:51:46 -0700146 // Called whenever a log file starts for a node.
James Kuszmaul82c3b512023-07-08 20:25:41 -0700147 // More precisely, this will be called on each boot at max of
148 // (realtime_start_time in the logfiles, SetStartTime()). If a given boot
149 // occurs entirely before the realtime_start_time, the OnStart handler will
150 // never get called for that boot.
151 //
152 // realtime_start_time is defined below, but essentially is the time at which
153 // message channels will start being internally consistent on a given node
154 // (i.e., when the logger started). Note: If you wish to see a watcher
155 // triggered for *every* message in a log, OnStart() will not be
156 // sufficient--messages (possibly multiple messages) may be present on
157 // channels prior to the start time. If attempting to do this, prefer to use
158 // NodeEventLoopFactory::OnStart.
Austin Schuh58646e22021-08-23 23:51:46 -0700159 void OnStart(std::function<void()> fn);
160 void OnStart(const Node *node, std::function<void()> fn);
James Kuszmaul82c3b512023-07-08 20:25:41 -0700161 // Called whenever a log file ends for a node on a given boot, or at the
162 // realtime_end_time specified by a flag or SetEndTime().
163 //
164 // A log file "ends" when there are no more messages to be replayed for that
165 // boot.
166 //
167 // If OnStart() is not called for a given boot, the OnEnd() handlers will not
168 // be called either. OnEnd() handlers will not be called if the logfile for a
169 // given boot has missing data that causes us to terminate replay early.
Austin Schuh58646e22021-08-23 23:51:46 -0700170 void OnEnd(std::function<void()> fn);
171 void OnEnd(const Node *node, std::function<void()> fn);
172
James Kuszmaul84ff3e52020-01-03 19:48:53 -0800173 // Unregisters the senders. You only need to call this if you separately
174 // supplied an event loop or event loop factory and the lifetimes are such
175 // that they need to be explicitly destroyed before the LogReader destructor
176 // gets called.
Austin Schuhe309d2a2019-11-29 13:25:21 -0800177 void Deregister();
178
Austin Schuh0c297012020-09-16 18:41:59 -0700179 // Returns the configuration being used for replay from the log file.
180 // Note that this may be different from the configuration actually used for
181 // handling events. You should generally only use this to create a
182 // SimulatedEventLoopFactory, and then get the configuration from there for
183 // everything else.
James Kuszmaulc7bbb3e2020-01-03 20:01:00 -0800184 const Configuration *logged_configuration() const;
Austin Schuh11d43732020-09-21 17:28:30 -0700185 // Returns the configuration being used for replay from the log file.
186 // Note that this may be different from the configuration actually used for
187 // handling events. You should generally only use this to create a
188 // SimulatedEventLoopFactory, and then get the configuration from there for
189 // everything else.
Austin Schuh6f3babe2020-01-26 20:34:50 -0800190 // The pointer is invalidated whenever RemapLoggedChannel is called.
Austin Schuh15649d62019-12-28 16:36:38 -0800191 const Configuration *configuration() const;
192
Austin Schuh6f3babe2020-01-26 20:34:50 -0800193 // Returns the nodes that this log file was created on. This is a list of
Austin Schuh07676622021-01-21 18:59:17 -0800194 // pointers to a node in the nodes() list inside logged_configuration().
195 std::vector<const Node *> LoggedNodes() const;
Austin Schuhe309d2a2019-11-29 13:25:21 -0800196
197 // Returns the starting timestamp for the log file.
James Kuszmaul298b4a22023-06-28 20:01:03 -0700198 // All logged channels for the specified node should be entirely available
199 // after the specified time (i.e., any message that was available on the node
200 // in question after the monotonic start time but before the logs end and
201 // whose channel is present in any of the provided logs will either be
202 // available in the log or will result in an internal CHECK-failure of the
203 // LogReader if it would be skipped).
Austin Schuh11d43732020-09-21 17:28:30 -0700204 monotonic_clock::time_point monotonic_start_time(
205 const Node *node = nullptr) const;
206 realtime_clock::time_point realtime_start_time(
207 const Node *node = nullptr) const;
Austin Schuhe309d2a2019-11-29 13:25:21 -0800208
Austin Schuhe33c08d2022-02-03 18:15:21 -0800209 // Sets the start and end times to replay data until for all nodes. This
210 // overrides the --start_time and --end_time flags. The default is to replay
211 // all data.
212 void SetStartTime(std::string start_time);
213 void SetStartTime(realtime_clock::time_point start_time);
214 void SetEndTime(std::string end_time);
215 void SetEndTime(realtime_clock::time_point end_time);
216
James Kuszmaulc7bbb3e2020-01-03 20:01:00 -0800217 // Causes the logger to publish the provided channel on a different name so
218 // that replayed applications can publish on the proper channel name without
219 // interference. This operates on raw channel names, without any node or
220 // application specific mappings.
Eric Schmiedeberge279b532023-04-19 16:36:02 -0600221 void RemapLoggedChannel(std::string_view name, std::string_view type,
222 std::string_view add_prefix = "/original",
223 std::string_view new_type = "",
224 ConfigRemapper::RemapConflict conflict_handling =
225 ConfigRemapper::RemapConflict::kCascade);
James Kuszmaulc7bbb3e2020-01-03 20:01:00 -0800226 template <typename T>
Eric Schmiedeberge279b532023-04-19 16:36:02 -0600227 void RemapLoggedChannel(std::string_view name,
228 std::string_view add_prefix = "/original",
229 std::string_view new_type = "",
230 ConfigRemapper::RemapConflict conflict_handling =
231 ConfigRemapper::RemapConflict::kCascade) {
James Kuszmaul53da7f32022-09-11 11:11:55 -0700232 RemapLoggedChannel(name, T::GetFullyQualifiedName(), add_prefix, new_type,
233 conflict_handling);
James Kuszmaulc7bbb3e2020-01-03 20:01:00 -0800234 }
Austin Schuh01b4c352020-09-21 23:09:39 -0700235 // Remaps the provided channel, though this respects node mappings, and
236 // preserves them too. This makes it so if /aos -> /pi1/aos on one node,
237 // /original/aos -> /original/pi1/aos on the same node after renaming, just
Austin Schuh0de30f32020-12-06 12:44:28 -0800238 // like you would hope. If new_type is not empty, the new channel will use
239 // the provided type instead. This allows for renaming messages.
Austin Schuh01b4c352020-09-21 23:09:39 -0700240 //
241 // TODO(austin): If you have 2 nodes remapping something to the same channel,
242 // this doesn't handle that. No use cases exist yet for that, so it isn't
243 // being done yet.
Eric Schmiedeberge279b532023-04-19 16:36:02 -0600244 void RemapLoggedChannel(std::string_view name, std::string_view type,
245 const Node *node,
246 std::string_view add_prefix = "/original",
247 std::string_view new_type = "",
248 ConfigRemapper::RemapConflict conflict_handling =
249 ConfigRemapper::RemapConflict::kCascade);
Brian Silvermande9f3ff2020-04-28 16:56:58 -0700250 template <typename T>
Eric Schmiedeberge279b532023-04-19 16:36:02 -0600251 void RemapLoggedChannel(std::string_view name, const Node *node,
252 std::string_view add_prefix = "/original",
253 std::string_view new_type = "",
254 ConfigRemapper::RemapConflict conflict_handling =
255 ConfigRemapper::RemapConflict::kCascade) {
Austin Schuh0de30f32020-12-06 12:44:28 -0800256 RemapLoggedChannel(name, T::GetFullyQualifiedName(), node, add_prefix,
James Kuszmaul53da7f32022-09-11 11:11:55 -0700257 new_type, conflict_handling);
Austin Schuh01b4c352020-09-21 23:09:39 -0700258 }
259
Sanjay Narayanan5ec00232022-07-08 15:21:30 -0700260 // Similar to RemapLoggedChannel(), but lets you specify a name for the new
261 // channel without constraints. This is useful when an application has been
262 // updated to use new channels but you want to support replaying old logs. By
263 // default, this will not add any maps for the new channel. Use add_maps to
264 // specify any maps you'd like added.
265 void RenameLoggedChannel(std::string_view name, std::string_view type,
266 std::string_view new_name,
267 const std::vector<MapT> &add_maps = {});
268 template <typename T>
269 void RenameLoggedChannel(std::string_view name, std::string_view new_name,
270 const std::vector<MapT> &add_maps = {}) {
271 RenameLoggedChannel(name, T::GetFullyQualifiedName(), new_name, add_maps);
272 }
273 // The following overloads are more suitable for multi-node configurations,
274 // and let you rename a channel on a specific node.
275 void RenameLoggedChannel(std::string_view name, std::string_view type,
276 const Node *node, std::string_view new_name,
277 const std::vector<MapT> &add_maps = {});
278 template <typename T>
279 void RenameLoggedChannel(std::string_view name, const Node *node,
280 std::string_view new_name,
281 const std::vector<MapT> &add_maps = {}) {
282 RenameLoggedChannel(name, T::GetFullyQualifiedName(), node, new_name,
283 add_maps);
284 }
285
Austin Schuh01b4c352020-09-21 23:09:39 -0700286 template <typename T>
287 bool HasChannel(std::string_view name, const Node *node = nullptr) {
Sanjay Narayanan5ec00232022-07-08 15:21:30 -0700288 return HasChannel(name, T::GetFullyQualifiedName(), node);
289 }
290 bool HasChannel(std::string_view name, std::string_view type,
291 const Node *node) {
292 return configuration::GetChannel(logged_configuration(), name, type, "",
293 node, true) != nullptr;
Brian Silvermande9f3ff2020-04-28 16:56:58 -0700294 }
295
Austin Schuh82529062021-12-08 12:09:52 -0800296 template <typename T>
297 void MaybeRemapLoggedChannel(std::string_view name,
298 const Node *node = nullptr) {
299 if (HasChannel<T>(name, node)) {
300 RemapLoggedChannel<T>(name, node);
301 }
302 }
Sanjay Narayanan5ec00232022-07-08 15:21:30 -0700303 template <typename T>
304 void MaybeRenameLoggedChannel(std::string_view name, const Node *node,
305 std::string_view new_name,
306 const std::vector<MapT> &add_maps = {}) {
307 if (HasChannel<T>(name, node)) {
308 RenameLoggedChannel<T>(name, node, new_name, add_maps);
309 }
310 }
Austin Schuh82529062021-12-08 12:09:52 -0800311
James Kuszmaul4f106fb2021-01-05 20:53:02 -0800312 // Returns true if the channel exists on the node and was logged.
313 template <typename T>
314 bool HasLoggedChannel(std::string_view name, const Node *node = nullptr) {
Eric Schmiedeberge279b532023-04-19 16:36:02 -0600315 return config_remapper_.HasOriginalChannel<T>(name, node);
James Kuszmaul4f106fb2021-01-05 20:53:02 -0800316 }
317
Austin Schuh1c227352021-09-17 12:53:54 -0700318 // Returns a list of all the original channels from remapping.
319 std::vector<const Channel *> RemappedChannels() const;
320
James Kuszmaul84ff3e52020-01-03 19:48:53 -0800321 SimulatedEventLoopFactory *event_loop_factory() {
322 return event_loop_factory_;
323 }
324
Alexei Strots1f51ac72023-05-15 10:14:54 -0700325 std::string_view name() const { return log_files_.name(); }
Austin Schuh0c297012020-09-16 18:41:59 -0700326
James Kuszmaul71a81932020-12-15 21:08:01 -0800327 // Set whether to exit the SimulatedEventLoopFactory when we finish reading
328 // the logfile.
329 void set_exit_on_finish(bool exit_on_finish) {
330 exit_on_finish_ = exit_on_finish;
331 }
James Kuszmaulb11a1502022-07-01 16:02:25 -0700332 bool exit_on_finish() const { return exit_on_finish_; }
James Kuszmaul71a81932020-12-15 21:08:01 -0800333
James Kuszmaulb67409b2022-06-20 16:25:03 -0700334 // Sets the realtime replay rate. A value of 1.0 will cause the scheduler to
335 // try to play events in realtime. 0.5 will run at half speed. Use infinity
336 // (the default) to run as fast as possible. This can be changed during
337 // run-time.
338 // Only applies when running against a SimulatedEventLoopFactory.
339 void SetRealtimeReplayRate(double replay_rate);
340
Eric Schmiedebergae00e732023-04-12 15:53:17 -0600341 // Adds a callback for a channel to be called right before sending a message.
342 // This allows a user to mutate a message or do any processing when a specific
343 // type of message is sent on a channel. The name and type of the channel
344 // corresponds to the logged_configuration's name and type.
345 //
346 // Note, only one callback can be registered per channel in the current
347 // implementation. And, the callback is called only once on the Sender's Node
348 // if the channel is forwarded.
349 //
350 // See multinode_logger_test for examples of usage.
351 template <typename Callback>
352 void AddBeforeSendCallback(std::string_view channel_name,
353 Callback &&callback) {
354 CHECK(!AreStatesInitialized())
355 << ": Cannot add callbacks after calling Register";
356
357 using MessageType = typename std::remove_pointer<
358 typename event_loop_internal::watch_message_type_trait<
359 decltype(&Callback::operator())>::message_type>::type;
360
361 const Channel *channel = configuration::GetChannel(
362 logged_configuration(), channel_name,
363 MessageType::GetFullyQualifiedName(), "", nullptr);
364
365 CHECK(channel != nullptr)
366 << ": Channel { \"name\": \"" << channel_name << "\", \"type\": \""
367 << MessageType::GetFullyQualifiedName()
368 << "\" } not found in config for application.";
369 auto channel_index =
370 configuration::ChannelIndex(logged_configuration(), channel);
371
372 CHECK(!before_send_callbacks_[channel_index])
373 << ": Before Send Callback already registered for channel "
374 << ":{ \"name\": \"" << channel_name << "\", \"type\": \""
375 << MessageType::GetFullyQualifiedName() << "\" }";
376
377 before_send_callbacks_[channel_index] = [callback](void *message) {
378 callback(flatbuffers::GetMutableRoot<MessageType>(
379 reinterpret_cast<char *>(message)));
380 };
381 }
382
Austin Schuhe309d2a2019-11-29 13:25:21 -0800383 private:
Austin Schuh58646e22021-08-23 23:51:46 -0700384 void Register(EventLoop *event_loop, const Node *node);
385
386 void RegisterDuringStartup(EventLoop *event_loop, const Node *node);
387
388 const Channel *RemapChannel(const EventLoop *event_loop, const Node *node,
Austin Schuh6f3babe2020-01-26 20:34:50 -0800389 const Channel *channel);
390
Austin Schuhe309d2a2019-11-29 13:25:21 -0800391 // Queues at least max_out_of_order_duration_ messages into channels_.
392 void QueueMessages();
Eric Schmiedeberge279b532023-04-19 16:36:02 -0600393
394 // Checks if any states have their event loops initialized which indicates
395 // events have been scheduled
396 void CheckEventsAreNotScheduled();
Austin Schuhe309d2a2019-11-29 13:25:21 -0800397
Austin Schuh2f8fd752020-09-01 22:38:28 -0700398 // Returns the number of nodes.
399 size_t nodes_count() const {
400 return !configuration::MultiNode(logged_configuration())
401 ? 1u
402 : logged_configuration()->nodes()->size();
403 }
404
James Kuszmaulb11a1502022-07-01 16:02:25 -0700405 // Handles when an individual node hits the realtime end time, exiting the
406 // entire event loop once all nodes are stopped.
407 void NoticeRealtimeEnd();
408
Alexei Strots1f51ac72023-05-15 10:14:54 -0700409 const LogFilesContainer log_files_;
Austin Schuh6f3babe2020-01-26 20:34:50 -0800410
Austin Schuh969cd602021-01-03 00:09:45 -0800411 // Class to manage sending RemoteMessages on the provided node after the
412 // correct delay.
Austin Schuh5ee56872021-01-30 16:53:34 -0800413 class RemoteMessageSender {
Austin Schuh969cd602021-01-03 00:09:45 -0800414 public:
415 RemoteMessageSender(aos::Sender<message_bridge::RemoteMessage> sender,
416 EventLoop *event_loop);
417 RemoteMessageSender(RemoteMessageSender const &) = delete;
418 RemoteMessageSender &operator=(RemoteMessageSender const &) = delete;
419
420 // Sends the provided message. If monotonic_timestamp_time is min_time,
421 // send it immediately.
422 void Send(
423 FlatbufferDetachedBuffer<message_bridge::RemoteMessage> remote_message,
Austin Schuh58646e22021-08-23 23:51:46 -0700424 BootTimestamp monotonic_timestamp_time, size_t source_boot_count);
Austin Schuh969cd602021-01-03 00:09:45 -0800425
426 private:
427 // Handles actually sending the timestamp if we were delayed.
428 void SendTimestamp();
429 // Handles scheduling the timer to send at the correct time.
430 void ScheduleTimestamp();
431
432 EventLoop *event_loop_;
433 aos::Sender<message_bridge::RemoteMessage> sender_;
434 aos::TimerHandler *timer_;
435
436 // Time we are scheduled for, or min_time if we aren't scheduled.
437 monotonic_clock::time_point scheduled_time_ = monotonic_clock::min_time;
438
439 struct Timestamp {
440 Timestamp(FlatbufferDetachedBuffer<message_bridge::RemoteMessage>
441 new_remote_message,
442 monotonic_clock::time_point new_monotonic_timestamp_time)
443 : remote_message(std::move(new_remote_message)),
444 monotonic_timestamp_time(new_monotonic_timestamp_time) {}
445 FlatbufferDetachedBuffer<message_bridge::RemoteMessage> remote_message;
446 monotonic_clock::time_point monotonic_timestamp_time;
447 };
448
449 // List of messages to send. The timer works through them and then disables
450 // itself automatically.
451 std::deque<Timestamp> remote_timestamps_;
452 };
453
Austin Schuh6f3babe2020-01-26 20:34:50 -0800454 // State per node.
Austin Schuh858c9f32020-08-31 16:56:12 -0700455 class State {
456 public:
James Kuszmaula16a7912022-06-17 10:58:12 -0700457 // Whether we should spin up a separate thread for buffering up messages.
458 // Only allowed in realtime replay--see comments on threading_ member for
459 // details.
460 enum class ThreadedBuffering { kYes, kNo };
James Kuszmaul09632422022-05-25 15:56:19 -0700461 State(std::unique_ptr<TimestampMapper> timestamp_mapper,
Austin Schuh63097262023-08-16 17:04:29 -0700462 TimestampQueueStrategy timestamp_queue_strategy,
James Kuszmaul09632422022-05-25 15:56:19 -0700463 message_bridge::MultiNodeNoncausalOffsetEstimator *multinode_filters,
James Kuszmaulb11a1502022-07-01 16:02:25 -0700464 std::function<void()> notice_realtime_end, const Node *node,
465 ThreadedBuffering threading,
Eric Schmiedebergae00e732023-04-12 15:53:17 -0600466 std::unique_ptr<const ReplayChannelIndices> replay_channel_indices,
467 const std::vector<std::function<void(void *message)>>
468 &before_send_callbacks);
Austin Schuh287d43d2020-12-04 20:19:33 -0800469
470 // Connects up the timestamp mappers.
471 void AddPeer(State *peer);
Austin Schuh6f3babe2020-01-26 20:34:50 -0800472
Austin Schuhe639ea12021-01-25 13:00:22 -0800473 TimestampMapper *timestamp_mapper() { return timestamp_mapper_.get(); }
474
Austin Schuhdda74ec2021-01-03 19:30:37 -0800475 // Returns the next sorted message with all the timestamps extracted and
476 // matched.
477 TimestampedMessage PopOldest();
Austin Schuh188eabe2020-12-29 23:41:13 -0800478
Austin Schuh858c9f32020-08-31 16:56:12 -0700479 // Returns the monotonic time of the oldest message.
James Kuszmaula16a7912022-06-17 10:58:12 -0700480 BootTimestamp SingleThreadedOldestMessageTime();
481 // Returns the monotonic time of the oldest message, handling querying the
482 // separate thread if ThreadedBuffering was set.
483 BootTimestamp MultiThreadedOldestMessageTime();
Austin Schuh58646e22021-08-23 23:51:46 -0700484
485 size_t boot_count() const {
486 // If we are replaying directly into an event loop, we can't reboot. So
487 // we will stay stuck on the 0th boot.
James Kuszmaul09632422022-05-25 15:56:19 -0700488 if (!node_event_loop_factory_) {
489 if (event_loop_ == nullptr) {
490 // If boot_count is being checked after startup for any of the
491 // non-primary nodes, then returning 0 may not be accurate (since
492 // remote nodes *can* reboot even if the EventLoop being played to
493 // can't).
494 CHECK(!started_);
495 CHECK(!stopped_);
496 }
497 return 0u;
498 }
Austin Schuh58646e22021-08-23 23:51:46 -0700499 return node_event_loop_factory_->boot_count();
500 }
Austin Schuh858c9f32020-08-31 16:56:12 -0700501
Austin Schuh63097262023-08-16 17:04:29 -0700502 // Reads all the timestamps into RAM so we don't need to manage buffering
503 // them. For logs where the timestamps are in separate files, this
504 // minimizes RAM usage in the cases where the log reader decides to buffer
505 // to the end of the file, or where the time estimation buffer needs to be
506 // set high to sort. This means we devote our RAM to holding lots of
507 // timestamps instead of timestamps and much larger data for a shorter
508 // period. For logs where timestamps are stored with the data, this
509 // triggers those files to be read twice.
510 void ReadTimestamps();
511
Austin Schuh858c9f32020-08-31 16:56:12 -0700512 // Primes the queues inside State. Should be called before calling
513 // OldestMessageTime.
Austin Schuh63097262023-08-16 17:04:29 -0700514 void MaybeSeedSortedMessages();
Austin Schuh8bd96322020-02-13 21:18:22 -0800515
Philipp Schradera6712522023-07-05 20:25:11 -0700516 void SetUpStartupTimer() {
Austin Schuh58646e22021-08-23 23:51:46 -0700517 const monotonic_clock::time_point start_time =
518 monotonic_start_time(boot_count());
519 if (start_time == monotonic_clock::min_time) {
Austin Schuh3e31f912023-08-21 21:29:10 -0700520 if (event_loop_->node()) {
521 LOG(ERROR) << "No start time for "
522 << event_loop_->node()->name()->string_view()
523 << ", skipping.";
524 } else {
525 LOG(ERROR) << "No start time, skipping.";
526 }
527
528 // This is called from OnRun. There is too much complexity in supporting
529 // OnStartup callbacks from inside OnRun. Instead, schedule a timer for
530 // "now", and have that do what we need.
531 startup_timer_->Schedule(event_loop_->monotonic_now());
Austin Schuh58646e22021-08-23 23:51:46 -0700532 return;
533 }
James Kuszmaul09632422022-05-25 15:56:19 -0700534 if (node_event_loop_factory_) {
535 CHECK_GE(start_time + clock_offset(), event_loop_->monotonic_now());
536 }
Philipp Schradera6712522023-07-05 20:25:11 -0700537 startup_timer_->Schedule(start_time + clock_offset());
Austin Schuh58646e22021-08-23 23:51:46 -0700538 }
539
540 void set_startup_timer(TimerHandler *timer_handler) {
541 startup_timer_ = timer_handler;
542 if (startup_timer_) {
543 if (event_loop_->node() != nullptr) {
544 startup_timer_->set_name(absl::StrCat(
545 event_loop_->node()->name()->string_view(), "_startup"));
546 } else {
547 startup_timer_->set_name("startup");
548 }
549 }
550 }
551
Austin Schuh858c9f32020-08-31 16:56:12 -0700552 // Returns the starting time for this node.
Austin Schuh2dc8c7d2021-07-01 17:41:28 -0700553 monotonic_clock::time_point monotonic_start_time(size_t boot_count) const {
554 return timestamp_mapper_
555 ? timestamp_mapper_->monotonic_start_time(boot_count)
556 : monotonic_clock::min_time;
Austin Schuh858c9f32020-08-31 16:56:12 -0700557 }
Austin Schuh2dc8c7d2021-07-01 17:41:28 -0700558 realtime_clock::time_point realtime_start_time(size_t boot_count) const {
559 return timestamp_mapper_
560 ? timestamp_mapper_->realtime_start_time(boot_count)
561 : realtime_clock::min_time;
Austin Schuh858c9f32020-08-31 16:56:12 -0700562 }
563
// Sets the node event loop factory for replaying into a
// SimulatedEventLoopFactory.
Austin Schuh60e77942022-05-16 17:48:24 -0700566 void SetNodeEventLoopFactory(NodeEventLoopFactory *node_event_loop_factory,
567 SimulatedEventLoopFactory *event_loop_factory);
Austin Schuh858c9f32020-08-31 16:56:12 -0700568
569 // Sets and gets the event loop to use.
570 void set_event_loop(EventLoop *event_loop) { event_loop_ = event_loop; }
571 EventLoop *event_loop() { return event_loop_; }
572
Austin Schuh58646e22021-08-23 23:51:46 -0700573 const Node *node() const { return node_; }
574
575 void Register(EventLoop *event_loop);
576
577 void OnStart(std::function<void()> fn);
578 void OnEnd(std::function<void()> fn);
579
Austin Schuh858c9f32020-08-31 16:56:12 -0700580 // Sets the current realtime offset from the monotonic clock for this node
581 // (if we are on a simulated event loop).
582 void SetRealtimeOffset(monotonic_clock::time_point monotonic_time,
583 realtime_clock::time_point realtime_time) {
584 if (node_event_loop_factory_ != nullptr) {
585 node_event_loop_factory_->SetRealtimeOffset(monotonic_time,
586 realtime_time);
587 }
588 }
589
Austin Schuh8d7e0bb2020-10-02 17:57:00 -0700590 // Returns the MessageHeader sender to log delivery timestamps to for the
591 // provided remote node.
Austin Schuh61e973f2021-02-21 21:43:56 -0800592 RemoteMessageSender *RemoteTimestampSender(const Channel *channel,
593 const Connection *connection);
Austin Schuh8d7e0bb2020-10-02 17:57:00 -0700594
Austin Schuh858c9f32020-08-31 16:56:12 -0700595 // Converts a timestamp from the monotonic clock on this node to the
596 // distributed clock.
597 distributed_clock::time_point ToDistributedClock(
598 monotonic_clock::time_point time) {
James Kuszmaul09632422022-05-25 15:56:19 -0700599 CHECK(node_event_loop_factory_);
Austin Schuh858c9f32020-08-31 16:56:12 -0700600 return node_event_loop_factory_->ToDistributedClock(time);
601 }
602
Austin Schuh858c9f32020-08-31 16:56:12 -0700603 // Returns the current time on the remote node which sends messages on
604 // channel_index.
Austin Schuh58646e22021-08-23 23:51:46 -0700605 BootTimestamp monotonic_remote_now(size_t channel_index) {
606 State *s = channel_source_state_[channel_index];
607 return BootTimestamp{
608 .boot = s->boot_count(),
609 .time = s->node_event_loop_factory_->monotonic_now()};
Austin Schuh858c9f32020-08-31 16:56:12 -0700610 }
611
Austin Schuh5ee56872021-01-30 16:53:34 -0800612 // Returns the start time of the remote for the provided channel.
613 monotonic_clock::time_point monotonic_remote_start_time(
Austin Schuh58646e22021-08-23 23:51:46 -0700614 size_t boot_count, size_t channel_index) {
Austin Schuh2dc8c7d2021-07-01 17:41:28 -0700615 return channel_source_state_[channel_index]->monotonic_start_time(
616 boot_count);
Austin Schuh5ee56872021-01-30 16:53:34 -0800617 }
618
Austin Schuh58646e22021-08-23 23:51:46 -0700619 void DestroyEventLoop() { event_loop_unique_ptr_.reset(); }
620
621 EventLoop *MakeEventLoop() {
622 CHECK(!event_loop_unique_ptr_);
James Kuszmaul890c2492022-04-06 14:59:31 -0700623 // TODO(james): Enable exclusive senders on LogReader to allow us to
624 // ensure we are remapping channels correctly.
625 event_loop_unique_ptr_ = node_event_loop_factory_->MakeEventLoop(
626 "log_reader", {NodeEventLoopFactory::CheckSentTooFast::kNo,
James Kuszmaul94ca5132022-07-19 09:11:08 -0700627 NodeEventLoopFactory::ExclusiveSenders::kYes,
628 NonExclusiveChannels()});
Austin Schuh58646e22021-08-23 23:51:46 -0700629 return event_loop_unique_ptr_.get();
630 }
631
Austin Schuh2f8fd752020-09-01 22:38:28 -0700632 distributed_clock::time_point RemoteToDistributedClock(
633 size_t channel_index, monotonic_clock::time_point time) {
James Kuszmaul09632422022-05-25 15:56:19 -0700634 CHECK(node_event_loop_factory_);
Austin Schuh8d7e0bb2020-10-02 17:57:00 -0700635 return channel_source_state_[channel_index]
636 ->node_event_loop_factory_->ToDistributedClock(time);
Austin Schuh2f8fd752020-09-01 22:38:28 -0700637 }
638
639 const Node *remote_node(size_t channel_index) {
Austin Schuh8d7e0bb2020-10-02 17:57:00 -0700640 return channel_source_state_[channel_index]
641 ->node_event_loop_factory_->node();
Austin Schuh2f8fd752020-09-01 22:38:28 -0700642 }
643
Stephan Pleines559fa6c2022-01-06 17:23:51 -0800644 monotonic_clock::time_point monotonic_now() const {
Alexei Strotsb8c3a702023-04-19 21:38:25 -0700645 CHECK_NOTNULL(event_loop_);
James Kuszmaul09632422022-05-25 15:56:19 -0700646 return event_loop_->monotonic_now();
Austin Schuh2f8fd752020-09-01 22:38:28 -0700647 }
648
Austin Schuh858c9f32020-08-31 16:56:12 -0700649 // Sets the number of channels.
650 void SetChannelCount(size_t count);
651
652 // Sets the sender, filter, and target factory for a channel.
Austin Schuh969cd602021-01-03 00:09:45 -0800653 void SetChannel(size_t logged_channel_index, size_t factory_channel_index,
654 std::unique_ptr<RawSender> sender,
655 message_bridge::NoncausalOffsetEstimator *filter,
Austin Schuh58646e22021-08-23 23:51:46 -0700656 bool is_forwarded, State *source_state);
657
658 void SetRemoteTimestampSender(size_t logged_channel_index,
659 RemoteMessageSender *remote_timestamp_sender);
660
661 void RunOnStart();
662 void RunOnEnd();
Austin Schuh858c9f32020-08-31 16:56:12 -0700663
Austin Schuhe33c08d2022-02-03 18:15:21 -0800664 // Handles a logfile start event to potentially call the OnStart callbacks.
665 void NotifyLogfileStart();
666 // Handles a start time flag start event to potentially call the OnStart
667 // callbacks.
668 void NotifyFlagStart();
669
670 // Handles a logfile end event to potentially call the OnEnd callbacks.
671 void NotifyLogfileEnd();
// Handles an end time flag end event to potentially call the OnEnd
// callbacks.
674 void NotifyFlagEnd();
675
// Unregisters everything so we can destroy the event loop.
Austin Schuh58646e22021-08-23 23:51:46 -0700677 // TODO(austin): Is this needed? OnShutdown should be able to serve this
678 // need.
Austin Schuh858c9f32020-08-31 16:56:12 -0700679 void Deregister();
680
681 // Sets the current TimerHandle for the replay callback.
682 void set_timer_handler(TimerHandler *timer_handler) {
683 timer_handler_ = timer_handler;
Austin Schuh58646e22021-08-23 23:51:46 -0700684 if (timer_handler_) {
685 if (event_loop_->node() != nullptr) {
686 timer_handler_->set_name(absl::StrCat(
687 event_loop_->node()->name()->string_view(), "_main"));
688 } else {
689 timer_handler_->set_name("main");
690 }
691 }
Austin Schuh858c9f32020-08-31 16:56:12 -0700692 }
693
Austin Schuhe33c08d2022-02-03 18:15:21 -0800694 // Creates and registers the --start_time and --end_time event callbacks.
695 void SetStartTimeFlag(realtime_clock::time_point start_time);
696 void SetEndTimeFlag(realtime_clock::time_point end_time);
697
698 // Notices the next message to update the start/end time callbacks.
699 void ObserveNextMessage(monotonic_clock::time_point monotonic_event,
700 realtime_clock::time_point realtime_event);
701
702 // Clears the start and end time flag handlers so we can delete the event
703 // loop.
704 void ClearTimeFlags();
705
Austin Schuh858c9f32020-08-31 16:56:12 -0700706 // Sets the next wakeup time on the replay callback.
Philipp Schradera6712522023-07-05 20:25:11 -0700707 void Schedule(monotonic_clock::time_point next_time) {
708 timer_handler_->Schedule(
James Kuszmaul8866e642022-06-10 16:00:36 -0700709 std::max(monotonic_now(), next_time + clock_offset()));
Austin Schuh858c9f32020-08-31 16:56:12 -0700710 }
711
712 // Sends a buffer on the provided channel index.
Eric Schmiedebergae00e732023-04-12 15:53:17 -0600713 bool Send(const TimestampedMessage &&timestamped_message);
Austin Schuh858c9f32020-08-31 16:56:12 -0700714
James Kuszmaulc3f34d12022-08-15 15:57:55 -0700715 void MaybeSetClockOffset();
James Kuszmaul09632422022-05-25 15:56:19 -0700716 std::chrono::nanoseconds clock_offset() const { return clock_offset_; }
717
Austin Schuh858c9f32020-08-31 16:56:12 -0700718 // Returns a debug string for the channel merger.
Austin Schuh2f8fd752020-09-01 22:38:28 -0700719 std::string DebugString() const {
Austin Schuh287d43d2020-12-04 20:19:33 -0800720 if (!timestamp_mapper_) {
Austin Schuhe639ea12021-01-25 13:00:22 -0800721 return "";
Austin Schuh287d43d2020-12-04 20:19:33 -0800722 }
Austin Schuhe639ea12021-01-25 13:00:22 -0800723 return timestamp_mapper_->DebugString();
Austin Schuh2f8fd752020-09-01 22:38:28 -0700724 }
Austin Schuh858c9f32020-08-31 16:56:12 -0700725
Austin Schuh58646e22021-08-23 23:51:46 -0700726 void ClearRemoteTimestampSenders() {
727 channel_timestamp_loggers_.clear();
728 timestamp_loggers_.clear();
729 }
730
Austin Schuhbd5f74a2021-11-11 20:55:38 -0800731 void SetFoundLastMessage(bool val) {
732 found_last_message_ = val;
733 last_message_.resize(factory_channel_index_.size(), false);
734 }
735 bool found_last_message() const { return found_last_message_; }
736
737 void set_last_message(size_t channel_index) {
738 CHECK_LT(channel_index, last_message_.size());
739 last_message_[channel_index] = true;
740 }
741
742 bool last_message(size_t channel_index) {
743 CHECK_LT(channel_index, last_message_.size());
744 return last_message_[channel_index];
745 }
746
James Kuszmaula16a7912022-06-17 10:58:12 -0700747 void set_timing_accuracy_sender(
748 aos::Sender<timing::ReplayTiming> timing_sender) {
749 timing_statistics_sender_ = std::move(timing_sender);
750 OnEnd([this]() { SendMessageTimings(); });
751 }
752
// If running with ThreadedBuffering::kYes, will start the processing thread
// and queue up messages until the specified time. No-op if
// ThreadedBuffering::kNo is set. Should only be called once.
756 void QueueThreadUntil(BootTimestamp time);
757
Austin Schuh858c9f32020-08-31 16:56:12 -0700758 private:
James Kuszmaulc3f34d12022-08-15 15:57:55 -0700759 void TrackMessageSendTiming(const RawSender &sender,
760 monotonic_clock::time_point expected_send_time);
James Kuszmaula16a7912022-06-17 10:58:12 -0700761 void SendMessageTimings();
Austin Schuh858c9f32020-08-31 16:56:12 -0700762 // Log file.
Austin Schuh287d43d2020-12-04 20:19:33 -0800763 std::unique_ptr<TimestampMapper> timestamp_mapper_;
Austin Schuh63097262023-08-16 17:04:29 -0700764 const TimestampQueueStrategy timestamp_queue_strategy_;
Austin Schuh858c9f32020-08-31 16:56:12 -0700765
Austin Schuh858c9f32020-08-31 16:56:12 -0700766 // Senders.
767 std::vector<std::unique_ptr<RawSender>> channels_;
Austin Schuh969cd602021-01-03 00:09:45 -0800768 std::vector<RemoteMessageSender *> remote_timestamp_senders_;
Austin Schuh8d7e0bb2020-10-02 17:57:00 -0700769 // The mapping from logged channel index to sent channel index. Needed for
770 // sending out MessageHeaders.
771 std::vector<int> factory_channel_index_;
772
Austin Schuh9942bae2021-01-07 22:06:44 -0800773 struct ContiguousSentTimestamp {
774 // Most timestamps make it through the network, so it saves a ton of
775 // memory and CPU to store the start and end, and search for valid ranges.
776 // For one of the logs I looked at, we had 2 ranges for 4 days.
777 //
778 // Save monotonic times as well to help if a queue index ever wraps. Odds
779 // are very low, but doesn't hurt.
780 //
781 // The starting time and matching queue index.
782 monotonic_clock::time_point starting_monotonic_event_time =
Austin Schuh8d7e0bb2020-10-02 17:57:00 -0700783 monotonic_clock::min_time;
Austin Schuh9942bae2021-01-07 22:06:44 -0800784 uint32_t starting_queue_index = 0xffffffff;
Austin Schuh8d7e0bb2020-10-02 17:57:00 -0700785
Austin Schuh9942bae2021-01-07 22:06:44 -0800786 // Ending time and queue index.
787 monotonic_clock::time_point ending_monotonic_event_time =
788 monotonic_clock::max_time;
789 uint32_t ending_queue_index = 0xffffffff;
790
791 // The queue index that the first message was *actually* sent with. The
792 // queue indices are assumed to be contiguous through this range.
Austin Schuh8d7e0bb2020-10-02 17:57:00 -0700793 uint32_t actual_queue_index = 0xffffffff;
794 };
795
James Kuszmaul94ca5132022-07-19 09:11:08 -0700796 // Returns a list of channels which LogReader will send on but which may
797 // *also* get sent on by other applications in replay.
798 std::vector<
799 std::pair<const aos::Channel *, NodeEventLoopFactory::ExclusiveSenders>>
800 NonExclusiveChannels();
801
Austin Schuh8d7e0bb2020-10-02 17:57:00 -0700802 // Stores all the timestamps that have been sent on this channel. This is
803 // only done for channels which are forwarded and on the node which
Austin Schuh9942bae2021-01-07 22:06:44 -0800804 // initially sends the message. Compress using ranges and offsets.
805 std::vector<std::unique_ptr<std::vector<ContiguousSentTimestamp>>>
806 queue_index_map_;
Austin Schuh858c9f32020-08-31 16:56:12 -0700807
808 // Factory (if we are in sim) that this loop was created on.
809 NodeEventLoopFactory *node_event_loop_factory_ = nullptr;
Austin Schuhe33c08d2022-02-03 18:15:21 -0800810 SimulatedEventLoopFactory *event_loop_factory_ = nullptr;
811
James Kuszmaulb11a1502022-07-01 16:02:25 -0700812 // Callback for when this node hits its realtime end time.
813 std::function<void()> notice_realtime_end_;
814
Austin Schuh858c9f32020-08-31 16:56:12 -0700815 std::unique_ptr<EventLoop> event_loop_unique_ptr_;
816 // Event loop.
Austin Schuh58646e22021-08-23 23:51:46 -0700817 const Node *node_ = nullptr;
Austin Schuh858c9f32020-08-31 16:56:12 -0700818 EventLoop *event_loop_ = nullptr;
819 // And timer used to send messages.
Austin Schuh58646e22021-08-23 23:51:46 -0700820 TimerHandler *timer_handler_ = nullptr;
821 TimerHandler *startup_timer_ = nullptr;
Austin Schuh858c9f32020-08-31 16:56:12 -0700822
Austin Schuhe33c08d2022-02-03 18:15:21 -0800823 std::unique_ptr<EventNotifier> start_event_notifier_;
824 std::unique_ptr<EventNotifier> end_event_notifier_;
825
// Filters (or nullptr if it isn't a forwarded channel) for each channel.
// This corresponds to the object which is shared among all the channels
// going between 2 nodes.
Austin Schuh2f8fd752020-09-01 22:38:28 -0700830 std::vector<message_bridge::NoncausalOffsetEstimator *> filters_;
James Kuszmaul09632422022-05-25 15:56:19 -0700831 message_bridge::MultiNodeNoncausalOffsetEstimator *multinode_filters_;
Austin Schuh8bd96322020-02-13 21:18:22 -0800832
Austin Schuh84dd1332023-05-03 13:09:47 -0700833 // List of States (or nullptr if it isn't a forwarded channel) which
834 // correspond to the originating node.
Austin Schuh8d7e0bb2020-10-02 17:57:00 -0700835 std::vector<State *> channel_source_state_;
836
Austin Schuh61e973f2021-02-21 21:43:56 -0800837 // This is a cache for channel, connection mapping to the corresponding
838 // sender.
839 absl::btree_map<std::pair<const Channel *, const Connection *>,
840 std::shared_ptr<RemoteMessageSender>>
841 channel_timestamp_loggers_;
842
843 // Mapping from resolved RemoteMessage channel to RemoteMessage sender. This
844 // is the channel that timestamps are published to.
845 absl::btree_map<const Channel *, std::shared_ptr<RemoteMessageSender>>
846 timestamp_loggers_;
Austin Schuh58646e22021-08-23 23:51:46 -0700847
James Kuszmaul09632422022-05-25 15:56:19 -0700848 // Time offset between the log's monotonic clock and the current event
849 // loop's monotonic clock. Useful when replaying logs with non-simulated
850 // event loops.
851 std::chrono::nanoseconds clock_offset_{0};
852
Austin Schuh58646e22021-08-23 23:51:46 -0700853 std::vector<std::function<void()>> on_starts_;
854 std::vector<std::function<void()>> on_ends_;
855
James Kuszmaula16a7912022-06-17 10:58:12 -0700856 std::atomic<bool> stopped_ = false;
857 std::atomic<bool> started_ = false;
Austin Schuhbd5f74a2021-11-11 20:55:38 -0800858
859 bool found_last_message_ = false;
860 std::vector<bool> last_message_;
James Kuszmaula16a7912022-06-17 10:58:12 -0700861
862 std::vector<timing::MessageTimingT> send_timings_;
863 aos::Sender<timing::ReplayTiming> timing_statistics_sender_;
864
865 // Protects access to any internal state after Run() is called. Designed
866 // assuming that only one node is actually executing in replay.
867 // Threading design:
868 // * The worker passed to message_queuer_ has full ownership over all
869 // the log-reading code, timestamp filters, last_queued_message_, etc.
870 // * The main thread should only have exclusive access to the replay
871 // event loop and associated features (mainly senders).
872 // It will pop an item out of the queue (which does maintain a shared_ptr
873 // reference which may also be being used by the message_queuer_ thread,
874 // but having shared_ptr's accessing the same memory from
875 // separate threads is permissible).
876 // Enabling this in simulation is currently infeasible due to a lack of
877 // synchronization in the MultiNodeNoncausalOffsetEstimator. Essentially,
878 // when the message_queuer_ thread attempts to read/pop messages from the
879 // timestamp_mapper_, it will end up calling callbacks that update the
880 // internal state of the MultiNodeNoncausalOffsetEstimator. Simultaneously,
881 // the event scheduler that is running in the main thread to orchestrate the
882 // simulation will be querying the estimator to know what the clocks on the
883 // various nodes are at, leading to potential issues.
884 ThreadedBuffering threading_;
885 std::optional<BootTimestamp> last_queued_message_;
886 std::optional<util::ThreadedQueue<TimestampedMessage, BootTimestamp>>
887 message_queuer_;
Eric Schmiedebergb38477e2022-12-02 16:08:04 -0700888
889 // If a ReplayChannels was passed to LogReader, this will hold the
890 // indices of the channels to replay for the Node represented by
891 // the instance of LogReader::State.
Naman Guptacf6d4422023-03-01 11:41:00 -0800892 std::unique_ptr<const ReplayChannelIndices> replay_channel_indices_;
Eric Schmiedebergae00e732023-04-12 15:53:17 -0600893 const std::vector<std::function<void(void *message)>>
894 before_send_callbacks_;
Austin Schuh6f3babe2020-01-26 20:34:50 -0800895 };
896
Eric Schmiedebergae00e732023-04-12 15:53:17 -0600897 // Checks if any of the States have been constructed yet.
898 // This happens during Register
899 bool AreStatesInitialized() const;
900
Eric Schmiedebergb38477e2022-12-02 16:08:04 -0700901 // If a ReplayChannels was passed to LogReader then creates a
Naman Guptacf6d4422023-03-01 11:41:00 -0800902 // ReplayChannelIndices for the given node. Otherwise, returns a nullptr.
903 std::unique_ptr<const ReplayChannelIndices> MaybeMakeReplayChannelIndices(
Eric Schmiedebergb38477e2022-12-02 16:08:04 -0700904 const Node *node);
905
Austin Schuh8bd96322020-02-13 21:18:22 -0800906 // Node index -> State.
907 std::vector<std::unique_ptr<State>> states_;
908
// Creates the requested filter if it doesn't exist, regardless of whether
// these nodes can actually communicate directly.
Austin Schuh2f8fd752020-09-01 22:38:28 -0700912 message_bridge::NoncausalOffsetEstimator *GetFilter(const Node *node_a,
913 const Node *node_b);
Austin Schuh8bd96322020-02-13 21:18:22 -0800914
Austin Schuh63097262023-08-16 17:04:29 -0700915 // Returns the timestamp queueing strategy to use.
916 TimestampQueueStrategy ComputeTimestampQueueStrategy() const;
917
Austin Schuh8bd96322020-02-13 21:18:22 -0800918 // List of filters for a connection. The pointer to the first node will be
919 // less than the second node.
Austin Schuh0ca1fd32020-12-18 22:53:05 -0800920 std::unique_ptr<message_bridge::MultiNodeNoncausalOffsetEstimator> filters_;
Austin Schuh8bd96322020-02-13 21:18:22 -0800921
James Kuszmaul84ff3e52020-01-03 19:48:53 -0800922 std::unique_ptr<SimulatedEventLoopFactory> event_loop_factory_unique_ptr_;
923 SimulatedEventLoopFactory *event_loop_factory_ = nullptr;
James Kuszmaulc7bbb3e2020-01-03 20:01:00 -0800924
Austin Schuh6f3babe2020-01-26 20:34:50 -0800925 // Number of nodes which still have data to send. This is used to figure out
926 // when to exit.
927 size_t live_nodes_ = 0;
928
James Kuszmaulb11a1502022-07-01 16:02:25 -0700929 // Similar counter to live_nodes_, but for tracking which individual nodes are
930 // running and have yet to hit the realtime end time, if any.
931 size_t live_nodes_with_realtime_time_end_ = 0;
932
James Kuszmaulc7bbb3e2020-01-03 20:01:00 -0800933 const Configuration *replay_configuration_ = nullptr;
Austin Schuhcde938c2020-02-02 17:30:07 -0800934
Eric Schmiedebergb38477e2022-12-02 16:08:04 -0700935 // If a ReplayChannels was passed to LogReader, this will hold the
936 // name and type of channels to replay which is used when creating States.
937 const ReplayChannels *replay_channels_ = nullptr;
938
Eric Schmiedebergae00e732023-04-12 15:53:17 -0600939 // The callbacks that will be called before sending a message indexed by the
940 // channel index from the logged_configuration
941 std::vector<std::function<void(void *message)>> before_send_callbacks_;
942
Austin Schuhcde938c2020-02-02 17:30:07 -0800943 // If true, the replay timer will ignore any missing data. This is used
944 // during startup when we are bootstrapping everything and trying to get to
945 // the start of all the log files.
946 bool ignore_missing_data_ = false;
James Kuszmaul71a81932020-12-15 21:08:01 -0800947
948 // Whether to exit the SimulatedEventLoop when we finish reading the logs.
949 bool exit_on_finish_ = true;
Austin Schuhe33c08d2022-02-03 18:15:21 -0800950
951 realtime_clock::time_point start_time_ = realtime_clock::min_time;
952 realtime_clock::time_point end_time_ = realtime_clock::max_time;
Eric Schmiedeberge279b532023-04-19 16:36:02 -0600953 ConfigRemapper config_remapper_;
Austin Schuhe309d2a2019-11-29 13:25:21 -0800954};
955
956} // namespace logger
957} // namespace aos
958
Austin Schuhb06f03b2021-02-17 22:00:37 -0800959#endif // AOS_EVENTS_LOGGING_LOG_READER_H_