blob: 9e44996e4a50b9299de56a01ffce19509cc37022 [file] [log] [blame]
Austin Schuhb06f03b2021-02-17 22:00:37 -08001#ifndef AOS_EVENTS_LOGGING_LOG_READER_H_
2#define AOS_EVENTS_LOGGING_LOG_READER_H_
Austin Schuhe309d2a2019-11-29 13:25:21 -08003
Austin Schuh8bd96322020-02-13 21:18:22 -08004#include <chrono>
Austin Schuhe309d2a2019-11-29 13:25:21 -08005#include <deque>
James Kuszmaula16a7912022-06-17 10:58:12 -07006#include <queue>
James Kuszmaulc3f34d12022-08-15 15:57:55 -07007#include <string_view>
Austin Schuh2f8fd752020-09-01 22:38:28 -07008#include <tuple>
Austin Schuh6f3babe2020-01-26 20:34:50 -08009#include <vector>
Austin Schuhe309d2a2019-11-29 13:25:21 -080010
James Kuszmaulc3f34d12022-08-15 15:57:55 -070011#include "aos/condition.h"
Austin Schuhe309d2a2019-11-29 13:25:21 -080012#include "aos/events/event_loop.h"
Austin Schuhf6f9bf32020-10-11 14:37:43 -070013#include "aos/events/logging/logfile_sorting.h"
Austin Schuha36c8902019-12-30 18:07:15 -080014#include "aos/events/logging/logfile_utils.h"
James Kuszmaul38735e82019-12-07 16:42:06 -080015#include "aos/events/logging/logger_generated.h"
James Kuszmaula16a7912022-06-17 10:58:12 -070016#include "aos/events/logging/replay_timing_generated.h"
James Kuszmaul09632422022-05-25 15:56:19 -070017#include "aos/events/shm_event_loop.h"
Austin Schuh92547522019-12-28 14:33:43 -080018#include "aos/events/simulated_event_loop.h"
James Kuszmaulc3f34d12022-08-15 15:57:55 -070019#include "aos/mutex/mutex.h"
Austin Schuh2f8fd752020-09-01 22:38:28 -070020#include "aos/network/message_bridge_server_generated.h"
Austin Schuh0ca1fd32020-12-18 22:53:05 -080021#include "aos/network/multinode_timestamp_filter.h"
Austin Schuh0de30f32020-12-06 12:44:28 -080022#include "aos/network/remote_message_generated.h"
Austin Schuh8bd96322020-02-13 21:18:22 -080023#include "aos/network/timestamp_filter.h"
Austin Schuhe309d2a2019-11-29 13:25:21 -080024#include "aos/time/time.h"
James Kuszmaula16a7912022-06-17 10:58:12 -070025#include "aos/util/threaded_queue.h"
James Kuszmaulc3f34d12022-08-15 15:57:55 -070026#include "aos/uuid.h"
Austin Schuhe309d2a2019-11-29 13:25:21 -080027#include "flatbuffers/flatbuffers.h"
28
29namespace aos {
30namespace logger {
31
Austin Schuhe33c08d2022-02-03 18:15:21 -080032class EventNotifier;
33
Austin Schuh6f3babe2020-01-26 20:34:50 -080034// We end up with one of the following 3 log file types.
35//
36// Single node logged as the source node.
37// -> Replayed just on the source node.
38//
39// Forwarding timestamps only logged from the perspective of the destination
40// node.
41// -> Matched with data on source node and logged.
42//
43// Forwarding timestamps with data logged as the destination node.
44// -> Replayed just as the destination
45// -> Replayed as the source (Much harder, ordering is not defined)
46//
47// Duplicate data logged. -> CHECK that it matches and explode otherwise.
48//
49// This can be boiled down to a set of constraints and tools.
50//
51// 1) Forwarding timestamps and data need to be logged separately.
52// 2) Any forwarded data logged on the destination node needs to be logged
53// separately such that it can be sorted.
54//
55// 1) Log reader needs to be able to sort a list of log files.
56// 2) Log reader needs to be able to merge sorted lists of log files.
57// 3) Log reader needs to be able to match timestamps with messages.
58//
59// We also need to be able to generate multiple views of a log file depending on
60// the target.
61
Austin Schuhe309d2a2019-11-29 13:25:21 -080062// Replays all the channels in the logfile to the event loop.
63class LogReader {
64 public:
James Kuszmaulc7bbb3e2020-01-03 20:01:00 -080065 // If you want to supply a new configuration that will be used for replay
66 // (e.g., to change message rates, or to populate an updated schema), then
67 // pass it in here. It must provide all the channels that the original logged
68 // config did.
Austin Schuh6f3babe2020-01-26 20:34:50 -080069 //
Austin Schuh287d43d2020-12-04 20:19:33 -080070 // The single file constructor calls SortParts internally.
James Kuszmaulc7bbb3e2020-01-03 20:01:00 -080071 LogReader(std::string_view filename,
72 const Configuration *replay_configuration = nullptr);
Austin Schuh287d43d2020-12-04 20:19:33 -080073 LogReader(std::vector<LogFile> log_files,
Austin Schuh11d43732020-09-21 17:28:30 -070074 const Configuration *replay_configuration = nullptr);
James Kuszmaul7daef362019-12-31 18:28:17 -080075 ~LogReader();
Austin Schuhe309d2a2019-11-29 13:25:21 -080076
Austin Schuh6331ef92020-01-07 18:28:09 -080077 // Registers all the callbacks to send the log file data out on an event loop
78 // created in event_loop_factory. This also updates time to be at the start
79 // of the log file by running until the log file starts.
80 // Note: the configuration used in the factory should be configuration()
81 // below, but can be anything as long as the locations needed to send
82 // everything are available.
James Kuszmaul84ff3e52020-01-03 19:48:53 -080083 void Register(SimulatedEventLoopFactory *event_loop_factory);
Austin Schuhe33c08d2022-02-03 18:15:21 -080084
Austin Schuh58646e22021-08-23 23:51:46 -070085 // Registers all the callbacks to send the log file data out to an event loop
86 // factory. This does not start replaying or change the current distributed
87 // time of the factory. It does change the monotonic clocks to be right.
88 void RegisterWithoutStarting(SimulatedEventLoopFactory *event_loop_factory);
Austin Schuhe33c08d2022-02-03 18:15:21 -080089 // Runs the log until the last start time. Register above is defined as:
90 // Register(...) {
91 // RegisterWithoutStarting
92 // StartAfterRegister
93 // }
94 // This should generally be considered as a stepping stone to convert from
95 // Register() to RegisterWithoutStarting() incrementally.
96 void StartAfterRegister(SimulatedEventLoopFactory *event_loop_factory);
97
Austin Schuh6331ef92020-01-07 18:28:09 -080098 // Creates a SimulatedEventLoopFactory accessible via event_loop_factory(),
99 // and then calls Register.
100 void Register();
James Kuszmaul09632422022-05-25 15:56:19 -0700101
Austin Schuh6331ef92020-01-07 18:28:09 -0800102 // Registers callbacks for all the events after the log file starts. This is
103 // only useful when replaying live.
Austin Schuhe309d2a2019-11-29 13:25:21 -0800104 void Register(EventLoop *event_loop);
Austin Schuh6331ef92020-01-07 18:28:09 -0800105
James Kuszmaula16a7912022-06-17 10:58:12 -0700106 // Sets a sender that should be used for tracking timing statistics. If not
107 // set, no statistics will be recorded.
108 void set_timing_accuracy_sender(
109 const Node *node, aos::Sender<timing::ReplayTiming> timing_sender) {
110 states_[configuration::GetNodeIndex(configuration(), node)]
111 ->set_timing_accuracy_sender(std::move(timing_sender));
112 }
113
Austin Schuh58646e22021-08-23 23:51:46 -0700114 // Called whenever a log file starts for a node.
115 void OnStart(std::function<void()> fn);
116 void OnStart(const Node *node, std::function<void()> fn);
117 // Called whenever a log file ends for a node.
118 void OnEnd(std::function<void()> fn);
119 void OnEnd(const Node *node, std::function<void()> fn);
120
James Kuszmaul84ff3e52020-01-03 19:48:53 -0800121 // Unregisters the senders. You only need to call this if you separately
122 // supplied an event loop or event loop factory and the lifetimes are such
123 // that they need to be explicitly destroyed before the LogReader destructor
124 // gets called.
Austin Schuhe309d2a2019-11-29 13:25:21 -0800125 void Deregister();
126
Austin Schuh0c297012020-09-16 18:41:59 -0700127 // Returns the configuration being used for replay from the log file.
128 // Note that this may be different from the configuration actually used for
129 // handling events. You should generally only use this to create a
130 // SimulatedEventLoopFactory, and then get the configuration from there for
131 // everything else.
James Kuszmaulc7bbb3e2020-01-03 20:01:00 -0800132 const Configuration *logged_configuration() const;
Austin Schuh11d43732020-09-21 17:28:30 -0700133 // Returns the configuration being used for replay from the log file.
134 // Note that this may be different from the configuration actually used for
135 // handling events. You should generally only use this to create a
136 // SimulatedEventLoopFactory, and then get the configuration from there for
137 // everything else.
Austin Schuh6f3babe2020-01-26 20:34:50 -0800138 // The pointer is invalidated whenever RemapLoggedChannel is called.
Austin Schuh15649d62019-12-28 16:36:38 -0800139 const Configuration *configuration() const;
140
Austin Schuh6f3babe2020-01-26 20:34:50 -0800141 // Returns the nodes that this log file was created on. This is a list of
Austin Schuh07676622021-01-21 18:59:17 -0800142 // pointers to a node in the nodes() list inside logged_configuration().
143 std::vector<const Node *> LoggedNodes() const;
Austin Schuhe309d2a2019-11-29 13:25:21 -0800144
145 // Returns the starting timestamp for the log file.
Austin Schuh11d43732020-09-21 17:28:30 -0700146 monotonic_clock::time_point monotonic_start_time(
147 const Node *node = nullptr) const;
148 realtime_clock::time_point realtime_start_time(
149 const Node *node = nullptr) const;
Austin Schuhe309d2a2019-11-29 13:25:21 -0800150
Austin Schuhe33c08d2022-02-03 18:15:21 -0800151 // Sets the start and end times to replay data until for all nodes. This
152 // overrides the --start_time and --end_time flags. The default is to replay
153 // all data.
154 void SetStartTime(std::string start_time);
155 void SetStartTime(realtime_clock::time_point start_time);
156 void SetEndTime(std::string end_time);
157 void SetEndTime(realtime_clock::time_point end_time);
158
James Kuszmaulc7bbb3e2020-01-03 20:01:00 -0800159 // Causes the logger to publish the provided channel on a different name so
160 // that replayed applications can publish on the proper channel name without
161 // interference. This operates on raw channel names, without any node or
162 // application specific mappings.
163 void RemapLoggedChannel(std::string_view name, std::string_view type,
Austin Schuh0de30f32020-12-06 12:44:28 -0800164 std::string_view add_prefix = "/original",
165 std::string_view new_type = "");
// Typed convenience overload: the channel type string is taken from
// T::GetFullyQualifiedName() and the call is forwarded to the string-based
// RemapLoggedChannel(name, type, add_prefix, new_type).
template <typename T>
void RemapLoggedChannel(std::string_view name,
                        std::string_view add_prefix = "/original",
                        std::string_view new_type = "") {
  const auto &type = T::GetFullyQualifiedName();
  RemapLoggedChannel(name, type, add_prefix, new_type);
}
172
Austin Schuh01b4c352020-09-21 23:09:39 -0700173 // Remaps the provided channel, though this respects node mappings, and
174 // preserves them too. This makes it so if /aos -> /pi1/aos on one node,
175 // /original/aos -> /original/pi1/aos on the same node after renaming, just
Austin Schuh0de30f32020-12-06 12:44:28 -0800176 // like you would hope. If new_type is not empty, the new channel will use
177 // the provided type instead. This allows for renaming messages.
Austin Schuh01b4c352020-09-21 23:09:39 -0700178 //
179 // TODO(austin): If you have 2 nodes remapping something to the same channel,
180 // this doesn't handle that. No use cases exist yet for that, so it isn't
181 // being done yet.
182 void RemapLoggedChannel(std::string_view name, std::string_view type,
183 const Node *node,
Austin Schuh0de30f32020-12-06 12:44:28 -0800184 std::string_view add_prefix = "/original",
185 std::string_view new_type = "");
Brian Silvermande9f3ff2020-04-28 16:56:58 -0700186 template <typename T>
Austin Schuh01b4c352020-09-21 23:09:39 -0700187 void RemapLoggedChannel(std::string_view name, const Node *node,
Austin Schuh0de30f32020-12-06 12:44:28 -0800188 std::string_view add_prefix = "/original",
189 std::string_view new_type = "") {
190 RemapLoggedChannel(name, T::GetFullyQualifiedName(), node, add_prefix,
191 new_type);
Austin Schuh01b4c352020-09-21 23:09:39 -0700192 }
193
194 template <typename T>
195 bool HasChannel(std::string_view name, const Node *node = nullptr) {
Austin Schuh0ca51f32020-12-25 21:51:45 -0800196 return configuration::GetChannel(logged_configuration(), name,
Austin Schuh0de30f32020-12-06 12:44:28 -0800197 T::GetFullyQualifiedName(), "", node,
198 true) != nullptr;
Brian Silvermande9f3ff2020-04-28 16:56:58 -0700199 }
200
Austin Schuh82529062021-12-08 12:09:52 -0800201 template <typename T>
202 void MaybeRemapLoggedChannel(std::string_view name,
203 const Node *node = nullptr) {
204 if (HasChannel<T>(name, node)) {
205 RemapLoggedChannel<T>(name, node);
206 }
207 }
208
James Kuszmaul4f106fb2021-01-05 20:53:02 -0800209 // Returns true if the channel exists on the node and was logged.
210 template <typename T>
211 bool HasLoggedChannel(std::string_view name, const Node *node = nullptr) {
Austin Schuh5ee56872021-01-30 16:53:34 -0800212 const Channel *channel =
213 configuration::GetChannel(logged_configuration(), name,
214 T::GetFullyQualifiedName(), "", node, true);
James Kuszmaul4f106fb2021-01-05 20:53:02 -0800215 if (channel == nullptr) return false;
216 return channel->logger() != LoggerConfig::NOT_LOGGED;
217 }
218
Austin Schuh1c227352021-09-17 12:53:54 -0700219 // Returns a list of all the original channels from remapping.
220 std::vector<const Channel *> RemappedChannels() const;
221
James Kuszmaul84ff3e52020-01-03 19:48:53 -0800222 SimulatedEventLoopFactory *event_loop_factory() {
223 return event_loop_factory_;
224 }
225
Austin Schuh0ca51f32020-12-25 21:51:45 -0800226 std::string_view name() const { return log_files_[0].name; }
Austin Schuh0c297012020-09-16 18:41:59 -0700227
James Kuszmaul71a81932020-12-15 21:08:01 -0800228 // Set whether to exit the SimulatedEventLoopFactory when we finish reading
229 // the logfile.
230 void set_exit_on_finish(bool exit_on_finish) {
231 exit_on_finish_ = exit_on_finish;
232 }
233
James Kuszmaulb67409b2022-06-20 16:25:03 -0700234 // Sets the realtime replay rate. A value of 1.0 will cause the scheduler to
235 // try to play events in realtime. 0.5 will run at half speed. Use infinity
236 // (the default) to run as fast as possible. This can be changed during
237 // run-time.
238 // Only applies when running against a SimulatedEventLoopFactory.
239 void SetRealtimeReplayRate(double replay_rate);
240
Austin Schuhe309d2a2019-11-29 13:25:21 -0800241 private:
Austin Schuh58646e22021-08-23 23:51:46 -0700242 void Register(EventLoop *event_loop, const Node *node);
243
244 void RegisterDuringStartup(EventLoop *event_loop, const Node *node);
245
246 const Channel *RemapChannel(const EventLoop *event_loop, const Node *node,
Austin Schuh6f3babe2020-01-26 20:34:50 -0800247 const Channel *channel);
248
Austin Schuhe309d2a2019-11-29 13:25:21 -0800249 // Queues at least max_out_of_order_duration_ messages into channels_.
250 void QueueMessages();
James Kuszmaulc7bbb3e2020-01-03 20:01:00 -0800251 // Handle constructing a configuration with all the additional remapped
252 // channels from calls to RemapLoggedChannel.
253 void MakeRemappedConfig();
Austin Schuhe309d2a2019-11-29 13:25:21 -0800254
Austin Schuh2f8fd752020-09-01 22:38:28 -0700255 // Returns the number of nodes.
256 size_t nodes_count() const {
257 return !configuration::MultiNode(logged_configuration())
258 ? 1u
259 : logged_configuration()->nodes()->size();
260 }
261
Austin Schuh287d43d2020-12-04 20:19:33 -0800262 const std::vector<LogFile> log_files_;
Austin Schuh6f3babe2020-01-26 20:34:50 -0800263
Austin Schuh969cd602021-01-03 00:09:45 -0800264 // Class to manage sending RemoteMessages on the provided node after the
265 // correct delay.
Austin Schuh5ee56872021-01-30 16:53:34 -0800266 class RemoteMessageSender {
Austin Schuh969cd602021-01-03 00:09:45 -0800267 public:
268 RemoteMessageSender(aos::Sender<message_bridge::RemoteMessage> sender,
269 EventLoop *event_loop);
270 RemoteMessageSender(RemoteMessageSender const &) = delete;
271 RemoteMessageSender &operator=(RemoteMessageSender const &) = delete;
272
273 // Sends the provided message. If monotonic_timestamp_time is min_time,
274 // send it immediately.
275 void Send(
276 FlatbufferDetachedBuffer<message_bridge::RemoteMessage> remote_message,
Austin Schuh58646e22021-08-23 23:51:46 -0700277 BootTimestamp monotonic_timestamp_time, size_t source_boot_count);
Austin Schuh969cd602021-01-03 00:09:45 -0800278
279 private:
280 // Handles actually sending the timestamp if we were delayed.
281 void SendTimestamp();
282 // Handles scheduling the timer to send at the correct time.
283 void ScheduleTimestamp();
284
285 EventLoop *event_loop_;
286 aos::Sender<message_bridge::RemoteMessage> sender_;
287 aos::TimerHandler *timer_;
288
289 // Time we are scheduled for, or min_time if we aren't scheduled.
290 monotonic_clock::time_point scheduled_time_ = monotonic_clock::min_time;
291
292 struct Timestamp {
293 Timestamp(FlatbufferDetachedBuffer<message_bridge::RemoteMessage>
294 new_remote_message,
295 monotonic_clock::time_point new_monotonic_timestamp_time)
296 : remote_message(std::move(new_remote_message)),
297 monotonic_timestamp_time(new_monotonic_timestamp_time) {}
298 FlatbufferDetachedBuffer<message_bridge::RemoteMessage> remote_message;
299 monotonic_clock::time_point monotonic_timestamp_time;
300 };
301
302 // List of messages to send. The timer works through them and then disables
303 // itself automatically.
304 std::deque<Timestamp> remote_timestamps_;
305 };
306
Austin Schuh6f3babe2020-01-26 20:34:50 -0800307 // State per node.
Austin Schuh858c9f32020-08-31 16:56:12 -0700308 class State {
309 public:
James Kuszmaula16a7912022-06-17 10:58:12 -0700310 // Whether we should spin up a separate thread for buffering up messages.
311 // Only allowed in realtime replay--see comments on threading_ member for
312 // details.
313 enum class ThreadedBuffering { kYes, kNo };
James Kuszmaul09632422022-05-25 15:56:19 -0700314 State(std::unique_ptr<TimestampMapper> timestamp_mapper,
315 message_bridge::MultiNodeNoncausalOffsetEstimator *multinode_filters,
James Kuszmaula16a7912022-06-17 10:58:12 -0700316 const Node *node, ThreadedBuffering threading);
Austin Schuh287d43d2020-12-04 20:19:33 -0800317
318 // Connects up the timestamp mappers.
319 void AddPeer(State *peer);
Austin Schuh6f3babe2020-01-26 20:34:50 -0800320
Austin Schuhe639ea12021-01-25 13:00:22 -0800321 TimestampMapper *timestamp_mapper() { return timestamp_mapper_.get(); }
322
Austin Schuhdda74ec2021-01-03 19:30:37 -0800323 // Returns the next sorted message with all the timestamps extracted and
324 // matched.
325 TimestampedMessage PopOldest();
Austin Schuh188eabe2020-12-29 23:41:13 -0800326
Austin Schuh858c9f32020-08-31 16:56:12 -0700327 // Returns the monotonic time of the oldest message.
James Kuszmaula16a7912022-06-17 10:58:12 -0700328 BootTimestamp SingleThreadedOldestMessageTime();
329 // Returns the monotonic time of the oldest message, handling querying the
330 // separate thread if ThreadedBuffering was set.
331 BootTimestamp MultiThreadedOldestMessageTime();
Austin Schuh58646e22021-08-23 23:51:46 -0700332
333 size_t boot_count() const {
334 // If we are replaying directly into an event loop, we can't reboot. So
335 // we will stay stuck on the 0th boot.
James Kuszmaul09632422022-05-25 15:56:19 -0700336 if (!node_event_loop_factory_) {
337 if (event_loop_ == nullptr) {
338 // If boot_count is being checked after startup for any of the
339 // non-primary nodes, then returning 0 may not be accurate (since
340 // remote nodes *can* reboot even if the EventLoop being played to
341 // can't).
342 CHECK(!started_);
343 CHECK(!stopped_);
344 }
345 return 0u;
346 }
Austin Schuh58646e22021-08-23 23:51:46 -0700347 return node_event_loop_factory_->boot_count();
348 }
Austin Schuh858c9f32020-08-31 16:56:12 -0700349
350 // Primes the queues inside State. Should be called before calling
351 // OldestMessageTime.
352 void SeedSortedMessages();
Austin Schuh8bd96322020-02-13 21:18:22 -0800353
Austin Schuh58646e22021-08-23 23:51:46 -0700354 void SetupStartupTimer() {
355 const monotonic_clock::time_point start_time =
356 monotonic_start_time(boot_count());
357 if (start_time == monotonic_clock::min_time) {
358 LOG(ERROR)
359 << "No start time, skipping, please figure out when this happens";
Austin Schuhe33c08d2022-02-03 18:15:21 -0800360 NotifyLogfileStart();
Austin Schuh58646e22021-08-23 23:51:46 -0700361 return;
362 }
James Kuszmaul09632422022-05-25 15:56:19 -0700363 if (node_event_loop_factory_) {
364 CHECK_GE(start_time + clock_offset(), event_loop_->monotonic_now());
365 }
366 startup_timer_->Setup(start_time + clock_offset());
Austin Schuh58646e22021-08-23 23:51:46 -0700367 }
368
369 void set_startup_timer(TimerHandler *timer_handler) {
370 startup_timer_ = timer_handler;
371 if (startup_timer_) {
372 if (event_loop_->node() != nullptr) {
373 startup_timer_->set_name(absl::StrCat(
374 event_loop_->node()->name()->string_view(), "_startup"));
375 } else {
376 startup_timer_->set_name("startup");
377 }
378 }
379 }
380
Austin Schuh858c9f32020-08-31 16:56:12 -0700381 // Returns the starting time for this node.
Austin Schuh2dc8c7d2021-07-01 17:41:28 -0700382 monotonic_clock::time_point monotonic_start_time(size_t boot_count) const {
383 return timestamp_mapper_
384 ? timestamp_mapper_->monotonic_start_time(boot_count)
385 : monotonic_clock::min_time;
Austin Schuh858c9f32020-08-31 16:56:12 -0700386 }
Austin Schuh2dc8c7d2021-07-01 17:41:28 -0700387 realtime_clock::time_point realtime_start_time(size_t boot_count) const {
388 return timestamp_mapper_
389 ? timestamp_mapper_->realtime_start_time(boot_count)
390 : realtime_clock::min_time;
Austin Schuh858c9f32020-08-31 16:56:12 -0700391 }
392
393 // Sets the node event loop factory for replaying into a
394 // SimulatedEventLoopFactory. Returns the EventLoop to use.
Austin Schuh60e77942022-05-16 17:48:24 -0700395 void SetNodeEventLoopFactory(NodeEventLoopFactory *node_event_loop_factory,
396 SimulatedEventLoopFactory *event_loop_factory);
Austin Schuh858c9f32020-08-31 16:56:12 -0700397
398 // Sets and gets the event loop to use.
399 void set_event_loop(EventLoop *event_loop) { event_loop_ = event_loop; }
400 EventLoop *event_loop() { return event_loop_; }
401
Austin Schuh58646e22021-08-23 23:51:46 -0700402 const Node *node() const { return node_; }
403
404 void Register(EventLoop *event_loop);
405
406 void OnStart(std::function<void()> fn);
407 void OnEnd(std::function<void()> fn);
408
Austin Schuh858c9f32020-08-31 16:56:12 -0700409 // Sets the current realtime offset from the monotonic clock for this node
410 // (if we are on a simulated event loop).
411 void SetRealtimeOffset(monotonic_clock::time_point monotonic_time,
412 realtime_clock::time_point realtime_time) {
413 if (node_event_loop_factory_ != nullptr) {
414 node_event_loop_factory_->SetRealtimeOffset(monotonic_time,
415 realtime_time);
416 }
417 }
418
Austin Schuh8d7e0bb2020-10-02 17:57:00 -0700419 // Returns the MessageHeader sender to log delivery timestamps to for the
420 // provided remote node.
Austin Schuh61e973f2021-02-21 21:43:56 -0800421 RemoteMessageSender *RemoteTimestampSender(const Channel *channel,
422 const Connection *connection);
Austin Schuh8d7e0bb2020-10-02 17:57:00 -0700423
Austin Schuh858c9f32020-08-31 16:56:12 -0700424 // Converts a timestamp from the monotonic clock on this node to the
425 // distributed clock.
426 distributed_clock::time_point ToDistributedClock(
427 monotonic_clock::time_point time) {
James Kuszmaul09632422022-05-25 15:56:19 -0700428 CHECK(node_event_loop_factory_);
Austin Schuh858c9f32020-08-31 16:56:12 -0700429 return node_event_loop_factory_->ToDistributedClock(time);
430 }
431
Austin Schuh858c9f32020-08-31 16:56:12 -0700432 // Returns the current time on the remote node which sends messages on
433 // channel_index.
Austin Schuh58646e22021-08-23 23:51:46 -0700434 BootTimestamp monotonic_remote_now(size_t channel_index) {
435 State *s = channel_source_state_[channel_index];
436 return BootTimestamp{
437 .boot = s->boot_count(),
438 .time = s->node_event_loop_factory_->monotonic_now()};
Austin Schuh858c9f32020-08-31 16:56:12 -0700439 }
440
Austin Schuh5ee56872021-01-30 16:53:34 -0800441 // Returns the start time of the remote for the provided channel.
442 monotonic_clock::time_point monotonic_remote_start_time(
Austin Schuh58646e22021-08-23 23:51:46 -0700443 size_t boot_count, size_t channel_index) {
Austin Schuh2dc8c7d2021-07-01 17:41:28 -0700444 return channel_source_state_[channel_index]->monotonic_start_time(
445 boot_count);
Austin Schuh5ee56872021-01-30 16:53:34 -0800446 }
447
Austin Schuh58646e22021-08-23 23:51:46 -0700448 void DestroyEventLoop() { event_loop_unique_ptr_.reset(); }
449
450 EventLoop *MakeEventLoop() {
451 CHECK(!event_loop_unique_ptr_);
James Kuszmaul890c2492022-04-06 14:59:31 -0700452 // TODO(james): Enable exclusive senders on LogReader to allow us to
453 // ensure we are remapping channels correctly.
454 event_loop_unique_ptr_ = node_event_loop_factory_->MakeEventLoop(
455 "log_reader", {NodeEventLoopFactory::CheckSentTooFast::kNo,
James Kuszmaul94ca5132022-07-19 09:11:08 -0700456 NodeEventLoopFactory::ExclusiveSenders::kYes,
457 NonExclusiveChannels()});
Austin Schuh58646e22021-08-23 23:51:46 -0700458 return event_loop_unique_ptr_.get();
459 }
460
Austin Schuh2f8fd752020-09-01 22:38:28 -0700461 distributed_clock::time_point RemoteToDistributedClock(
462 size_t channel_index, monotonic_clock::time_point time) {
James Kuszmaul09632422022-05-25 15:56:19 -0700463 CHECK(node_event_loop_factory_);
Austin Schuh8d7e0bb2020-10-02 17:57:00 -0700464 return channel_source_state_[channel_index]
465 ->node_event_loop_factory_->ToDistributedClock(time);
Austin Schuh2f8fd752020-09-01 22:38:28 -0700466 }
467
468 const Node *remote_node(size_t channel_index) {
Austin Schuh8d7e0bb2020-10-02 17:57:00 -0700469 return channel_source_state_[channel_index]
470 ->node_event_loop_factory_->node();
Austin Schuh2f8fd752020-09-01 22:38:28 -0700471 }
472
Stephan Pleines559fa6c2022-01-06 17:23:51 -0800473 monotonic_clock::time_point monotonic_now() const {
James Kuszmaul09632422022-05-25 15:56:19 -0700474 return event_loop_->monotonic_now();
Austin Schuh2f8fd752020-09-01 22:38:28 -0700475 }
476
Austin Schuh858c9f32020-08-31 16:56:12 -0700477 // Sets the number of channels.
478 void SetChannelCount(size_t count);
479
480 // Sets the sender, filter, and target factory for a channel.
Austin Schuh969cd602021-01-03 00:09:45 -0800481 void SetChannel(size_t logged_channel_index, size_t factory_channel_index,
482 std::unique_ptr<RawSender> sender,
483 message_bridge::NoncausalOffsetEstimator *filter,
Austin Schuh58646e22021-08-23 23:51:46 -0700484 bool is_forwarded, State *source_state);
485
486 void SetRemoteTimestampSender(size_t logged_channel_index,
487 RemoteMessageSender *remote_timestamp_sender);
488
489 void RunOnStart();
490 void RunOnEnd();
Austin Schuh858c9f32020-08-31 16:56:12 -0700491
Austin Schuhe33c08d2022-02-03 18:15:21 -0800492 // Handles a logfile start event to potentially call the OnStart callbacks.
493 void NotifyLogfileStart();
494 // Handles a start time flag start event to potentially call the OnStart
495 // callbacks.
496 void NotifyFlagStart();
497
498 // Handles a logfile end event to potentially call the OnEnd callbacks.
499 void NotifyLogfileEnd();
500 // Handles an end time flag end event to potentially call the OnEnd
501 // callbacks.
502 void NotifyFlagEnd();
503
Austin Schuh858c9f32020-08-31 16:56:12 -0700504 // Unregisters everything so we can destroy the event loop.
Austin Schuh58646e22021-08-23 23:51:46 -0700505 // TODO(austin): Is this needed? OnShutdown should be able to serve this
506 // need.
Austin Schuh858c9f32020-08-31 16:56:12 -0700507 void Deregister();
508
509 // Sets the current TimerHandle for the replay callback.
510 void set_timer_handler(TimerHandler *timer_handler) {
511 timer_handler_ = timer_handler;
Austin Schuh58646e22021-08-23 23:51:46 -0700512 if (timer_handler_) {
513 if (event_loop_->node() != nullptr) {
514 timer_handler_->set_name(absl::StrCat(
515 event_loop_->node()->name()->string_view(), "_main"));
516 } else {
517 timer_handler_->set_name("main");
518 }
519 }
Austin Schuh858c9f32020-08-31 16:56:12 -0700520 }
521
Austin Schuhe33c08d2022-02-03 18:15:21 -0800522 // Creates and registers the --start_time and --end_time event callbacks.
523 void SetStartTimeFlag(realtime_clock::time_point start_time);
524 void SetEndTimeFlag(realtime_clock::time_point end_time);
525
526 // Notices the next message to update the start/end time callbacks.
527 void ObserveNextMessage(monotonic_clock::time_point monotonic_event,
528 realtime_clock::time_point realtime_event);
529
530 // Clears the start and end time flag handlers so we can delete the event
531 // loop.
532 void ClearTimeFlags();
533
Austin Schuh858c9f32020-08-31 16:56:12 -0700534 // Sets the next wakeup time on the replay callback.
535 void Setup(monotonic_clock::time_point next_time) {
James Kuszmaul8866e642022-06-10 16:00:36 -0700536 timer_handler_->Setup(
537 std::max(monotonic_now(), next_time + clock_offset()));
Austin Schuh858c9f32020-08-31 16:56:12 -0700538 }
539
540 // Sends a buffer on the provided channel index.
Austin Schuh287d43d2020-12-04 20:19:33 -0800541 bool Send(const TimestampedMessage &timestamped_message);
Austin Schuh858c9f32020-08-31 16:56:12 -0700542
James Kuszmaulc3f34d12022-08-15 15:57:55 -0700543 void MaybeSetClockOffset();
James Kuszmaul09632422022-05-25 15:56:19 -0700544 std::chrono::nanoseconds clock_offset() const { return clock_offset_; }
545
Austin Schuh858c9f32020-08-31 16:56:12 -0700546 // Returns a debug string for the channel merger.
Austin Schuh2f8fd752020-09-01 22:38:28 -0700547 std::string DebugString() const {
Austin Schuh287d43d2020-12-04 20:19:33 -0800548 if (!timestamp_mapper_) {
Austin Schuhe639ea12021-01-25 13:00:22 -0800549 return "";
Austin Schuh287d43d2020-12-04 20:19:33 -0800550 }
Austin Schuhe639ea12021-01-25 13:00:22 -0800551 return timestamp_mapper_->DebugString();
Austin Schuh2f8fd752020-09-01 22:38:28 -0700552 }
Austin Schuh858c9f32020-08-31 16:56:12 -0700553
Austin Schuh58646e22021-08-23 23:51:46 -0700554 void ClearRemoteTimestampSenders() {
555 channel_timestamp_loggers_.clear();
556 timestamp_loggers_.clear();
557 }
558
Austin Schuhbd5f74a2021-11-11 20:55:38 -0800559 void SetFoundLastMessage(bool val) {
560 found_last_message_ = val;
561 last_message_.resize(factory_channel_index_.size(), false);
562 }
563 bool found_last_message() const { return found_last_message_; }
564
565 void set_last_message(size_t channel_index) {
566 CHECK_LT(channel_index, last_message_.size());
567 last_message_[channel_index] = true;
568 }
569
570 bool last_message(size_t channel_index) {
571 CHECK_LT(channel_index, last_message_.size());
572 return last_message_[channel_index];
573 }
574
James Kuszmaula16a7912022-06-17 10:58:12 -0700575 void set_timing_accuracy_sender(
576 aos::Sender<timing::ReplayTiming> timing_sender) {
577 timing_statistics_sender_ = std::move(timing_sender);
578 OnEnd([this]() { SendMessageTimings(); });
579 }
580
581 // If running with ThreadedBuffering::kYes, will start the processing thread
582 // and queue up messages until the specified time. No-op if
583 // ThreadedBuffering::kNo is set. Should only be called once.
584 void QueueThreadUntil(BootTimestamp time);
585
Austin Schuh858c9f32020-08-31 16:56:12 -0700586 private:
James Kuszmaulc3f34d12022-08-15 15:57:55 -0700587 void TrackMessageSendTiming(const RawSender &sender,
588 monotonic_clock::time_point expected_send_time);
James Kuszmaula16a7912022-06-17 10:58:12 -0700589 void SendMessageTimings();
Austin Schuh858c9f32020-08-31 16:56:12 -0700590 // Log file.
Austin Schuh287d43d2020-12-04 20:19:33 -0800591 std::unique_ptr<TimestampMapper> timestamp_mapper_;
Austin Schuh858c9f32020-08-31 16:56:12 -0700592
Austin Schuh858c9f32020-08-31 16:56:12 -0700593 // Senders.
594 std::vector<std::unique_ptr<RawSender>> channels_;
Austin Schuh969cd602021-01-03 00:09:45 -0800595 std::vector<RemoteMessageSender *> remote_timestamp_senders_;
Austin Schuh8d7e0bb2020-10-02 17:57:00 -0700596 // The mapping from logged channel index to sent channel index. Needed for
597 // sending out MessageHeaders.
598 std::vector<int> factory_channel_index_;
599
Austin Schuh9942bae2021-01-07 22:06:44 -0800600 struct ContiguousSentTimestamp {
601 // Most timestamps make it through the network, so it saves a ton of
602 // memory and CPU to store the start and end, and search for valid ranges.
603 // For one of the logs I looked at, we had 2 ranges for 4 days.
604 //
605 // Save monotonic times as well to help if a queue index ever wraps. Odds
606 // are very low, but doesn't hurt.
607 //
608 // The starting time and matching queue index.
609 monotonic_clock::time_point starting_monotonic_event_time =
Austin Schuh8d7e0bb2020-10-02 17:57:00 -0700610 monotonic_clock::min_time;
Austin Schuh9942bae2021-01-07 22:06:44 -0800611 uint32_t starting_queue_index = 0xffffffff;
Austin Schuh8d7e0bb2020-10-02 17:57:00 -0700612
Austin Schuh9942bae2021-01-07 22:06:44 -0800613 // Ending time and queue index.
614 monotonic_clock::time_point ending_monotonic_event_time =
615 monotonic_clock::max_time;
616 uint32_t ending_queue_index = 0xffffffff;
617
618 // The queue index that the first message was *actually* sent with. The
619 // queue indices are assumed to be contiguous through this range.
Austin Schuh8d7e0bb2020-10-02 17:57:00 -0700620 uint32_t actual_queue_index = 0xffffffff;
621 };
622
James Kuszmaul94ca5132022-07-19 09:11:08 -0700623 // Returns a list of channels which LogReader will send on but which may
624 // *also* get sent on by other applications in replay.
625 std::vector<
626 std::pair<const aos::Channel *, NodeEventLoopFactory::ExclusiveSenders>>
627 NonExclusiveChannels();
628
Austin Schuh8d7e0bb2020-10-02 17:57:00 -0700629 // Stores all the timestamps that have been sent on this channel. This is
630 // only done for channels which are forwarded and on the node which
Austin Schuh9942bae2021-01-07 22:06:44 -0800631 // initially sends the message. Compress using ranges and offsets.
632 std::vector<std::unique_ptr<std::vector<ContiguousSentTimestamp>>>
633 queue_index_map_;
Austin Schuh858c9f32020-08-31 16:56:12 -0700634
635 // Factory (if we are in sim) that this loop was created on.
636 NodeEventLoopFactory *node_event_loop_factory_ = nullptr;
Austin Schuhe33c08d2022-02-03 18:15:21 -0800637 SimulatedEventLoopFactory *event_loop_factory_ = nullptr;
638
Austin Schuh858c9f32020-08-31 16:56:12 -0700639 std::unique_ptr<EventLoop> event_loop_unique_ptr_;
640 // Event loop.
Austin Schuh58646e22021-08-23 23:51:46 -0700641 const Node *node_ = nullptr;
Austin Schuh858c9f32020-08-31 16:56:12 -0700642 EventLoop *event_loop_ = nullptr;
643 // And timer used to send messages.
Austin Schuh58646e22021-08-23 23:51:46 -0700644 TimerHandler *timer_handler_ = nullptr;
645 TimerHandler *startup_timer_ = nullptr;
Austin Schuh858c9f32020-08-31 16:56:12 -0700646
Austin Schuhe33c08d2022-02-03 18:15:21 -0800647 std::unique_ptr<EventNotifier> start_event_notifier_;
648 std::unique_ptr<EventNotifier> end_event_notifier_;
649
// Filters (or nullptr if it isn't a forwarded channel) for each channel.
// This corresponds to the object which is shared among all the channels
// going between 2 nodes.
// NOTE(review): this comment used to describe a tuple whose second element
// flagged the primary direction; the elements are now plain estimator
// pointers.
Austin Schuh2f8fd752020-09-01 22:38:28 -0700654 std::vector<message_bridge::NoncausalOffsetEstimator *> filters_;
James Kuszmaul09632422022-05-25 15:56:19 -0700655 message_bridge::MultiNodeNoncausalOffsetEstimator *multinode_filters_;
Austin Schuh8bd96322020-02-13 21:18:22 -0800656
// List of States (or nullptr if it isn't a forwarded channel) which
// correspond to the originating node.
Austin Schuh8d7e0bb2020-10-02 17:57:00 -0700659 std::vector<State *> channel_source_state_;
660
Austin Schuh61e973f2021-02-21 21:43:56 -0800661 // This is a cache for channel, connection mapping to the corresponding
662 // sender.
663 absl::btree_map<std::pair<const Channel *, const Connection *>,
664 std::shared_ptr<RemoteMessageSender>>
665 channel_timestamp_loggers_;
666
667 // Mapping from resolved RemoteMessage channel to RemoteMessage sender. This
668 // is the channel that timestamps are published to.
669 absl::btree_map<const Channel *, std::shared_ptr<RemoteMessageSender>>
670 timestamp_loggers_;
Austin Schuh58646e22021-08-23 23:51:46 -0700671
James Kuszmaul09632422022-05-25 15:56:19 -0700672 // Time offset between the log's monotonic clock and the current event
673 // loop's monotonic clock. Useful when replaying logs with non-simulated
674 // event loops.
675 std::chrono::nanoseconds clock_offset_{0};
676
Austin Schuh58646e22021-08-23 23:51:46 -0700677 std::vector<std::function<void()>> on_starts_;
678 std::vector<std::function<void()>> on_ends_;
679
James Kuszmaula16a7912022-06-17 10:58:12 -0700680 std::atomic<bool> stopped_ = false;
681 std::atomic<bool> started_ = false;
Austin Schuhbd5f74a2021-11-11 20:55:38 -0800682
683 bool found_last_message_ = false;
684 std::vector<bool> last_message_;
James Kuszmaula16a7912022-06-17 10:58:12 -0700685
686 std::vector<timing::MessageTimingT> send_timings_;
687 aos::Sender<timing::ReplayTiming> timing_statistics_sender_;
688
689 // Protects access to any internal state after Run() is called. Designed
690 // assuming that only one node is actually executing in replay.
691 // Threading design:
692 // * The worker passed to message_queuer_ has full ownership over all
693 // the log-reading code, timestamp filters, last_queued_message_, etc.
694 // * The main thread should only have exclusive access to the replay
695 // event loop and associated features (mainly senders).
696 // It will pop an item out of the queue (which does maintain a shared_ptr
697 // reference which may also be being used by the message_queuer_ thread,
698 // but having shared_ptr's accessing the same memory from
699 // separate threads is permissible).
700 // Enabling this in simulation is currently infeasible due to a lack of
701 // synchronization in the MultiNodeNoncausalOffsetEstimator. Essentially,
702 // when the message_queuer_ thread attempts to read/pop messages from the
703 // timestamp_mapper_, it will end up calling callbacks that update the
704 // internal state of the MultiNodeNoncausalOffsetEstimator. Simultaneously,
705 // the event scheduler that is running in the main thread to orchestrate the
706 // simulation will be querying the estimator to know what the clocks on the
707 // various nodes are at, leading to potential issues.
708 ThreadedBuffering threading_;
709 std::optional<BootTimestamp> last_queued_message_;
710 std::optional<util::ThreadedQueue<TimestampedMessage, BootTimestamp>>
711 message_queuer_;
Austin Schuh6f3babe2020-01-26 20:34:50 -0800712 };
713
Austin Schuh8bd96322020-02-13 21:18:22 -0800714 // Node index -> State.
715 std::vector<std::unique_ptr<State>> states_;
716
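// Creates the requested filter if it doesn't exist, regardless of whether
// these nodes can actually communicate directly, and returns a pointer to
// it.
// NOTE(review): this comment used to mention a second return value reporting
// whether this is the primary direction; the declaration returns only the
// estimator pointer.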
Austin Schuh2f8fd752020-09-01 22:38:28 -0700720 message_bridge::NoncausalOffsetEstimator *GetFilter(const Node *node_a,
721 const Node *node_b);
Austin Schuh8bd96322020-02-13 21:18:22 -0800722
Austin Schuh8bd96322020-02-13 21:18:22 -0800723 // List of filters for a connection. The pointer to the first node will be
724 // less than the second node.
Austin Schuh0ca1fd32020-12-18 22:53:05 -0800725 std::unique_ptr<message_bridge::MultiNodeNoncausalOffsetEstimator> filters_;
Austin Schuh8bd96322020-02-13 21:18:22 -0800726
James Kuszmaul84ff3e52020-01-03 19:48:53 -0800727 std::unique_ptr<FlatbufferDetachedBuffer<Configuration>>
728 remapped_configuration_buffer_;
729
James Kuszmaul84ff3e52020-01-03 19:48:53 -0800730 std::unique_ptr<SimulatedEventLoopFactory> event_loop_factory_unique_ptr_;
731 SimulatedEventLoopFactory *event_loop_factory_ = nullptr;
James Kuszmaulc7bbb3e2020-01-03 20:01:00 -0800732
// Map of channel indices to remapping information. The key is an index into
// logged_configuration(), and the value's remapped_name is the name of the
// channel to send on instead of the logged channel name.
// Replacement information for a single remapped channel.
struct RemappedChannel {
  // Name of the channel to send on instead of the logged channel name.
  std::string remapped_name;
  // NOTE(review): presumably the replacement type name for the remapped
  // channel -- confirm against the code that fills this in.
  std::string new_type;
};
740 std::map<size_t, RemappedChannel> remapped_channels_;
Austin Schuh01b4c352020-09-21 23:09:39 -0700741 std::vector<MapT> maps_;
James Kuszmaulc7bbb3e2020-01-03 20:01:00 -0800742
Austin Schuh6f3babe2020-01-26 20:34:50 -0800743 // Number of nodes which still have data to send. This is used to figure out
744 // when to exit.
745 size_t live_nodes_ = 0;
746
James Kuszmaulc7bbb3e2020-01-03 20:01:00 -0800747 const Configuration *remapped_configuration_ = nullptr;
748 const Configuration *replay_configuration_ = nullptr;
Austin Schuhcde938c2020-02-02 17:30:07 -0800749
750 // If true, the replay timer will ignore any missing data. This is used
751 // during startup when we are bootstrapping everything and trying to get to
752 // the start of all the log files.
753 bool ignore_missing_data_ = false;
James Kuszmaul71a81932020-12-15 21:08:01 -0800754
755 // Whether to exit the SimulatedEventLoop when we finish reading the logs.
756 bool exit_on_finish_ = true;
Austin Schuhe33c08d2022-02-03 18:15:21 -0800757
758 realtime_clock::time_point start_time_ = realtime_clock::min_time;
759 realtime_clock::time_point end_time_ = realtime_clock::max_time;
Austin Schuhe309d2a2019-11-29 13:25:21 -0800760};
761
762} // namespace logger
763} // namespace aos
764
Austin Schuhb06f03b2021-02-17 22:00:37 -0800765#endif // AOS_EVENTS_LOGGING_LOG_READER_H_