blob: a248f80fff1b828d0615bb6500068b92da771638 [file] [log] [blame]
Austin Schuhe309d2a2019-11-29 13:25:21 -08001#ifndef AOS_EVENTS_LOGGER_H_
2#define AOS_EVENTS_LOGGER_H_
3
Austin Schuh8bd96322020-02-13 21:18:22 -08004#include <chrono>
Austin Schuhe309d2a2019-11-29 13:25:21 -08005#include <deque>
Austin Schuh05b70472020-01-01 17:11:17 -08006#include <string_view>
Austin Schuh2f8fd752020-09-01 22:38:28 -07007#include <tuple>
Austin Schuh6f3babe2020-01-26 20:34:50 -08008#include <vector>
Austin Schuhe309d2a2019-11-29 13:25:21 -08009
Austin Schuh8bd96322020-02-13 21:18:22 -080010#include "Eigen/Dense"
11#include "absl/strings/str_cat.h"
Austin Schuhe309d2a2019-11-29 13:25:21 -080012#include "absl/types/span.h"
13#include "aos/events/event_loop.h"
Austin Schuh2f8fd752020-09-01 22:38:28 -070014#include "aos/events/logging/eigen_mpq.h"
Austin Schuhcb5601b2020-09-10 15:29:59 -070015#include "aos/events/logging/log_namer.h"
Austin Schuhf6f9bf32020-10-11 14:37:43 -070016#include "aos/events/logging/logfile_sorting.h"
Austin Schuha36c8902019-12-30 18:07:15 -080017#include "aos/events/logging/logfile_utils.h"
James Kuszmaul38735e82019-12-07 16:42:06 -080018#include "aos/events/logging/logger_generated.h"
Austin Schuh64fab802020-09-09 22:47:47 -070019#include "aos/events/logging/uuid.h"
Austin Schuh92547522019-12-28 14:33:43 -080020#include "aos/events/simulated_event_loop.h"
Austin Schuh2f8fd752020-09-01 22:38:28 -070021#include "aos/network/message_bridge_server_generated.h"
Austin Schuh0de30f32020-12-06 12:44:28 -080022#include "aos/network/remote_message_generated.h"
Austin Schuh8bd96322020-02-13 21:18:22 -080023#include "aos/network/timestamp_filter.h"
Austin Schuhe309d2a2019-11-29 13:25:21 -080024#include "aos/time/time.h"
25#include "flatbuffers/flatbuffers.h"
Austin Schuh2f8fd752020-09-01 22:38:28 -070026#include "third_party/gmp/gmpxx.h"
Austin Schuhe309d2a2019-11-29 13:25:21 -080027
28namespace aos {
29namespace logger {
30
// Logs all channels available in the event loop to disk every 100 ms.
// Start by logging one message per channel to capture any state and
// configuration that is sent rarely on a channel and would affect execution.
34class Logger {
35 public:
Austin Schuh0c297012020-09-16 18:41:59 -070036 // Constructs a logger.
Austin Schuh0c297012020-09-16 18:41:59 -070037 // event_loop: The event loop used to read the messages.
Austin Schuh0c297012020-09-16 18:41:59 -070038 // configuration: When provided, this is the configuration to log, and the
39 // configuration to use for the channel list to log. If not provided,
40 // this becomes the configuration from the event loop.
Brian Silverman1f345222020-09-24 21:14:48 -070041 // should_log: When provided, a filter for channels to log. If not provided,
42 // all available channels are logged.
43 Logger(EventLoop *event_loop)
44 : Logger(event_loop, event_loop->configuration()) {}
45 Logger(EventLoop *event_loop, const Configuration *configuration)
46 : Logger(event_loop, configuration,
47 [](const Channel *) { return true; }) {}
48 Logger(EventLoop *event_loop, const Configuration *configuration,
49 std::function<bool(const Channel *)> should_log);
Austin Schuh0c297012020-09-16 18:41:59 -070050 ~Logger();
51
52 // Overrides the name in the log file header.
53 void set_name(std::string_view name) { name_ = name; }
Austin Schuhe309d2a2019-11-29 13:25:21 -080054
Brian Silverman1f345222020-09-24 21:14:48 -070055 // Sets the callback to run after each period of data is logged. Defaults to
56 // doing nothing.
57 //
58 // This callback may safely do things like call Rotate().
59 void set_on_logged_period(std::function<void()> on_logged_period) {
60 on_logged_period_ = std::move(on_logged_period);
61 }
62
63 // Sets the period between polling the data. Defaults to 100ms.
64 //
65 // Changing this while a set of files is being written may result in
66 // unreadable files.
67 void set_polling_period(std::chrono::nanoseconds polling_period) {
68 polling_period_ = polling_period;
69 }
70
Brian Silvermanae7c0332020-09-30 16:58:23 -070071 std::string_view log_start_uuid() const { return log_start_uuid_; }
Brian Silverman035e4182020-10-06 17:13:00 -070072 UUID logger_instance_uuid() const { return logger_instance_uuid_; }
Brian Silvermanae7c0332020-09-30 16:58:23 -070073
Brian Silvermancb805822020-10-06 17:43:35 -070074 // The maximum time for a single fetch which returned a message, or 0 if none
75 // of those have happened.
76 std::chrono::nanoseconds max_message_fetch_time() const {
77 return max_message_fetch_time_;
78 }
79 // The channel for that longest fetch which returned a message, or -1 if none
80 // of those have happened.
81 int max_message_fetch_time_channel() const {
82 return max_message_fetch_time_channel_;
83 }
84 // The size of the message returned by that longest fetch, or -1 if none of
85 // those have happened.
86 int max_message_fetch_time_size() const {
87 return max_message_fetch_time_size_;
88 }
89 // The total time spent fetching messages.
90 std::chrono::nanoseconds total_message_fetch_time() const {
91 return total_message_fetch_time_;
92 }
93 // The total number of fetch calls which returned messages.
94 int total_message_fetch_count() const { return total_message_fetch_count_; }
95 // The total number of bytes fetched.
96 int64_t total_message_fetch_bytes() const {
97 return total_message_fetch_bytes_;
98 }
99
100 // The total time spent in fetches which did not return a message.
101 std::chrono::nanoseconds total_nop_fetch_time() const {
102 return total_nop_fetch_time_;
103 }
104 // The total number of fetches which did not return a message.
105 int total_nop_fetch_count() const { return total_nop_fetch_count_; }
106
107 // The maximum time for a single copy, or 0 if none of those have happened.
108 std::chrono::nanoseconds max_copy_time() const { return max_copy_time_; }
109 // The channel for that longest copy, or -1 if none of those have happened.
110 int max_copy_time_channel() const { return max_copy_time_channel_; }
111 // The size of the message for that longest copy, or -1 if none of those have
112 // happened.
113 int max_copy_time_size() const { return max_copy_time_size_; }
114 // The total time spent copying messages.
115 std::chrono::nanoseconds total_copy_time() const { return total_copy_time_; }
116 // The total number of messages copied.
117 int total_copy_count() const { return total_copy_count_; }
118 // The total number of bytes copied.
119 int64_t total_copy_bytes() const { return total_copy_bytes_; }
120
121 void ResetStatisics();
122
Austin Schuh2f8fd752020-09-01 22:38:28 -0700123 // Rotates the log file(s), triggering new part files to be written for each
124 // log file.
125 void Rotate();
Austin Schuhfa895892020-01-07 20:07:41 -0800126
Brian Silverman1f345222020-09-24 21:14:48 -0700127 // Starts logging to files with the given naming scheme.
Brian Silvermanae7c0332020-09-30 16:58:23 -0700128 //
129 // log_start_uuid may be used to tie this log event to other log events across
130 // multiple nodes. The default (empty string) indicates there isn't one
131 // available.
132 void StartLogging(std::unique_ptr<LogNamer> log_namer,
133 std::string_view log_start_uuid = "");
Brian Silverman1f345222020-09-24 21:14:48 -0700134
135 // Stops logging. Ensures any messages through end_time make it into the log.
136 //
137 // If you want to stop ASAP, pass min_time to avoid reading any more messages.
138 //
139 // Returns the LogNamer in case the caller wants to do anything else with it
140 // before destroying it.
141 std::unique_ptr<LogNamer> StopLogging(
142 aos::monotonic_clock::time_point end_time);
143
144 // Returns whether a log is currently being written.
145 bool is_started() const { return static_cast<bool>(log_namer_); }
146
147 // Shortcut to call StartLogging with a LocalLogNamer when event processing
148 // starts.
149 void StartLoggingLocalNamerOnRun(std::string base_name) {
150 event_loop_->OnRun([this, base_name]() {
151 StartLogging(
152 std::make_unique<LocalLogNamer>(base_name, event_loop_->node()));
153 });
154 }
155
Austin Schuhe309d2a2019-11-29 13:25:21 -0800156 private:
Austin Schuhe309d2a2019-11-29 13:25:21 -0800157 // Structure to track both a fetcher, and if the data fetched has been
158 // written. We may want to delay writing data to disk so that we don't let
159 // data get too far out of order when written to disk so we can avoid making
160 // it too hard to sort when reading.
161 struct FetcherStruct {
162 std::unique_ptr<RawFetcher> fetcher;
163 bool written = false;
Austin Schuh15649d62019-12-28 16:36:38 -0800164
Austin Schuh8d7e0bb2020-10-02 17:57:00 -0700165 // Channel index to log to.
Austin Schuh6f3babe2020-01-26 20:34:50 -0800166 int channel_index = -1;
Brian Silverman1f345222020-09-24 21:14:48 -0700167 const Channel *channel = nullptr;
168 const Node *timestamp_node = nullptr;
Austin Schuh6f3babe2020-01-26 20:34:50 -0800169
170 LogType log_type = LogType::kLogMessage;
171
Brian Silverman1f345222020-09-24 21:14:48 -0700172 // We fill out the metadata at construction, but the actual writers have to
173 // be updated each time we start logging. To avoid duplicating the complex
174 // logic determining whether each writer should be initialized, we just
175 // stash the answer in separate member variables.
176 bool wants_writer = false;
Austin Schuh6f3babe2020-01-26 20:34:50 -0800177 DetachedBufferWriter *writer = nullptr;
Brian Silverman1f345222020-09-24 21:14:48 -0700178 bool wants_timestamp_writer = false;
Austin Schuh6f3babe2020-01-26 20:34:50 -0800179 DetachedBufferWriter *timestamp_writer = nullptr;
Brian Silverman1f345222020-09-24 21:14:48 -0700180 bool wants_contents_writer = false;
Austin Schuh2f8fd752020-09-01 22:38:28 -0700181 DetachedBufferWriter *contents_writer = nullptr;
Brian Silverman1f345222020-09-24 21:14:48 -0700182
Austin Schuh2f8fd752020-09-01 22:38:28 -0700183 int node_index = 0;
Austin Schuhe309d2a2019-11-29 13:25:21 -0800184 };
185
Austin Schuh8d7e0bb2020-10-02 17:57:00 -0700186 // Vector mapping from the channel index from the event loop to the logged
187 // channel index.
188 std::vector<int> event_loop_to_logged_channel_index_;
189
Austin Schuh2f8fd752020-09-01 22:38:28 -0700190 struct NodeState {
191 aos::monotonic_clock::time_point monotonic_start_time =
192 aos::monotonic_clock::min_time;
193 aos::realtime_clock::time_point realtime_start_time =
194 aos::realtime_clock::min_time;
195
196 aos::SizePrefixedFlatbufferDetachedBuffer<LogFileHeader> log_file_header =
197 aos::SizePrefixedFlatbufferDetachedBuffer<LogFileHeader>::Empty();
198 };
Brian Silverman1f345222020-09-24 21:14:48 -0700199
200 void WriteHeader();
201 aos::SizePrefixedFlatbufferDetachedBuffer<LogFileHeader> MakeHeader(
202 const Node *node);
203
204 bool MaybeUpdateTimestamp(
205 const Node *node, int node_index,
206 aos::monotonic_clock::time_point monotonic_start_time,
207 aos::realtime_clock::time_point realtime_start_time);
208
209 void DoLogData(const monotonic_clock::time_point end_time);
210
211 void WriteMissingTimestamps();
212
213 // Fetches from each channel until all the data is logged.
214 void LogUntil(monotonic_clock::time_point t);
215
Brian Silvermancb805822020-10-06 17:43:35 -0700216 void RecordFetchResult(aos::monotonic_clock::time_point start,
217 aos::monotonic_clock::time_point end, bool got_new,
218 FetcherStruct *fetcher);
219
220 void RecordCreateMessageTime(aos::monotonic_clock::time_point start,
221 aos::monotonic_clock::time_point end,
222 FetcherStruct *fetcher);
223
Brian Silverman1f345222020-09-24 21:14:48 -0700224 // Sets the start time for a specific node.
225 void SetStartTime(size_t node_index,
226 aos::monotonic_clock::time_point monotonic_start_time,
227 aos::realtime_clock::time_point realtime_start_time);
228
Brian Silvermanae7c0332020-09-30 16:58:23 -0700229 EventLoop *const event_loop_;
Brian Silverman1f345222020-09-24 21:14:48 -0700230 // The configuration to place at the top of the log file.
231 const Configuration *const configuration_;
232
Brian Silvermanae7c0332020-09-30 16:58:23 -0700233 UUID log_event_uuid_ = UUID::Zero();
234 const UUID logger_instance_uuid_ = UUID::Random();
235 std::unique_ptr<LogNamer> log_namer_;
236 // Empty indicates there isn't one.
237 std::string log_start_uuid_;
238 const std::string boot_uuid_;
239
Brian Silverman1f345222020-09-24 21:14:48 -0700240 // Name to save in the log file. Defaults to hostname.
241 std::string name_;
242
243 std::function<void()> on_logged_period_ = []() {};
244
Brian Silvermancb805822020-10-06 17:43:35 -0700245 std::chrono::nanoseconds max_message_fetch_time_ =
246 std::chrono::nanoseconds::zero();
247 int max_message_fetch_time_channel_ = -1;
248 int max_message_fetch_time_size_ = -1;
249 std::chrono::nanoseconds total_message_fetch_time_ =
250 std::chrono::nanoseconds::zero();
251 int total_message_fetch_count_ = 0;
252 int64_t total_message_fetch_bytes_ = 0;
253
254 std::chrono::nanoseconds total_nop_fetch_time_ =
255 std::chrono::nanoseconds::zero();
256 int total_nop_fetch_count_ = 0;
257
258 std::chrono::nanoseconds max_copy_time_ = std::chrono::nanoseconds::zero();
259 int max_copy_time_channel_ = -1;
260 int max_copy_time_size_ = -1;
261 std::chrono::nanoseconds total_copy_time_ = std::chrono::nanoseconds::zero();
262 int total_copy_count_ = 0;
263 int64_t total_copy_bytes_ = 0;
264
Brian Silverman1f345222020-09-24 21:14:48 -0700265 std::vector<FetcherStruct> fetchers_;
266 TimerHandler *timer_handler_;
267
268 // Period to poll the channels.
269 std::chrono::nanoseconds polling_period_ = std::chrono::milliseconds(100);
270
271 // Last time that data was written for all channels to disk.
272 monotonic_clock::time_point last_synchronized_time_;
273
274 // Max size that the header has consumed. This much extra data will be
275 // reserved in the builder to avoid reallocating.
276 size_t max_header_size_ = 0;
277
278 // Fetcher for all the statistics from all the nodes.
279 aos::Fetcher<message_bridge::ServerStatistics> server_statistics_fetcher_;
280
Austin Schuh2f8fd752020-09-01 22:38:28 -0700281 std::vector<NodeState> node_state_;
Austin Schuhe309d2a2019-11-29 13:25:21 -0800282};
283
Austin Schuh11d43732020-09-21 17:28:30 -0700284std::vector<std::vector<std::string>> ToLogReaderVector(
285 const std::vector<LogFile> &log_files);
Austin Schuh5212cad2020-09-09 23:12:09 -0700286
Austin Schuh6f3babe2020-01-26 20:34:50 -0800287// We end up with one of the following 3 log file types.
288//
289// Single node logged as the source node.
290// -> Replayed just on the source node.
291//
292// Forwarding timestamps only logged from the perspective of the destination
293// node.
294// -> Matched with data on source node and logged.
295//
296// Forwarding timestamps with data logged as the destination node.
297// -> Replayed just as the destination
298// -> Replayed as the source (Much harder, ordering is not defined)
299//
300// Duplicate data logged. -> CHECK that it matches and explode otherwise.
301//
302// This can be boiled down to a set of constraints and tools.
303//
304// 1) Forwarding timestamps and data need to be logged separately.
305// 2) Any forwarded data logged on the destination node needs to be logged
306// separately such that it can be sorted.
307//
308// 1) Log reader needs to be able to sort a list of log files.
309// 2) Log reader needs to be able to merge sorted lists of log files.
310// 3) Log reader needs to be able to match timestamps with messages.
311//
312// We also need to be able to generate multiple views of a log file depending on
313// the target.
314
Austin Schuhe309d2a2019-11-29 13:25:21 -0800315// Replays all the channels in the logfile to the event loop.
316class LogReader {
317 public:
James Kuszmaulc7bbb3e2020-01-03 20:01:00 -0800318 // If you want to supply a new configuration that will be used for replay
319 // (e.g., to change message rates, or to populate an updated schema), then
320 // pass it in here. It must provide all the channels that the original logged
321 // config did.
Austin Schuh6f3babe2020-01-26 20:34:50 -0800322 //
Austin Schuh287d43d2020-12-04 20:19:33 -0800323 // The single file constructor calls SortParts internally.
James Kuszmaulc7bbb3e2020-01-03 20:01:00 -0800324 LogReader(std::string_view filename,
325 const Configuration *replay_configuration = nullptr);
Austin Schuh287d43d2020-12-04 20:19:33 -0800326 LogReader(std::vector<LogFile> log_files,
Austin Schuh11d43732020-09-21 17:28:30 -0700327 const Configuration *replay_configuration = nullptr);
James Kuszmaul7daef362019-12-31 18:28:17 -0800328 ~LogReader();
Austin Schuhe309d2a2019-11-29 13:25:21 -0800329
Austin Schuh6331ef92020-01-07 18:28:09 -0800330 // Registers all the callbacks to send the log file data out on an event loop
331 // created in event_loop_factory. This also updates time to be at the start
332 // of the log file by running until the log file starts.
333 // Note: the configuration used in the factory should be configuration()
334 // below, but can be anything as long as the locations needed to send
335 // everything are available.
James Kuszmaul84ff3e52020-01-03 19:48:53 -0800336 void Register(SimulatedEventLoopFactory *event_loop_factory);
// Creates a SimulatedEventLoopFactory accessible via event_loop_factory(),
// and then calls Register.
339 void Register();
340 // Registers callbacks for all the events after the log file starts. This is
341 // only useful when replaying live.
Austin Schuhe309d2a2019-11-29 13:25:21 -0800342 void Register(EventLoop *event_loop);
Austin Schuh6331ef92020-01-07 18:28:09 -0800343
James Kuszmaul84ff3e52020-01-03 19:48:53 -0800344 // Unregisters the senders. You only need to call this if you separately
345 // supplied an event loop or event loop factory and the lifetimes are such
346 // that they need to be explicitly destroyed before the LogReader destructor
347 // gets called.
Austin Schuhe309d2a2019-11-29 13:25:21 -0800348 void Deregister();
349
Austin Schuh0c297012020-09-16 18:41:59 -0700350 // Returns the configuration being used for replay from the log file.
351 // Note that this may be different from the configuration actually used for
352 // handling events. You should generally only use this to create a
353 // SimulatedEventLoopFactory, and then get the configuration from there for
354 // everything else.
James Kuszmaulc7bbb3e2020-01-03 20:01:00 -0800355 const Configuration *logged_configuration() const;
Austin Schuh11d43732020-09-21 17:28:30 -0700356 // Returns the configuration being used for replay from the log file.
357 // Note that this may be different from the configuration actually used for
358 // handling events. You should generally only use this to create a
359 // SimulatedEventLoopFactory, and then get the configuration from there for
360 // everything else.
Austin Schuh6f3babe2020-01-26 20:34:50 -0800361 // The pointer is invalidated whenever RemapLoggedChannel is called.
Austin Schuh15649d62019-12-28 16:36:38 -0800362 const Configuration *configuration() const;
363
Austin Schuh6f3babe2020-01-26 20:34:50 -0800364 // Returns the nodes that this log file was created on. This is a list of
365 // pointers to a node in the nodes() list inside configuration(). The
366 // pointers here are invalidated whenever RemapLoggedChannel is called.
367 std::vector<const Node *> Nodes() const;
Austin Schuhe309d2a2019-11-29 13:25:21 -0800368
369 // Returns the starting timestamp for the log file.
Austin Schuh11d43732020-09-21 17:28:30 -0700370 monotonic_clock::time_point monotonic_start_time(
371 const Node *node = nullptr) const;
372 realtime_clock::time_point realtime_start_time(
373 const Node *node = nullptr) const;
Austin Schuhe309d2a2019-11-29 13:25:21 -0800374
James Kuszmaulc7bbb3e2020-01-03 20:01:00 -0800375 // Causes the logger to publish the provided channel on a different name so
376 // that replayed applications can publish on the proper channel name without
377 // interference. This operates on raw channel names, without any node or
378 // application specific mappings.
379 void RemapLoggedChannel(std::string_view name, std::string_view type,
Austin Schuh0de30f32020-12-06 12:44:28 -0800380 std::string_view add_prefix = "/original",
381 std::string_view new_type = "");
// Typed convenience overload: takes the channel type name from
// T::GetFullyQualifiedName() and forwards to the overload which accepts the
// type as a string.
template <typename T>
void RemapLoggedChannel(std::string_view name,
                        std::string_view add_prefix = "/original",
                        std::string_view new_type = "") {
  const auto &type_name = T::GetFullyQualifiedName();
  RemapLoggedChannel(name, type_name, add_prefix, new_type);
}
388
Austin Schuh01b4c352020-09-21 23:09:39 -0700389 // Remaps the provided channel, though this respects node mappings, and
390 // preserves them too. This makes it so if /aos -> /pi1/aos on one node,
391 // /original/aos -> /original/pi1/aos on the same node after renaming, just
Austin Schuh0de30f32020-12-06 12:44:28 -0800392 // like you would hope. If new_type is not empty, the new channel will use
393 // the provided type instead. This allows for renaming messages.
Austin Schuh01b4c352020-09-21 23:09:39 -0700394 //
395 // TODO(austin): If you have 2 nodes remapping something to the same channel,
396 // this doesn't handle that. No use cases exist yet for that, so it isn't
397 // being done yet.
398 void RemapLoggedChannel(std::string_view name, std::string_view type,
399 const Node *node,
Austin Schuh0de30f32020-12-06 12:44:28 -0800400 std::string_view add_prefix = "/original",
401 std::string_view new_type = "");
Brian Silvermande9f3ff2020-04-28 16:56:58 -0700402 template <typename T>
Austin Schuh01b4c352020-09-21 23:09:39 -0700403 void RemapLoggedChannel(std::string_view name, const Node *node,
Austin Schuh0de30f32020-12-06 12:44:28 -0800404 std::string_view add_prefix = "/original",
405 std::string_view new_type = "") {
406 RemapLoggedChannel(name, T::GetFullyQualifiedName(), node, add_prefix,
407 new_type);
Austin Schuh01b4c352020-09-21 23:09:39 -0700408 }
409
410 template <typename T>
411 bool HasChannel(std::string_view name, const Node *node = nullptr) {
Brian Silvermande9f3ff2020-04-28 16:56:58 -0700412 return configuration::GetChannel(log_file_header()->configuration(), name,
Austin Schuh0de30f32020-12-06 12:44:28 -0800413 T::GetFullyQualifiedName(), "", node,
414 true) != nullptr;
Brian Silvermande9f3ff2020-04-28 16:56:58 -0700415 }
416
James Kuszmaul84ff3e52020-01-03 19:48:53 -0800417 SimulatedEventLoopFactory *event_loop_factory() {
418 return event_loop_factory_;
419 }
420
Brian Silvermande9f3ff2020-04-28 16:56:58 -0700421 const LogFileHeader *log_file_header() const {
422 return &log_file_header_.message();
423 }
424
Austin Schuh0c297012020-09-16 18:41:59 -0700425 std::string_view name() const {
426 return log_file_header()->name()->string_view();
427 }
428
Austin Schuhe309d2a2019-11-29 13:25:21 -0800429 private:
Austin Schuh6f3babe2020-01-26 20:34:50 -0800430 const Channel *RemapChannel(const EventLoop *event_loop,
431 const Channel *channel);
432
Austin Schuhe309d2a2019-11-29 13:25:21 -0800433 // Queues at least max_out_of_order_duration_ messages into channels_.
434 void QueueMessages();
James Kuszmaulc7bbb3e2020-01-03 20:01:00 -0800435 // Handle constructing a configuration with all the additional remapped
436 // channels from calls to RemapLoggedChannel.
437 void MakeRemappedConfig();
Austin Schuhe309d2a2019-11-29 13:25:21 -0800438
Austin Schuh2f8fd752020-09-01 22:38:28 -0700439 // Returns the number of nodes.
440 size_t nodes_count() const {
441 return !configuration::MultiNode(logged_configuration())
442 ? 1u
443 : logged_configuration()->nodes()->size();
444 }
445
Austin Schuh287d43d2020-12-04 20:19:33 -0800446 const std::vector<LogFile> log_files_;
Austin Schuh6f3babe2020-01-26 20:34:50 -0800447
448 // This is *a* log file header used to provide the logged config. The rest of
449 // the header is likely distracting.
Austin Schuhadd6eb32020-11-09 21:24:26 -0800450 SizePrefixedFlatbufferVector<LogFileHeader> log_file_header_;
Austin Schuh6f3babe2020-01-26 20:34:50 -0800451
Austin Schuh2f8fd752020-09-01 22:38:28 -0700452 // Returns [ta; tb; ...] = tuple[0] * t + tuple[1]
453 std::tuple<Eigen::Matrix<double, Eigen::Dynamic, 1>,
454 Eigen::Matrix<double, Eigen::Dynamic, 1>>
455 SolveOffsets();
456
457 void LogFit(std::string_view prefix);
Austin Schuh8bd96322020-02-13 21:18:22 -0800458
Austin Schuh6f3babe2020-01-26 20:34:50 -0800459 // State per node.
Austin Schuh858c9f32020-08-31 16:56:12 -0700460 class State {
461 public:
Austin Schuh287d43d2020-12-04 20:19:33 -0800462 State(std::unique_ptr<TimestampMapper> timestamp_mapper);
463
464 // Connects up the timestamp mappers.
465 void AddPeer(State *peer);
Austin Schuh6f3babe2020-01-26 20:34:50 -0800466
Austin Schuh858c9f32020-08-31 16:56:12 -0700467 // Returns the timestamps, channel_index, and message from a channel.
468 // update_time (will be) set to true when popping this message causes the
469 // filter to change the time offset estimation function.
Austin Schuh287d43d2020-12-04 20:19:33 -0800470 TimestampedMessage PopOldest(bool *update_time);
Austin Schuh858c9f32020-08-31 16:56:12 -0700471
472 // Returns the monotonic time of the oldest message.
473 monotonic_clock::time_point OldestMessageTime() const;
474
475 // Primes the queues inside State. Should be called before calling
476 // OldestMessageTime.
477 void SeedSortedMessages();
Austin Schuh8bd96322020-02-13 21:18:22 -0800478
Austin Schuh858c9f32020-08-31 16:56:12 -0700479 // Returns the starting time for this node.
480 monotonic_clock::time_point monotonic_start_time() const {
Austin Schuh287d43d2020-12-04 20:19:33 -0800481 return timestamp_mapper_ ? timestamp_mapper_->monotonic_start_time()
482 : monotonic_clock::min_time;
Austin Schuh858c9f32020-08-31 16:56:12 -0700483 }
484 realtime_clock::time_point realtime_start_time() const {
Austin Schuh287d43d2020-12-04 20:19:33 -0800485 return timestamp_mapper_ ? timestamp_mapper_->realtime_start_time()
486 : realtime_clock::min_time;
Austin Schuh858c9f32020-08-31 16:56:12 -0700487 }
488
489 // Sets the node event loop factory for replaying into a
490 // SimulatedEventLoopFactory. Returns the EventLoop to use.
491 EventLoop *SetNodeEventLoopFactory(
492 NodeEventLoopFactory *node_event_loop_factory);
493
494 // Sets and gets the event loop to use.
495 void set_event_loop(EventLoop *event_loop) { event_loop_ = event_loop; }
496 EventLoop *event_loop() { return event_loop_; }
497
Austin Schuh858c9f32020-08-31 16:56:12 -0700498 // Sets the current realtime offset from the monotonic clock for this node
499 // (if we are on a simulated event loop).
500 void SetRealtimeOffset(monotonic_clock::time_point monotonic_time,
501 realtime_clock::time_point realtime_time) {
502 if (node_event_loop_factory_ != nullptr) {
503 node_event_loop_factory_->SetRealtimeOffset(monotonic_time,
504 realtime_time);
505 }
506 }
507
Austin Schuh8d7e0bb2020-10-02 17:57:00 -0700508 // Returns the MessageHeader sender to log delivery timestamps to for the
509 // provided remote node.
Austin Schuh0de30f32020-12-06 12:44:28 -0800510 aos::Sender<message_bridge::RemoteMessage> *RemoteTimestampSender(
Austin Schuh8d7e0bb2020-10-02 17:57:00 -0700511 const Node *delivered_node);
512
Austin Schuh858c9f32020-08-31 16:56:12 -0700513 // Converts a timestamp from the monotonic clock on this node to the
514 // distributed clock.
515 distributed_clock::time_point ToDistributedClock(
516 monotonic_clock::time_point time) {
517 return node_event_loop_factory_->ToDistributedClock(time);
518 }
519
Austin Schuh2f8fd752020-09-01 22:38:28 -0700520 monotonic_clock::time_point FromDistributedClock(
521 distributed_clock::time_point time) {
522 return node_event_loop_factory_->FromDistributedClock(time);
523 }
524
Austin Schuh858c9f32020-08-31 16:56:12 -0700525 // Sets the offset (and slope) from the distributed clock.
526 void SetDistributedOffset(std::chrono::nanoseconds distributed_offset,
527 double distributed_slope) {
528 node_event_loop_factory_->SetDistributedOffset(distributed_offset,
529 distributed_slope);
530 }
531
532 // Returns the current time on the remote node which sends messages on
533 // channel_index.
534 monotonic_clock::time_point monotonic_remote_now(size_t channel_index) {
Austin Schuh8d7e0bb2020-10-02 17:57:00 -0700535 return channel_source_state_[channel_index]
536 ->node_event_loop_factory_->monotonic_now();
Austin Schuh858c9f32020-08-31 16:56:12 -0700537 }
538
Austin Schuh2f8fd752020-09-01 22:38:28 -0700539 distributed_clock::time_point RemoteToDistributedClock(
540 size_t channel_index, monotonic_clock::time_point time) {
Austin Schuh8d7e0bb2020-10-02 17:57:00 -0700541 return channel_source_state_[channel_index]
542 ->node_event_loop_factory_->ToDistributedClock(time);
Austin Schuh2f8fd752020-09-01 22:38:28 -0700543 }
544
545 const Node *remote_node(size_t channel_index) {
Austin Schuh8d7e0bb2020-10-02 17:57:00 -0700546 return channel_source_state_[channel_index]
547 ->node_event_loop_factory_->node();
Austin Schuh2f8fd752020-09-01 22:38:28 -0700548 }
549
550 monotonic_clock::time_point monotonic_now() {
551 return node_event_loop_factory_->monotonic_now();
552 }
553
Austin Schuh858c9f32020-08-31 16:56:12 -0700554 // Sets the number of channels.
555 void SetChannelCount(size_t count);
556
557 // Sets the sender, filter, and target factory for a channel.
Austin Schuh0de30f32020-12-06 12:44:28 -0800558 void SetChannel(
559 size_t logged_channel_index, size_t factory_channel_index,
560 std::unique_ptr<RawSender> sender,
561 message_bridge::NoncausalOffsetEstimator *filter,
562 aos::Sender<message_bridge::RemoteMessage> *remote_timestamp_sender,
563 State *source_state);
Austin Schuh858c9f32020-08-31 16:56:12 -0700564
565 // Returns if we have read all the messages from all the logs.
Austin Schuh287d43d2020-12-04 20:19:33 -0800566 bool at_end() const {
567 return timestamp_mapper_ ? timestamp_mapper_->Front() == nullptr : true;
568 }
Austin Schuh858c9f32020-08-31 16:56:12 -0700569
// Unregisters everything so we can destroy the event loop.
571 void Deregister();
572
573 // Sets the current TimerHandle for the replay callback.
574 void set_timer_handler(TimerHandler *timer_handler) {
575 timer_handler_ = timer_handler;
576 }
577
578 // Sets the next wakeup time on the replay callback.
579 void Setup(monotonic_clock::time_point next_time) {
580 timer_handler_->Setup(next_time);
581 }
582
583 // Sends a buffer on the provided channel index.
Austin Schuh287d43d2020-12-04 20:19:33 -0800584 bool Send(const TimestampedMessage &timestamped_message);
Austin Schuh858c9f32020-08-31 16:56:12 -0700585
586 // Returns a debug string for the channel merger.
Austin Schuh2f8fd752020-09-01 22:38:28 -0700587 std::string DebugString() const {
588 std::stringstream messages;
589 size_t i = 0;
590 for (const auto &message : sorted_messages_) {
591 if (i < 7 || i + 7 > sorted_messages_.size()) {
592 messages << "sorted_messages[" << i
593 << "]: " << std::get<0>(message).monotonic_event_time << " "
594 << configuration::StrippedChannelToString(
595 event_loop_->configuration()->channels()->Get(
Austin Schuh287d43d2020-12-04 20:19:33 -0800596 std::get<0>(message).channel_index))
Austin Schuh2f8fd752020-09-01 22:38:28 -0700597 << "\n";
598 } else if (i == 7) {
599 messages << "...\n";
600 }
601 ++i;
602 }
Austin Schuh287d43d2020-12-04 20:19:33 -0800603 if (!timestamp_mapper_) {
604 return messages.str();
605 }
606 return messages.str() + timestamp_mapper_->DebugString();
Austin Schuh2f8fd752020-09-01 22:38:28 -0700607 }
Austin Schuh858c9f32020-08-31 16:56:12 -0700608
609 private:
610 // Log file.
Austin Schuh287d43d2020-12-04 20:19:33 -0800611 std::unique_ptr<TimestampMapper> timestamp_mapper_;
Austin Schuh858c9f32020-08-31 16:56:12 -0700612
Austin Schuh287d43d2020-12-04 20:19:33 -0800613 std::deque<std::tuple<TimestampedMessage,
Austin Schuh2f8fd752020-09-01 22:38:28 -0700614 message_bridge::NoncausalOffsetEstimator *>>
Austin Schuh858c9f32020-08-31 16:56:12 -0700615 sorted_messages_;
616
617 // Senders.
618 std::vector<std::unique_ptr<RawSender>> channels_;
Austin Schuh0de30f32020-12-06 12:44:28 -0800619 std::vector<aos::Sender<message_bridge::RemoteMessage> *>
620 remote_timestamp_senders_;
Austin Schuh8d7e0bb2020-10-02 17:57:00 -0700621 // The mapping from logged channel index to sent channel index. Needed for
622 // sending out MessageHeaders.
623 std::vector<int> factory_channel_index_;
624
625 struct SentTimestamp {
626 monotonic_clock::time_point monotonic_event_time =
627 monotonic_clock::min_time;
628 realtime_clock::time_point realtime_event_time = realtime_clock::min_time;
629 uint32_t queue_index = 0xffffffff;
630
631 // The queue index that this message *actually* was sent with.
632 uint32_t actual_queue_index = 0xffffffff;
633 };
634
635 // Stores all the timestamps that have been sent on this channel. This is
636 // only done for channels which are forwarded and on the node which
637 // initially sends the message.
638 //
639 // TODO(austin): This whole concept is a hack. We should be able to
640 // associate state with the message as it gets sorted and recover it.
641 std::vector<std::unique_ptr<std::vector<SentTimestamp>>> queue_index_map_;
Austin Schuh858c9f32020-08-31 16:56:12 -0700642
643 // Factory (if we are in sim) that this loop was created on.
644 NodeEventLoopFactory *node_event_loop_factory_ = nullptr;
645 std::unique_ptr<EventLoop> event_loop_unique_ptr_;
646 // Event loop.
647 EventLoop *event_loop_ = nullptr;
648 // And timer used to send messages.
649 TimerHandler *timer_handler_;
650
Austin Schuh8bd96322020-02-13 21:18:22 -0800651 // Filters (or nullptr if it isn't a forwarded channel) for each channel.
652 // This corresponds to the object which is shared among all the channels
653 // going between 2 nodes. The second element in the tuple indicates if this
654 // is the primary direction or not.
Austin Schuh2f8fd752020-09-01 22:38:28 -0700655 std::vector<message_bridge::NoncausalOffsetEstimator *> filters_;
Austin Schuh8bd96322020-02-13 21:18:22 -0800656
657 // List of NodeEventLoopFactorys (or nullptr if it isn't a forwarded
658 // channel) which correspond to the originating node.
Austin Schuh8d7e0bb2020-10-02 17:57:00 -0700659 std::vector<State *> channel_source_state_;
660
Austin Schuh0de30f32020-12-06 12:44:28 -0800661 std::map<const Node *, aos::Sender<message_bridge::RemoteMessage>>
Austin Schuh8d7e0bb2020-10-02 17:57:00 -0700662 remote_timestamp_senders_map_;
Austin Schuh6f3babe2020-01-26 20:34:50 -0800663 };
664
Austin Schuh8bd96322020-02-13 21:18:22 -0800665 // Node index -> State.
666 std::vector<std::unique_ptr<State>> states_;
667
668 // Creates the requested filter if it doesn't exist, regardless of whether
669 // these nodes can actually communicate directly. The second return value
670 // reports if this is the primary direction or not.
Austin Schuh2f8fd752020-09-01 22:38:28 -0700671 message_bridge::NoncausalOffsetEstimator *GetFilter(const Node *node_a,
672 const Node *node_b);
Austin Schuh8bd96322020-02-13 21:18:22 -0800673
674 // FILE to write offsets to (if populated).
675 FILE *offset_fp_ = nullptr;
676 // Timestamp of the first piece of data used for the horizontal axis on the
677 // plot.
678 aos::realtime_clock::time_point first_time_;
679
680 // List of filters for a connection. The pointer to the first node will be
681 // less than the second node.
682 std::map<std::tuple<const Node *, const Node *>,
Austin Schuh2f8fd752020-09-01 22:38:28 -0700683 std::tuple<message_bridge::NoncausalOffsetEstimator>>
Austin Schuh8bd96322020-02-13 21:18:22 -0800684 filters_;
685
686 // Returns the offset from the monotonic clock for a node to the distributed
Austin Schuh2f8fd752020-09-01 22:38:28 -0700687 // clock. monotonic = distributed * slope() + offset();
688 double slope(int node_index) const {
689 CHECK_LT(node_index, time_slope_matrix_.rows())
James Kuszmaul46d82582020-05-09 19:50:09 -0700690 << ": Got too high of a node index.";
Austin Schuh2f8fd752020-09-01 22:38:28 -0700691 return time_slope_matrix_(node_index);
692 }
693 std::chrono::nanoseconds offset(int node_index) const {
694 CHECK_LT(node_index, time_offset_matrix_.rows())
695 << ": Got too high of a node index.";
696 return std::chrono::duration_cast<std::chrono::nanoseconds>(
697 std::chrono::duration<double>(time_offset_matrix_(node_index)));
Austin Schuh8bd96322020-02-13 21:18:22 -0800698 }
699
// Recomputes the offset matrix solution and sets the resulting per-node
// distributed offsets in the factory.
void UpdateOffsets();
703
Austin Schuh2f8fd752020-09-01 22:38:28 -0700704 // We have 2 types of equations to do a least squares regression over to fully
705 // constrain our time function.
706 //
707 // One is simple. The distributed clock is the average of all the clocks.
Brian Silverman87ac0402020-09-17 14:47:01 -0700708 // (ta + tb + tc + td) / num_nodes = t_distributed
Austin Schuh2f8fd752020-09-01 22:38:28 -0700709 //
710 // The second is a bit more complicated. Our basic time conversion function
711 // is:
712 // tb = ta + (ta * slope + offset)
713 // We can rewrite this as follows
714 // tb - (1 + slope) * ta = offset
715 //
716 // From here, we have enough equations to solve for t{a,b,c,...} We want to
717 // take as an input the offsets and slope, and solve for the per-node times as
718 // a function of the distributed clock.
719 //
720 // We need to massage our equations to make this work. If we solve for the
721 // per-node times at two set distributed clock times, we will be able to
722 // recreate the linear function (we know it is linear). We can do a similar
723 // thing by breaking our equation up into:
Brian Silverman87ac0402020-09-17 14:47:01 -0700724 //
//   [1/3  1/3   1/3  ] [ta]   [t_distributed]
//   [ 1  -1-m1   0   ] [tb] = [oab]
//   [ 1    0   -1-m2 ] [tc]   [oac]
728 //
729 // This solves to:
730 //
//   [ta]   [ a00 a01 a02]   [t_distributed]
//   [tb] = [ a10 a11 a12] * [oab]
//   [tc]   [ a20 a21 a22]   [oac]
734 //
735 // and can be split into:
736 //
//   [ta]   [ a00 ]                   [a01 a02]
//   [tb] = [ a10 ] * t_distributed + [a11 a12] * [oab]
//   [tc]   [ a20 ]                   [a21 a22]   [oac]
740 //
741 // (map_matrix_ + slope_matrix_) * [ta; tb; tc] = [offset_matrix_];
742 // offset_matrix_ will be in nanoseconds.
743 Eigen::Matrix<mpq_class, Eigen::Dynamic, Eigen::Dynamic> map_matrix_;
744 Eigen::Matrix<mpq_class, Eigen::Dynamic, Eigen::Dynamic> slope_matrix_;
745 Eigen::Matrix<mpq_class, Eigen::Dynamic, 1> offset_matrix_;
746 // Matrix tracking which offsets are valid.
747 Eigen::Matrix<bool, Eigen::Dynamic, 1> valid_matrix_;
748 // Matrix tracking the last valid matrix we used to determine connected nodes.
749 Eigen::Matrix<bool, Eigen::Dynamic, 1> last_valid_matrix_;
750 size_t cached_valid_node_count_ = 0;
Austin Schuh8bd96322020-02-13 21:18:22 -0800751
Austin Schuh2f8fd752020-09-01 22:38:28 -0700752 // [ta; tb; tc] = time_slope_matrix_ * t + time_offset_matrix;
753 // t is in seconds.
754 Eigen::Matrix<double, Eigen::Dynamic, 1> time_slope_matrix_;
755 Eigen::Matrix<double, Eigen::Dynamic, 1> time_offset_matrix_;
Austin Schuhe309d2a2019-11-29 13:25:21 -0800756
James Kuszmaul84ff3e52020-01-03 19:48:53 -0800757 std::unique_ptr<FlatbufferDetachedBuffer<Configuration>>
758 remapped_configuration_buffer_;
759
James Kuszmaul84ff3e52020-01-03 19:48:53 -0800760 std::unique_ptr<SimulatedEventLoopFactory> event_loop_factory_unique_ptr_;
761 SimulatedEventLoopFactory *event_loop_factory_ = nullptr;
James Kuszmaulc7bbb3e2020-01-03 20:01:00 -0800762
// Map of channel indices to remapped channels.  The key is an index into
// logged_configuration(), and the value's remapped_name is the name of the
// channel to send on instead of the logged channel name.
// Name and type recorded for a remapped channel.
struct RemappedChannel {
  std::string remapped_name;
  std::string new_type;
};
770 std::map<size_t, RemappedChannel> remapped_channels_;
Austin Schuh01b4c352020-09-21 23:09:39 -0700771 std::vector<MapT> maps_;
James Kuszmaulc7bbb3e2020-01-03 20:01:00 -0800772
Austin Schuh6f3babe2020-01-26 20:34:50 -0800773 // Number of nodes which still have data to send. This is used to figure out
774 // when to exit.
775 size_t live_nodes_ = 0;
776
James Kuszmaulc7bbb3e2020-01-03 20:01:00 -0800777 const Configuration *remapped_configuration_ = nullptr;
778 const Configuration *replay_configuration_ = nullptr;
Austin Schuhcde938c2020-02-02 17:30:07 -0800779
780 // If true, the replay timer will ignore any missing data. This is used
781 // during startup when we are bootstrapping everything and trying to get to
782 // the start of all the log files.
783 bool ignore_missing_data_ = false;
Austin Schuhe309d2a2019-11-29 13:25:21 -0800784};
785
786} // namespace logger
787} // namespace aos
788
789#endif // AOS_EVENTS_LOGGER_H_