blob: 6d88aa69319974653e0ac13470561b9c4facc5a5 [file] [log] [blame]
Alex Perrycb7da4b2019-08-28 19:35:56 -07001#ifndef AOS_EVENTS_SIMULATED_EVENT_LOOP_H_
2#define AOS_EVENTS_SIMULATED_EVENT_LOOP_H_
3
4#include <algorithm>
Brian Silverman601b9722020-06-18 14:33:43 -07005#include <functional>
Alex Perrycb7da4b2019-08-28 19:35:56 -07006#include <map>
7#include <memory>
Austin Schuh5f1cc5c2019-12-01 18:01:11 -08008#include <string_view>
Alex Perrycb7da4b2019-08-28 19:35:56 -07009#include <unordered_set>
10#include <utility>
11#include <vector>
12
13#include "absl/container/btree_map.h"
Philipp Schrader790cb542023-07-05 21:06:52 -070014#include "glog/logging.h"
15
Alex Perrycb7da4b2019-08-28 19:35:56 -070016#include "aos/events/event_loop.h"
17#include "aos/events/event_scheduler.h"
Austin Schuhe1dafe42020-01-06 21:12:03 -080018#include "aos/events/simple_channel.h"
Alex Perrycb7da4b2019-08-28 19:35:56 -070019#include "aos/flatbuffer_merge.h"
20#include "aos/flatbuffers.h"
21#include "aos/ipc_lib/index.h"
Austin Schuh4385b142021-03-14 21:31:13 -070022#include "aos/uuid.h"
Alex Perrycb7da4b2019-08-28 19:35:56 -070023
24namespace aos {
25
// Forward declaration of the class for simulated fetchers.
class SimulatedChannel;

// Remaining types which this header only refers to by pointer, reference or
// friendship.
class NodeEventLoopFactory;
class SimulatedEventLoop;
class SimulatedFactoryExitHandle;

namespace message_bridge {
class SimulatedMessageBridge;
}  // namespace message_bridge
Austin Schuhac0771c2020-01-07 18:36:30 -080035
36// There are 2 concepts needed to support multi-node simulations.
37// 1) The node. This is implemented with NodeEventLoopFactory.
38// 2) The "robot" which runs multiple nodes. This is implemented with
39// SimulatedEventLoopFactory.
40//
41// To make things easier, SimulatedEventLoopFactory takes an optional Node
42// argument if you want to make event loops without interacting with the
43// NodeEventLoopFactory object.
44//
45// The basic flow goes something like as follows:
46//
47// SimulatedEventLoopFactory factory(config);
48// const Node *pi1 = configuration::GetNode(factory.configuration(), "pi1");
49// std::unique_ptr<EventLoop> event_loop = factory.MakeEventLoop("ping", pi1);
50//
51// Or
52//
53// SimulatedEventLoopFactory factory(config);
54// const Node *pi1 = configuration::GetNode(factory.configuration(), "pi1");
55// NodeEventLoopFactory *pi1_factory = factory.GetNodeEventLoopFactory(pi1);
56// std::unique_ptr<EventLoop> event_loop = pi1_factory.MakeEventLoop("ping");
57//
58// The distributed_clock is used to be the base time. NodeEventLoopFactory has
59// all the information needed to adjust both the realtime and monotonic clocks
60// relative to the distributed_clock.
Alex Perrycb7da4b2019-08-28 19:35:56 -070061class SimulatedEventLoopFactory {
62 public:
63 // Constructs a SimulatedEventLoopFactory with the provided configuration.
64 // This configuration must remain in scope for the lifetime of the factory and
65 // all sub-objects.
66 SimulatedEventLoopFactory(const Configuration *configuration);
67 ~SimulatedEventLoopFactory();
68
Austin Schuh58646e22021-08-23 23:51:46 -070069 SimulatedEventLoopFactory(const SimulatedEventLoopFactory &) = delete;
70 SimulatedEventLoopFactory &operator=(const SimulatedEventLoopFactory &) =
71 delete;
72 SimulatedEventLoopFactory(SimulatedEventLoopFactory &&) = delete;
73 SimulatedEventLoopFactory &operator=(SimulatedEventLoopFactory &&) = delete;
74
Austin Schuhac0771c2020-01-07 18:36:30 -080075 // Creates an event loop. If running in a multi-node environment, node needs
76 // to point to the node to create this event loop on.
77 ::std::unique_ptr<EventLoop> MakeEventLoop(std::string_view name,
78 const Node *node = nullptr);
79
80 // Returns the NodeEventLoopFactory for the provided node. The returned
81 // NodeEventLoopFactory is owned by the SimulatedEventLoopFactory and has a
82 // lifetime identical to the factory.
83 NodeEventLoopFactory *GetNodeEventLoopFactory(const Node *node);
Austin Schuh057d29f2021-08-21 23:05:15 -070084 NodeEventLoopFactory *GetNodeEventLoopFactory(std::string_view node);
Alex Perrycb7da4b2019-08-28 19:35:56 -070085
Austin Schuh87dd3832021-01-01 23:07:31 -080086 // Sets the time converter for all nodes.
87 void SetTimeConverter(TimeConverter *time_converter);
88
Austin Schuh58646e22021-08-23 23:51:46 -070089 // Starts executing the event loops unconditionally until Exit is called or
90 // all the nodes have shut down.
Alex Perrycb7da4b2019-08-28 19:35:56 -070091 void Run();
92 // Executes the event loops for a duration.
Austin Schuhac0771c2020-01-07 18:36:30 -080093 void RunFor(distributed_clock::duration duration);
Alex Perrycb7da4b2019-08-28 19:35:56 -070094
95 // Stops executing all event loops. Meant to be called from within an event
96 // loop handler.
Austin Schuh8fb315a2020-11-19 22:33:58 -080097 void Exit();
Alex Perrycb7da4b2019-08-28 19:35:56 -070098
Brian Silvermane1fe2512022-08-14 23:18:50 -070099 std::unique_ptr<ExitHandle> MakeExitHandle();
100
Austin Schuhac0771c2020-01-07 18:36:30 -0800101 const std::vector<const Node *> &nodes() const { return nodes_; }
102
103 // Sets the simulated send delay for all messages sent within a single node.
Austin Schuh7d87b672019-12-01 20:23:49 -0800104 void set_send_delay(std::chrono::nanoseconds send_delay);
Austin Schuhac0771c2020-01-07 18:36:30 -0800105 std::chrono::nanoseconds send_delay() const { return send_delay_; }
106
107 // Sets the simulated network delay for messages forwarded between nodes.
Brian Silvermana7c62052020-04-28 16:52:27 -0700108 void set_network_delay(std::chrono::nanoseconds network_delay) {
109 network_delay_ = network_delay;
110 }
Austin Schuhac0771c2020-01-07 18:36:30 -0800111 std::chrono::nanoseconds network_delay() const { return network_delay_; }
112
113 // Returns the clock used to synchronize the nodes.
114 distributed_clock::time_point distributed_now() const {
Austin Schuh8bd96322020-02-13 21:18:22 -0800115 return scheduler_scheduler_.distributed_now();
Austin Schuhac0771c2020-01-07 18:36:30 -0800116 }
117
118 // Returns the configuration used for everything.
119 const Configuration *configuration() const { return configuration_; }
120
Austin Schuh6f3babe2020-01-26 20:34:50 -0800121 // Disables forwarding for this channel. This should be used very rarely only
122 // for things like the logger.
123 void DisableForwarding(const Channel *channel);
124
Austin Schuh4c3b9702020-08-30 11:34:55 -0700125 // Disables the messages sent by the simulated message gateway.
126 void DisableStatistics();
James Kuszmaul94ca5132022-07-19 09:11:08 -0700127 // Disables statistics sent by the simulated message gateway, and prevents
128 // EnableStatistcs from ever being called again (used by LogReader).
129 void PermanentlyDisableStatistics();
Austin Schuh48205e62021-11-12 14:13:18 -0800130 // Enables the messages sent by the simulated message gateway.
131 void EnableStatistics();
Austin Schuh4c3b9702020-08-30 11:34:55 -0700132
Austin Schuh2928ebe2021-02-07 22:10:27 -0800133 // Calls SkipTimingReport() on all EventLoops used as part of the
134 // infrastructure. This may improve the performance of long-simulated-duration
135 // tests.
136 void SkipTimingReport();
137
Austin Schuhe33c08d2022-02-03 18:15:21 -0800138 // Re-enables application creation for the duration of fn. This is mostly to
139 // allow use cases like log reading to create applications after the node
140 // starts up without stopping execution.
141 void AllowApplicationCreationDuring(std::function<void()> fn);
142
James Kuszmaulb67409b2022-06-20 16:25:03 -0700143 // Sets the realtime replay rate. A value of 1.0 will cause the scheduler to
144 // try to play events in realtime. 0.5 will run at half speed. Use infinity
145 // (the default) to run as fast as possible. This can be changed during
146 // run-time.
147 void SetRealtimeReplayRate(double replay_rate);
148
149 // Access to the internal scheduler's epoll object for realtime replay.
150 internal::EPoll *scheduler_epoll() { return scheduler_scheduler_.epoll(); }
151
Austin Schuhac0771c2020-01-07 18:36:30 -0800152 private:
Austin Schuhc0b0f722020-12-12 18:36:06 -0800153 friend class NodeEventLoopFactory;
Brian Silvermane1fe2512022-08-14 23:18:50 -0700154 friend class SimulatedFactoryExitHandle;
Austin Schuhc0b0f722020-12-12 18:36:06 -0800155
Austin Schuhac0771c2020-01-07 18:36:30 -0800156 const Configuration *const configuration_;
Austin Schuh8bd96322020-02-13 21:18:22 -0800157 EventSchedulerScheduler scheduler_scheduler_;
Austin Schuhac0771c2020-01-07 18:36:30 -0800158
159 std::chrono::nanoseconds send_delay_ = std::chrono::microseconds(50);
160 std::chrono::nanoseconds network_delay_ = std::chrono::microseconds(100);
161
Austin Schuh58646e22021-08-23 23:51:46 -0700162 std::unique_ptr<message_bridge::SimulatedMessageBridge> bridge_;
163
Austin Schuhac0771c2020-01-07 18:36:30 -0800164 std::vector<std::unique_ptr<NodeEventLoopFactory>> node_factories_;
165
166 std::vector<const Node *> nodes_;
Brian Silvermane1fe2512022-08-14 23:18:50 -0700167
168 int exit_handle_count_ = 0;
Austin Schuhac0771c2020-01-07 18:36:30 -0800169};
170
171// This class holds all the state required to be a single node.
172class NodeEventLoopFactory {
173 public:
Austin Schuh057d29f2021-08-21 23:05:15 -0700174 ~NodeEventLoopFactory();
175
James Kuszmaul890c2492022-04-06 14:59:31 -0700176 // Whether a given event loop should have its senders checked for messages
177 // being sent too fast. Should only be used by the LogReader or other highly
178 // specialized applications that need to be able to bypass normal behaviors.
179 enum class CheckSentTooFast { kNo, kYes };
180 // Whether the created EventLoop should be the only one allowed to send on all
181 // of its channels. Mostly useful for the LogReader, to allow us to confirm
182 // whether the LogReader is conflicting with the output of any applications
183 // being run in replay.
184 enum class ExclusiveSenders { kNo, kYes };
185 struct EventLoopOptions {
186 CheckSentTooFast check_sent_too_fast;
187 ExclusiveSenders exclusive_senders;
James Kuszmaul94ca5132022-07-19 09:11:08 -0700188 // per_channel_exclusivity is used to list any exceptions to the overall
189 // exclusive_senders policy for this event loop.
190 std::vector<std::pair<const aos::Channel *, ExclusiveSenders>>
191 per_channel_exclusivity;
James Kuszmaul890c2492022-04-06 14:59:31 -0700192 };
193
194 // Takes the name for the event loop and a struct of options for selecting
195 // what checks to run for the event loop in question.
196 std::unique_ptr<EventLoop> MakeEventLoop(
197 std::string_view name,
James Kuszmaul94ca5132022-07-19 09:11:08 -0700198 EventLoopOptions options = EventLoopOptions{
199 CheckSentTooFast::kYes, ExclusiveSenders::kNo, {}});
Austin Schuh7d87b672019-12-01 20:23:49 -0800200
Austin Schuh217a9782019-12-21 23:02:50 -0800201 // Returns the node that this factory is running as, or nullptr if this is a
202 // single node setup.
203 const Node *node() const { return node_; }
204
Austin Schuh92547522019-12-28 14:33:43 -0800205 // Sets realtime clock to realtime_now for a given monotonic clock.
206 void SetRealtimeOffset(monotonic_clock::time_point monotonic_now,
207 realtime_clock::time_point realtime_now) {
Austin Schuhac0771c2020-01-07 18:36:30 -0800208 realtime_offset_ =
209 realtime_now.time_since_epoch() - monotonic_now.time_since_epoch();
Austin Schuh92547522019-12-28 14:33:43 -0800210 }
211
Austin Schuhac0771c2020-01-07 18:36:30 -0800212 // Returns the current time on both clocks.
213 inline monotonic_clock::time_point monotonic_now() const;
214 inline realtime_clock::time_point realtime_now() const;
Austin Schuh58646e22021-08-23 23:51:46 -0700215 inline distributed_clock::time_point distributed_now() const;
Austin Schuh39788ff2019-12-01 18:22:57 -0800216
Austin Schuhfaec5e12020-11-05 17:39:55 -0800217 const Configuration *configuration() const {
218 return factory_->configuration();
219 }
220
Austin Schuh58646e22021-08-23 23:51:46 -0700221 // Starts the node up by calling the OnStartup handlers. These get called
222 // every time a node is started.
223
James Kuszmaul82c3b512023-07-08 20:25:41 -0700224 // Called when a node has started. This will most commonly be at the monotonic
225 // clock epoch. In log replay, this will occur prior to any messages
226 // (including "fetched" messages) being replayed for that node.
227 // Called on every boot.
Austin Schuh58646e22021-08-23 23:51:46 -0700228 void OnStartup(std::function<void()> &&fn);
229
230 // Called when a node shuts down. These get called every time a node is shut
231 // down. All applications are destroyed right after the last OnShutdown
232 // callback is called.
233 void OnShutdown(std::function<void()> &&fn);
234
235 // Starts an application if the configuration says it should be started on
236 // this node. name is the name of the application. args are the constructor
237 // args for the Main class. Returns a pointer to the class that was started
238 // if it was started, or nullptr.
239 template <class Main, class... Args>
Philipp Schrader790cb542023-07-05 21:06:52 -0700240 Main *MaybeStart(std::string_view name, Args &&...args);
Austin Schuh58646e22021-08-23 23:51:46 -0700241
242 // Starts an application regardless of if the config says to or not. name is
243 // the name of the application, and args are the constructor args for the
244 // application. Returns a pointer to the class that was started.
245 template <class Main, class... Args>
Philipp Schrader790cb542023-07-05 21:06:52 -0700246 Main *AlwaysStart(std::string_view name, Args &&...args);
Austin Schuh58646e22021-08-23 23:51:46 -0700247
Austin Schuh898f4972020-01-11 17:21:25 -0800248 // Returns the simulated network delay for messages forwarded between nodes.
249 std::chrono::nanoseconds network_delay() const {
250 return factory_->network_delay();
251 }
252 // Returns the simulated send delay for all messages sent within a single
253 // node.
254 std::chrono::nanoseconds send_delay() const { return factory_->send_delay(); }
255
Austin Schuh58646e22021-08-23 23:51:46 -0700256 size_t boot_count() const { return scheduler_.boot_count(); }
257
James Kuszmaul80d6c422023-01-06 14:16:04 -0800258 bool is_running() const { return scheduler_.is_running(); }
259
Austin Schuh8bd96322020-02-13 21:18:22 -0800260 // TODO(austin): Private for the following?
261
Austin Schuhac0771c2020-01-07 18:36:30 -0800262 // Converts a time to the distributed clock for scheduling and cross-node time
263 // measurement.
Austin Schuh87dd3832021-01-01 23:07:31 -0800264 // Note: converting time too far in the future can cause problems when
265 // replaying logs. Only convert times in the present or near past.
Austin Schuhac0771c2020-01-07 18:36:30 -0800266 inline distributed_clock::time_point ToDistributedClock(
267 monotonic_clock::time_point time) const;
Austin Schuh58646e22021-08-23 23:51:46 -0700268 inline logger::BootTimestamp FromDistributedClock(
Austin Schuhbe69cf32020-08-27 11:38:33 -0700269 distributed_clock::time_point time) const;
Austin Schuhac0771c2020-01-07 18:36:30 -0800270
Austin Schuh87dd3832021-01-01 23:07:31 -0800271 // Sets the class used to convert time. This pointer must out-live the
272 // SimulatedEventLoopFactory.
273 void SetTimeConverter(TimeConverter *time_converter) {
274 scheduler_.SetTimeConverter(
275 configuration::GetNodeIndex(factory_->configuration(), node_),
276 time_converter);
Austin Schuhcde938c2020-02-02 17:30:07 -0800277 }
278
Austin Schuh20ac95d2020-12-05 17:24:19 -0800279 // Returns the boot UUID for this node.
Austin Schuh58646e22021-08-23 23:51:46 -0700280 const UUID &boot_uuid() {
281 if (boot_uuid_ == UUID::Zero()) {
282 boot_uuid_ = scheduler_.boot_uuid();
283 }
284 return boot_uuid_;
285 }
Austin Schuh20ac95d2020-12-05 17:24:19 -0800286
Austin Schuhc0b0f722020-12-12 18:36:06 -0800287 // Stops forwarding messages to the other node, and reports disconnected in
288 // the ServerStatistics message for this node, and the ClientStatistics for
289 // the other node.
290 void Disconnect(const Node *other);
291 // Resumes forwarding messages.
292 void Connect(const Node *other);
293
Austin Schuh48205e62021-11-12 14:13:18 -0800294 // Disables the messages sent by the simulated message gateway.
295 void DisableStatistics();
296 // Enables the messages sent by the simulated message gateway.
297 void EnableStatistics();
298
Austin Schuhac0771c2020-01-07 18:36:30 -0800299 private:
300 friend class SimulatedEventLoopFactory;
Austin Schuh057d29f2021-08-21 23:05:15 -0700301 NodeEventLoopFactory(EventSchedulerScheduler *scheduler_scheduler,
302 SimulatedEventLoopFactory *factory, const Node *node);
Austin Schuhac0771c2020-01-07 18:36:30 -0800303
Austin Schuh48205e62021-11-12 14:13:18 -0800304 // Skips timing reports on all event loops on this node.
305 void SkipTimingReport();
306
Austin Schuh58646e22021-08-23 23:51:46 -0700307 // Helpers to restart.
308 void ScheduleStartup();
309 void Startup();
310 void Shutdown();
311
Austin Schuh8bd96322020-02-13 21:18:22 -0800312 EventScheduler scheduler_;
Austin Schuhac0771c2020-01-07 18:36:30 -0800313 SimulatedEventLoopFactory *const factory_;
Austin Schuh7d87b672019-12-01 20:23:49 -0800314
Austin Schuh58646e22021-08-23 23:51:46 -0700315 UUID boot_uuid_ = UUID::Zero();
Austin Schuh20ac95d2020-12-05 17:24:19 -0800316
Austin Schuh217a9782019-12-21 23:02:50 -0800317 const Node *const node_;
318
Austin Schuh48205e62021-11-12 14:13:18 -0800319 bool skip_timing_report_ = false;
320
Austin Schuh057d29f2021-08-21 23:05:15 -0700321 std::vector<SimulatedEventLoop *> event_loops_;
Austin Schuhac0771c2020-01-07 18:36:30 -0800322
Austin Schuhac0771c2020-01-07 18:36:30 -0800323 std::chrono::nanoseconds realtime_offset_ = std::chrono::seconds(0);
324
325 // Map from name, type to queue.
326 absl::btree_map<SimpleChannel, std::unique_ptr<SimulatedChannel>> channels_;
327
328 // pid so we get unique timing reports.
Austin Schuh39788ff2019-12-01 18:22:57 -0800329 pid_t tid_ = 0;
Austin Schuh58646e22021-08-23 23:51:46 -0700330
331 // True if we are started.
332 bool started_ = false;
333
334 std::vector<std::function<void()>> pending_on_startup_;
335 std::vector<std::function<void()>> on_startup_;
336 std::vector<std::function<void()>> on_shutdown_;
337
338 // Base class for an application to start. This shouldn't be used directly.
339 struct Application {
340 Application(NodeEventLoopFactory *node_factory, std::string_view name)
341 : event_loop(node_factory->MakeEventLoop(name)) {}
342 virtual ~Application() {}
343
344 std::unique_ptr<EventLoop> event_loop;
345 };
346
347 // Subclass to do type erasure for the base class. Holds an instance of a
348 // specific class. Use SimulationStarter instead.
349 template <typename Main>
350 struct TypedApplication : public Application {
351 // Constructs an Application by delegating the arguments used to construct
352 // the event loop to Application and the rest of the args to the actual
353 // application.
354 template <class... Args>
355 TypedApplication(NodeEventLoopFactory *node_factory, std::string_view name,
Philipp Schrader790cb542023-07-05 21:06:52 -0700356 Args &&...args)
Austin Schuh58646e22021-08-23 23:51:46 -0700357 : Application(node_factory, name),
358 main(event_loop.get(), std::forward<Args>(args)...) {
359 VLOG(1) << node_factory->scheduler_.distributed_now() << " "
360 << (node_factory->node() == nullptr
361 ? ""
362 : node_factory->node()->name()->str() + " ")
363 << node_factory->monotonic_now() << " Starting Application \""
364 << name << "\"";
365 }
366 ~TypedApplication() override {}
367
368 Main main;
369 };
370
371 std::vector<std::unique_ptr<Application>> applications_;
Alex Perrycb7da4b2019-08-28 19:35:56 -0700372};
373
Austin Schuh58646e22021-08-23 23:51:46 -0700374template <class Main, class... Args>
Philipp Schrader790cb542023-07-05 21:06:52 -0700375Main *NodeEventLoopFactory::MaybeStart(std::string_view name, Args &&...args) {
Austin Schuh58646e22021-08-23 23:51:46 -0700376 const aos::Application *application =
377 configuration::GetApplication(configuration(), node(), name);
378
379 if (application != nullptr) {
380 return AlwaysStart<Main>(name, std::forward<Args>(args)...);
381 }
382 return nullptr;
383}
384
385template <class Main, class... Args>
Philipp Schrader790cb542023-07-05 21:06:52 -0700386Main *NodeEventLoopFactory::AlwaysStart(std::string_view name, Args &&...args) {
Austin Schuh58646e22021-08-23 23:51:46 -0700387 std::unique_ptr<TypedApplication<Main>> app =
388 std::make_unique<TypedApplication<Main>>(this, name,
389 std::forward<Args>(args)...);
390 Main *main_ptr = &app->main;
391 applications_.emplace_back(std::move(app));
392 return main_ptr;
393}
394
Austin Schuhac0771c2020-01-07 18:36:30 -0800395inline monotonic_clock::time_point NodeEventLoopFactory::monotonic_now() const {
Austin Schuh8bd96322020-02-13 21:18:22 -0800396 // TODO(austin): Confirm that time never goes backwards?
Austin Schuhbe69cf32020-08-27 11:38:33 -0700397 return scheduler_.monotonic_now();
Austin Schuhac0771c2020-01-07 18:36:30 -0800398}
399
400inline realtime_clock::time_point NodeEventLoopFactory::realtime_now() const {
401 return realtime_clock::time_point(monotonic_now().time_since_epoch() +
402 realtime_offset_);
403}
404
Austin Schuh58646e22021-08-23 23:51:46 -0700405inline distributed_clock::time_point NodeEventLoopFactory::distributed_now()
406 const {
407 return scheduler_.distributed_now();
408}
409
410inline logger::BootTimestamp NodeEventLoopFactory::FromDistributedClock(
Austin Schuhbe69cf32020-08-27 11:38:33 -0700411 distributed_clock::time_point time) const {
412 return scheduler_.FromDistributedClock(time);
413}
414
Austin Schuhac0771c2020-01-07 18:36:30 -0800415inline distributed_clock::time_point NodeEventLoopFactory::ToDistributedClock(
416 monotonic_clock::time_point time) const {
Austin Schuh8bd96322020-02-13 21:18:22 -0800417 return scheduler_.ToDistributedClock(time);
Austin Schuhac0771c2020-01-07 18:36:30 -0800418}
419
Alex Perrycb7da4b2019-08-28 19:35:56 -0700420} // namespace aos
421
422#endif // AOS_EVENTS_SIMULATED_EVENT_LOOP_H_