blob: b14d0f83ed725b464ce9575d84a374c495349be1 [file] [log] [blame]
Alex Perrycb7da4b2019-08-28 19:35:56 -07001#ifndef AOS_EVENTS_EVENT_SCHEDULER_H_
2#define AOS_EVENTS_EVENT_SCHEDULER_H_
3
4#include <algorithm>
5#include <map>
6#include <memory>
7#include <unordered_set>
8#include <utility>
9#include <vector>
10
James Kuszmaulb67409b2022-06-20 16:25:03 -070011#include "aos/events/epoll.h"
Alex Perrycb7da4b2019-08-28 19:35:56 -070012#include "aos/events/event_loop.h"
Austin Schuh58646e22021-08-23 23:51:46 -070013#include "aos/events/logging/boot_timestamp.h"
Austin Schuh8bd96322020-02-13 21:18:22 -080014#include "aos/logging/implementations.h"
Alex Perrycb7da4b2019-08-28 19:35:56 -070015#include "aos/time/time.h"
16#include "glog/logging.h"
17
18namespace aos {
19
// The distributed clock is the common time base that the clocks of multiple
// nodes are synchronized against.  It is deliberately its own clock type so
// that nothing converts implicitly between it and the monotonic or realtime
// clocks.
class distributed_clock {
 public:
  // Nanosecond resolution, mirroring ::std::chrono::nanoseconds.
  using duration = ::std::chrono::nanoseconds;
  using rep = duration::rep;
  using period = duration::period;
  using time_point = ::std::chrono::time_point<distributed_clock>;

  // Everything in the simulation is synced to this clock, and it never jumps.
  static constexpr bool is_steady = true;

  // Returns a zero-length duration.
  static constexpr duration zero() { return duration(0); }

  // Returns the epoch (0).
  static constexpr time_point epoch() { return time_point(zero()); }

  // Smallest and largest time points representable on this clock.
  static constexpr time_point min_time{
      time_point(duration(::std::numeric_limits<duration::rep>::min()))};
  static constexpr time_point max_time{
      time_point(duration(::std::numeric_limits<duration::rep>::max()))};
};
44
45std::ostream &operator<<(std::ostream &stream,
46 const aos::distributed_clock::time_point &now);
47
Austin Schuha9abc032021-01-01 16:46:19 -080048// Interface to handle converting time on a node to and from the distributed
49// clock accurately.
50class TimeConverter {
51 public:
52 virtual ~TimeConverter() {}
53
Austin Schuh58646e22021-08-23 23:51:46 -070054 // Returns the boot UUID for a node and boot. Note: the boot UUID for
55 // subsequent calls needs to be the same each time.
56 virtual UUID boot_uuid(size_t node_index, size_t boot_count) = 0;
57
58 void set_reboot_found(
59 std::function<void(distributed_clock::time_point,
60 const std::vector<logger::BootTimestamp> &)>
61 fn) {
62 reboot_found_ = fn;
63 }
64
Austin Schuha9abc032021-01-01 16:46:19 -080065 // Converts a time to the distributed clock for scheduling and cross-node
66 // time measurement.
67 virtual distributed_clock::time_point ToDistributedClock(
Austin Schuh58646e22021-08-23 23:51:46 -070068 size_t node_index, logger::BootTimestamp time) = 0;
Austin Schuha9abc032021-01-01 16:46:19 -080069
70 // Takes the distributed time and converts it to the monotonic clock for this
71 // node.
Austin Schuh58646e22021-08-23 23:51:46 -070072 virtual logger::BootTimestamp FromDistributedClock(
73 size_t node_index, distributed_clock::time_point time,
74 size_t boot_count) = 0;
Austin Schuhb7c8d2a2021-07-19 19:22:12 -070075
76 // Called whenever time passes this point and we can forget about it.
77 virtual void ObserveTimePassed(distributed_clock::time_point time) = 0;
Austin Schuh58646e22021-08-23 23:51:46 -070078
79 protected:
80 std::function<void(distributed_clock::time_point,
81 const std::vector<logger::BootTimestamp> &)>
82 reboot_found_;
Austin Schuha9abc032021-01-01 16:46:19 -080083};
84
Austin Schuh8bd96322020-02-13 21:18:22 -080085class EventSchedulerScheduler;
86
Alex Perrycb7da4b2019-08-28 19:35:56 -070087class EventScheduler {
88 public:
Austin Schuhef8f1ae2021-12-11 12:35:05 -080089 class Event {
90 public:
91 virtual void Handle() noexcept = 0;
92 virtual ~Event() {}
93 };
94
95 using ChannelType = std::multimap<monotonic_clock::time_point, Event *>;
Alex Perrycb7da4b2019-08-28 19:35:56 -070096 using Token = ChannelType::iterator;
Austin Schuh58646e22021-08-23 23:51:46 -070097 EventScheduler(size_t node_index) : node_index_(node_index) {}
Alex Perrycb7da4b2019-08-28 19:35:56 -070098
Austin Schuh87dd3832021-01-01 23:07:31 -080099 // Sets the time converter in use for this scheduler (and the corresponding
100 // node index)
101 void SetTimeConverter(size_t node_index, TimeConverter *converter) {
Austin Schuh58646e22021-08-23 23:51:46 -0700102 CHECK_EQ(node_index_, node_index);
Austin Schuh87dd3832021-01-01 23:07:31 -0800103 converter_ = converter;
104 }
105
Austin Schuhef8f1ae2021-12-11 12:35:05 -0800106 UUID boot_uuid() { return converter_->boot_uuid(node_index_, boot_count_); }
Austin Schuh58646e22021-08-23 23:51:46 -0700107
Alex Perrycb7da4b2019-08-28 19:35:56 -0700108 // Schedule an event with a callback function
109 // Returns an iterator to the event
Austin Schuhef8f1ae2021-12-11 12:35:05 -0800110 Token Schedule(monotonic_clock::time_point time, Event *callback);
Alex Perrycb7da4b2019-08-28 19:35:56 -0700111
Austin Schuh39788ff2019-12-01 18:22:57 -0800112 // Schedules a callback when the event scheduler starts.
113 void ScheduleOnRun(std::function<void()> callback) {
114 on_run_.emplace_back(std::move(callback));
115 }
116
Austin Schuh057d29f2021-08-21 23:05:15 -0700117 // Schedules a callback when the event scheduler starts.
118 void ScheduleOnStartup(std::function<void()> callback) {
119 on_startup_.emplace_back(std::move(callback));
120 }
121
Austin Schuh58646e22021-08-23 23:51:46 -0700122 void set_on_shutdown(std::function<void()> callback) {
123 on_shutdown_ = std::move(callback);
124 }
125
126 void set_started(std::function<void()> callback) {
127 started_ = std::move(callback);
128 }
129
Austin Schuhe33c08d2022-02-03 18:15:21 -0800130 void set_stopped(std::function<void()> callback) {
131 stopped_ = std::move(callback);
132 }
133
Austin Schuh58646e22021-08-23 23:51:46 -0700134 std::function<void()> started_;
Austin Schuhe33c08d2022-02-03 18:15:21 -0800135 std::function<void()> stopped_;
Austin Schuh58646e22021-08-23 23:51:46 -0700136 std::function<void()> on_shutdown_;
137
Alex Perrycb7da4b2019-08-28 19:35:56 -0700138 Token InvalidToken() { return events_list_.end(); }
139
140 // Deschedule an event by its iterator
141 void Deschedule(Token token);
142
Austin Schuh8bd96322020-02-13 21:18:22 -0800143 // Runs the OnRun callbacks.
144 void RunOnRun();
Alex Perrycb7da4b2019-08-28 19:35:56 -0700145
Austin Schuh057d29f2021-08-21 23:05:15 -0700146 // Runs the OnStartup callbacks.
Austin Schuhe33c08d2022-02-03 18:15:21 -0800147 void RunOnStartup() noexcept;
Austin Schuh057d29f2021-08-21 23:05:15 -0700148
Austin Schuh58646e22021-08-23 23:51:46 -0700149 // Runs the Started callback.
150 void RunStarted();
Austin Schuhe33c08d2022-02-03 18:15:21 -0800151 // Runs the Started callback.
152 void RunStopped();
Austin Schuh58646e22021-08-23 23:51:46 -0700153
Austin Schuh8bd96322020-02-13 21:18:22 -0800154 // Returns true if events are being handled.
155 inline bool is_running() const;
Alex Perrycb7da4b2019-08-28 19:35:56 -0700156
Austin Schuh8bd96322020-02-13 21:18:22 -0800157 // Returns the timestamp of the next event to trigger.
Austin Schuh58646e22021-08-23 23:51:46 -0700158 monotonic_clock::time_point OldestEvent();
Austin Schuh8bd96322020-02-13 21:18:22 -0800159 // Handles the next event.
160 void CallOldestEvent();
Alex Perrycb7da4b2019-08-28 19:35:56 -0700161
Austin Schuh8bd96322020-02-13 21:18:22 -0800162 // Converts a time to the distributed clock for scheduling and cross-node time
163 // measurement.
164 distributed_clock::time_point ToDistributedClock(
165 monotonic_clock::time_point time) const {
Austin Schuh58646e22021-08-23 23:51:46 -0700166 return converter_->ToDistributedClock(node_index_,
167 {.boot = boot_count_, .time = time});
Austin Schuh8bd96322020-02-13 21:18:22 -0800168 }
169
170 // Takes the distributed time and converts it to the monotonic clock for this
171 // node.
Austin Schuh58646e22021-08-23 23:51:46 -0700172 logger::BootTimestamp FromDistributedClock(
Austin Schuh8bd96322020-02-13 21:18:22 -0800173 distributed_clock::time_point time) const {
Austin Schuh58646e22021-08-23 23:51:46 -0700174 return converter_->FromDistributedClock(node_index_, time, boot_count_);
Austin Schuh8bd96322020-02-13 21:18:22 -0800175 }
176
177 // Returns the current monotonic time on this node calculated from the
178 // distributed clock.
179 inline monotonic_clock::time_point monotonic_now() const;
180
Austin Schuh58646e22021-08-23 23:51:46 -0700181 // Returns the current monotonic time on this node calculated from the
182 // distributed clock.
183 inline distributed_clock::time_point distributed_now() const;
184
185 size_t boot_count() const { return boot_count_; }
186
187 size_t node_index() const { return node_index_; }
188
189 // For implementing reboots.
190 void Shutdown();
191 void Startup();
192
Alex Perrycb7da4b2019-08-28 19:35:56 -0700193 private:
Austin Schuh8bd96322020-02-13 21:18:22 -0800194 friend class EventSchedulerScheduler;
Austin Schuh58646e22021-08-23 23:51:46 -0700195
Alex Perrycb7da4b2019-08-28 19:35:56 -0700196 // Current execution time.
Austin Schuhbe69cf32020-08-27 11:38:33 -0700197 monotonic_clock::time_point monotonic_now_ = monotonic_clock::epoch();
Alex Perrycb7da4b2019-08-28 19:35:56 -0700198
Austin Schuh58646e22021-08-23 23:51:46 -0700199 size_t boot_count_ = 0;
200
Austin Schuh8bd96322020-02-13 21:18:22 -0800201 // List of functions to run (once) when running.
Austin Schuh39788ff2019-12-01 18:22:57 -0800202 std::vector<std::function<void()>> on_run_;
Austin Schuh057d29f2021-08-21 23:05:15 -0700203 std::vector<std::function<void()>> on_startup_;
Austin Schuh39788ff2019-12-01 18:22:57 -0800204
Alex Perrycb7da4b2019-08-28 19:35:56 -0700205 // Multimap holding times to run functions. These are stored in order, and
206 // the order is the callback tree.
207 ChannelType events_list_;
Austin Schuh8bd96322020-02-13 21:18:22 -0800208
209 // Pointer to the actual scheduler.
210 EventSchedulerScheduler *scheduler_scheduler_ = nullptr;
Austin Schuh87dd3832021-01-01 23:07:31 -0800211
212 // Node index handle to be handed back to the TimeConverter. This lets the
213 // same time converter be used for all the nodes, and the node index
214 // distinguish which one.
215 size_t node_index_ = 0;
216
217 // Converts time by doing nothing to it.
218 class UnityConverter final : public TimeConverter {
219 public:
220 distributed_clock::time_point ToDistributedClock(
Austin Schuh58646e22021-08-23 23:51:46 -0700221 size_t /*node_index*/, logger::BootTimestamp time) override {
222 CHECK_EQ(time.boot, 0u) << ": Reboots unsupported by default.";
223 return distributed_clock::epoch() + time.time.time_since_epoch();
Austin Schuh87dd3832021-01-01 23:07:31 -0800224 }
225
Austin Schuh58646e22021-08-23 23:51:46 -0700226 logger::BootTimestamp FromDistributedClock(
227 size_t /*node_index*/, distributed_clock::time_point time,
228 size_t boot_count) override {
229 CHECK_EQ(boot_count, 0u);
230 return logger::BootTimestamp{
231 .boot = boot_count,
232 .time = monotonic_clock::epoch() + time.time_since_epoch()};
Austin Schuh87dd3832021-01-01 23:07:31 -0800233 }
Austin Schuhb7c8d2a2021-07-19 19:22:12 -0700234
235 void ObserveTimePassed(distributed_clock::time_point /*time*/) override {}
Austin Schuh58646e22021-08-23 23:51:46 -0700236
237 UUID boot_uuid(size_t /*node_index*/, size_t boot_count) override {
238 CHECK_EQ(boot_count, 0u);
239 return uuid_;
240 }
241
242 private:
243 const UUID uuid_ = UUID::Random();
Austin Schuh87dd3832021-01-01 23:07:31 -0800244 };
245
246 UnityConverter unity_converter_;
247
248 TimeConverter *converter_ = &unity_converter_;
Alex Perrycb7da4b2019-08-28 19:35:56 -0700249};
250
Austin Schuh8bd96322020-02-13 21:18:22 -0800251// We need a heap of heaps...
252//
253// Events in a node have a very well defined progression of time. It is linear
254// and well represented by the monotonic clock.
255//
256// Events across nodes don't follow this well. Time skews between the two nodes
257// all the time. We also don't know the function ahead of time which converts
258// from each node's monotonic clock to the distributed clock (our unified base
259// time which is likely the average time between nodes).
260//
261// This pushes us towards merge sort. Sorting each node's events with a heap
262// like we used to be doing, and then sorting each of those nodes independently.
263class EventSchedulerScheduler {
264 public:
265 // Adds an event scheduler to the list.
266 void AddEventScheduler(EventScheduler *scheduler);
267
268 // Runs until there are no more events or Exit is called.
269 void Run();
270
271 // Stops running.
272 void Exit() { is_running_ = false; }
273
274 bool is_running() const { return is_running_; }
275
276 // Runs for a duration on the distributed clock. Time on the distributed
277 // clock should be very representative of time on each node, but won't be
278 // exactly the same.
279 void RunFor(distributed_clock::duration duration);
280
James Kuszmaulb67409b2022-06-20 16:25:03 -0700281 // Sets the realtime replay rate. A value of 1.0 will cause the scheduler to
282 // try to play events in realtime. 0.5 will run at half speed. Use infinity
283 // (the default) to run as fast as possible. This can be changed during
284 // run-time.
285 void SetReplayRate(double replay_rate) { replay_rate_ = replay_rate; }
286 internal::EPoll *epoll() { return &epoll_; }
287
Austin Schuh8bd96322020-02-13 21:18:22 -0800288 // Returns the current distributed time.
289 distributed_clock::time_point distributed_now() const { return now_; }
290
Austin Schuh057d29f2021-08-21 23:05:15 -0700291 void RunOnStartup() {
292 CHECK(!is_running_);
293 for (EventScheduler *scheduler : schedulers_) {
294 scheduler->RunOnStartup();
295 }
Austin Schuh58646e22021-08-23 23:51:46 -0700296 for (EventScheduler *scheduler : schedulers_) {
297 scheduler->RunStarted();
298 }
299 }
300
Austin Schuhe33c08d2022-02-03 18:15:21 -0800301 void RunStopped() {
302 CHECK(!is_running_);
303 for (EventScheduler *scheduler : schedulers_) {
304 scheduler->RunStopped();
305 }
306 }
307
Austin Schuh58646e22021-08-23 23:51:46 -0700308 void SetTimeConverter(TimeConverter *time_converter) {
309 time_converter->set_reboot_found(
310 [this](distributed_clock::time_point reboot_time,
311 const std::vector<logger::BootTimestamp> &node_times) {
312 if (!reboots_.empty()) {
313 CHECK_GT(reboot_time, std::get<0>(reboots_.back()));
314 }
315 reboots_.emplace_back(reboot_time, node_times);
316 });
Austin Schuh057d29f2021-08-21 23:05:15 -0700317 }
318
Austin Schuhe33c08d2022-02-03 18:15:21 -0800319 // Runs the provided callback now. Stops everything, runs the callback, then
320 // starts it all up again. This lets us do operations like starting and
321 // stopping applications while running.
322 void TemporarilyStopAndRun(std::function<void()> fn);
323
Austin Schuh8bd96322020-02-13 21:18:22 -0800324 private:
325 // Handles running the OnRun functions.
326 void RunOnRun() {
327 CHECK(!is_running_);
328 is_running_ = true;
329 for (EventScheduler *scheduler : schedulers_) {
330 scheduler->RunOnRun();
331 }
332 }
333
Austin Schuh58646e22021-08-23 23:51:46 -0700334 void Reboot();
335
Austin Schuh8bd96322020-02-13 21:18:22 -0800336 // Returns the next event time and scheduler on which to run it.
337 std::tuple<distributed_clock::time_point, EventScheduler *> OldestEvent();
338
James Kuszmaulb67409b2022-06-20 16:25:03 -0700339 // Handles running loop_body repeatedly until complete. loop_body should
340 // return the next time at which it wants to be called, and set is_running_ to
341 // false once we should stop.
342 template <typename F>
343 void RunMaybeRealtimeLoop(F loop_body);
344
Austin Schuh8bd96322020-02-13 21:18:22 -0800345 // True if we are running.
346 bool is_running_ = false;
347 // The current time.
348 distributed_clock::time_point now_ = distributed_clock::epoch();
349 // List of schedulers to run in sync.
350 std::vector<EventScheduler *> schedulers_;
Austin Schuh58646e22021-08-23 23:51:46 -0700351
352 // List of when to reboot each node.
353 std::vector<std::tuple<distributed_clock::time_point,
354 std::vector<logger::BootTimestamp>>>
355 reboots_;
James Kuszmaulb67409b2022-06-20 16:25:03 -0700356
357 double replay_rate_ = std::numeric_limits<double>::infinity();
358 internal::EPoll epoll_;
Austin Schuh8bd96322020-02-13 21:18:22 -0800359};
360
Austin Schuh58646e22021-08-23 23:51:46 -0700361inline distributed_clock::time_point EventScheduler::distributed_now() const {
362 return scheduler_scheduler_->distributed_now();
363}
Austin Schuh8bd96322020-02-13 21:18:22 -0800364inline monotonic_clock::time_point EventScheduler::monotonic_now() const {
Austin Schuh58646e22021-08-23 23:51:46 -0700365 const logger::BootTimestamp t =
366 FromDistributedClock(scheduler_scheduler_->distributed_now());
Austin Schuh60e77942022-05-16 17:48:24 -0700367 CHECK_EQ(t.boot, boot_count_)
368 << ": "
369 << " " << t << " d " << scheduler_scheduler_->distributed_now();
Austin Schuh58646e22021-08-23 23:51:46 -0700370 return t.time;
Austin Schuh8bd96322020-02-13 21:18:22 -0800371}
372
373inline bool EventScheduler::is_running() const {
374 return scheduler_scheduler_->is_running();
375}
376
Alex Perrycb7da4b2019-08-28 19:35:56 -0700377} // namespace aos
378
379#endif // AOS_EVENTS_EVENT_SCHEDULER_H_