Alex Perry | cb7da4b | 2019-08-28 19:35:56 -0700 | [diff] [blame] | 1 | #ifndef AOS_EVENTS_EVENT_SCHEDULER_H_ |
| 2 | #define AOS_EVENTS_EVENT_SCHEDULER_H_ |
| 3 | |
| 4 | #include <algorithm> |
| 5 | #include <map> |
| 6 | #include <memory> |
| 7 | #include <unordered_set> |
| 8 | #include <utility> |
| 9 | #include <vector> |
| 10 | |
| 11 | #include "aos/events/event_loop.h" |
Austin Schuh | 58646e2 | 2021-08-23 23:51:46 -0700 | [diff] [blame^] | 12 | #include "aos/events/logging/boot_timestamp.h" |
Austin Schuh | 8bd9632 | 2020-02-13 21:18:22 -0800 | [diff] [blame] | 13 | #include "aos/logging/implementations.h" |
Alex Perry | cb7da4b | 2019-08-28 19:35:56 -0700 | [diff] [blame] | 14 | #include "aos/time/time.h" |
| 15 | #include "glog/logging.h" |
| 16 | |
| 17 | namespace aos { |
| 18 | |
// Clock which forms the common time base used to synchronize multiple nodes.
// It is deliberately its own type so that a time on this clock can never be
// converted implicitly to or from the monotonic or realtime clocks.
class distributed_clock {
 public:
  using duration = ::std::chrono::nanoseconds;
  using rep = duration::rep;
  using period = duration::period;
  using time_point = ::std::chrono::time_point<distributed_clock>;

  // Everything in the simulation is synced to this clock, and it never jumps.
  static constexpr bool is_steady = true;

  // A duration of length 0.
  static constexpr duration zero() { return duration(0); }

  // The epoch of this clock, i.e. the time point at offset zero().
  static constexpr time_point epoch() { return time_point(zero()); }

  // Smallest and largest time points representable on this clock.
  static constexpr time_point min_time{
      time_point(duration(::std::numeric_limits<rep>::min()))};
  static constexpr time_point max_time{
      time_point(duration(::std::numeric_limits<rep>::max()))};
};
| 43 | |
| 44 | std::ostream &operator<<(std::ostream &stream, |
| 45 | const aos::distributed_clock::time_point &now); |
| 46 | |
Austin Schuh | a9abc03 | 2021-01-01 16:46:19 -0800 | [diff] [blame] | 47 | // Interface to handle converting time on a node to and from the distributed |
| 48 | // clock accurately. |
| 49 | class TimeConverter { |
| 50 | public: |
| 51 | virtual ~TimeConverter() {} |
| 52 | |
Austin Schuh | 58646e2 | 2021-08-23 23:51:46 -0700 | [diff] [blame^] | 53 | // Returns the boot UUID for a node and boot. Note: the boot UUID for |
| 54 | // subsequent calls needs to be the same each time. |
| 55 | virtual UUID boot_uuid(size_t node_index, size_t boot_count) = 0; |
| 56 | |
| 57 | void set_reboot_found( |
| 58 | std::function<void(distributed_clock::time_point, |
| 59 | const std::vector<logger::BootTimestamp> &)> |
| 60 | fn) { |
| 61 | reboot_found_ = fn; |
| 62 | } |
| 63 | |
Austin Schuh | a9abc03 | 2021-01-01 16:46:19 -0800 | [diff] [blame] | 64 | // Converts a time to the distributed clock for scheduling and cross-node |
| 65 | // time measurement. |
| 66 | virtual distributed_clock::time_point ToDistributedClock( |
Austin Schuh | 58646e2 | 2021-08-23 23:51:46 -0700 | [diff] [blame^] | 67 | size_t node_index, logger::BootTimestamp time) = 0; |
Austin Schuh | a9abc03 | 2021-01-01 16:46:19 -0800 | [diff] [blame] | 68 | |
| 69 | // Takes the distributed time and converts it to the monotonic clock for this |
| 70 | // node. |
Austin Schuh | 58646e2 | 2021-08-23 23:51:46 -0700 | [diff] [blame^] | 71 | virtual logger::BootTimestamp FromDistributedClock( |
| 72 | size_t node_index, distributed_clock::time_point time, |
| 73 | size_t boot_count) = 0; |
Austin Schuh | b7c8d2a | 2021-07-19 19:22:12 -0700 | [diff] [blame] | 74 | |
| 75 | // Called whenever time passes this point and we can forget about it. |
| 76 | virtual void ObserveTimePassed(distributed_clock::time_point time) = 0; |
Austin Schuh | 58646e2 | 2021-08-23 23:51:46 -0700 | [diff] [blame^] | 77 | |
| 78 | protected: |
| 79 | std::function<void(distributed_clock::time_point, |
| 80 | const std::vector<logger::BootTimestamp> &)> |
| 81 | reboot_found_; |
Austin Schuh | a9abc03 | 2021-01-01 16:46:19 -0800 | [diff] [blame] | 82 | }; |
| 83 | |
Austin Schuh | 8bd9632 | 2020-02-13 21:18:22 -0800 | [diff] [blame] | 84 | class EventSchedulerScheduler; |
| 85 | |
Alex Perry | cb7da4b | 2019-08-28 19:35:56 -0700 | [diff] [blame] | 86 | class EventScheduler { |
| 87 | public: |
| 88 | using ChannelType = |
Austin Schuh | 8bd9632 | 2020-02-13 21:18:22 -0800 | [diff] [blame] | 89 | std::multimap<monotonic_clock::time_point, std::function<void()>>; |
Alex Perry | cb7da4b | 2019-08-28 19:35:56 -0700 | [diff] [blame] | 90 | using Token = ChannelType::iterator; |
Austin Schuh | 58646e2 | 2021-08-23 23:51:46 -0700 | [diff] [blame^] | 91 | EventScheduler(size_t node_index) : node_index_(node_index) {} |
Alex Perry | cb7da4b | 2019-08-28 19:35:56 -0700 | [diff] [blame] | 92 | |
Austin Schuh | 87dd383 | 2021-01-01 23:07:31 -0800 | [diff] [blame] | 93 | // Sets the time converter in use for this scheduler (and the corresponding |
| 94 | // node index) |
| 95 | void SetTimeConverter(size_t node_index, TimeConverter *converter) { |
Austin Schuh | 58646e2 | 2021-08-23 23:51:46 -0700 | [diff] [blame^] | 96 | CHECK_EQ(node_index_, node_index); |
Austin Schuh | 87dd383 | 2021-01-01 23:07:31 -0800 | [diff] [blame] | 97 | converter_ = converter; |
| 98 | } |
| 99 | |
Austin Schuh | 58646e2 | 2021-08-23 23:51:46 -0700 | [diff] [blame^] | 100 | UUID boot_uuid() { |
| 101 | return converter_->boot_uuid(node_index_, boot_count_); |
| 102 | } |
| 103 | |
Alex Perry | cb7da4b | 2019-08-28 19:35:56 -0700 | [diff] [blame] | 104 | // Schedule an event with a callback function |
| 105 | // Returns an iterator to the event |
Austin Schuh | 8bd9632 | 2020-02-13 21:18:22 -0800 | [diff] [blame] | 106 | Token Schedule(monotonic_clock::time_point time, |
Alex Perry | cb7da4b | 2019-08-28 19:35:56 -0700 | [diff] [blame] | 107 | std::function<void()> callback); |
| 108 | |
Austin Schuh | 39788ff | 2019-12-01 18:22:57 -0800 | [diff] [blame] | 109 | // Schedules a callback when the event scheduler starts. |
| 110 | void ScheduleOnRun(std::function<void()> callback) { |
| 111 | on_run_.emplace_back(std::move(callback)); |
| 112 | } |
| 113 | |
Austin Schuh | 057d29f | 2021-08-21 23:05:15 -0700 | [diff] [blame] | 114 | // Schedules a callback when the event scheduler starts. |
| 115 | void ScheduleOnStartup(std::function<void()> callback) { |
| 116 | on_startup_.emplace_back(std::move(callback)); |
| 117 | } |
| 118 | |
Austin Schuh | 58646e2 | 2021-08-23 23:51:46 -0700 | [diff] [blame^] | 119 | void set_on_shutdown(std::function<void()> callback) { |
| 120 | on_shutdown_ = std::move(callback); |
| 121 | } |
| 122 | |
| 123 | void set_started(std::function<void()> callback) { |
| 124 | started_ = std::move(callback); |
| 125 | } |
| 126 | |
| 127 | std::function<void()> started_; |
| 128 | std::function<void()> on_shutdown_; |
| 129 | |
Alex Perry | cb7da4b | 2019-08-28 19:35:56 -0700 | [diff] [blame] | 130 | Token InvalidToken() { return events_list_.end(); } |
| 131 | |
| 132 | // Deschedule an event by its iterator |
| 133 | void Deschedule(Token token); |
| 134 | |
Austin Schuh | 8bd9632 | 2020-02-13 21:18:22 -0800 | [diff] [blame] | 135 | // Runs the OnRun callbacks. |
| 136 | void RunOnRun(); |
Alex Perry | cb7da4b | 2019-08-28 19:35:56 -0700 | [diff] [blame] | 137 | |
Austin Schuh | 057d29f | 2021-08-21 23:05:15 -0700 | [diff] [blame] | 138 | // Runs the OnStartup callbacks. |
| 139 | void RunOnStartup(); |
| 140 | |
Austin Schuh | 58646e2 | 2021-08-23 23:51:46 -0700 | [diff] [blame^] | 141 | // Runs the Started callback. |
| 142 | void RunStarted(); |
| 143 | |
Austin Schuh | 8bd9632 | 2020-02-13 21:18:22 -0800 | [diff] [blame] | 144 | // Returns true if events are being handled. |
| 145 | inline bool is_running() const; |
Alex Perry | cb7da4b | 2019-08-28 19:35:56 -0700 | [diff] [blame] | 146 | |
Austin Schuh | 8bd9632 | 2020-02-13 21:18:22 -0800 | [diff] [blame] | 147 | // Returns the timestamp of the next event to trigger. |
Austin Schuh | 58646e2 | 2021-08-23 23:51:46 -0700 | [diff] [blame^] | 148 | monotonic_clock::time_point OldestEvent(); |
Austin Schuh | 8bd9632 | 2020-02-13 21:18:22 -0800 | [diff] [blame] | 149 | // Handles the next event. |
| 150 | void CallOldestEvent(); |
Alex Perry | cb7da4b | 2019-08-28 19:35:56 -0700 | [diff] [blame] | 151 | |
Austin Schuh | 8bd9632 | 2020-02-13 21:18:22 -0800 | [diff] [blame] | 152 | // Converts a time to the distributed clock for scheduling and cross-node time |
| 153 | // measurement. |
| 154 | distributed_clock::time_point ToDistributedClock( |
| 155 | monotonic_clock::time_point time) const { |
Austin Schuh | 58646e2 | 2021-08-23 23:51:46 -0700 | [diff] [blame^] | 156 | return converter_->ToDistributedClock(node_index_, |
| 157 | {.boot = boot_count_, .time = time}); |
Austin Schuh | 8bd9632 | 2020-02-13 21:18:22 -0800 | [diff] [blame] | 158 | } |
| 159 | |
| 160 | // Takes the distributed time and converts it to the monotonic clock for this |
| 161 | // node. |
Austin Schuh | 58646e2 | 2021-08-23 23:51:46 -0700 | [diff] [blame^] | 162 | logger::BootTimestamp FromDistributedClock( |
Austin Schuh | 8bd9632 | 2020-02-13 21:18:22 -0800 | [diff] [blame] | 163 | distributed_clock::time_point time) const { |
Austin Schuh | 58646e2 | 2021-08-23 23:51:46 -0700 | [diff] [blame^] | 164 | return converter_->FromDistributedClock(node_index_, time, boot_count_); |
Austin Schuh | 8bd9632 | 2020-02-13 21:18:22 -0800 | [diff] [blame] | 165 | } |
| 166 | |
| 167 | // Returns the current monotonic time on this node calculated from the |
| 168 | // distributed clock. |
| 169 | inline monotonic_clock::time_point monotonic_now() const; |
| 170 | |
Austin Schuh | 58646e2 | 2021-08-23 23:51:46 -0700 | [diff] [blame^] | 171 | // Returns the current monotonic time on this node calculated from the |
| 172 | // distributed clock. |
| 173 | inline distributed_clock::time_point distributed_now() const; |
| 174 | |
| 175 | size_t boot_count() const { return boot_count_; } |
| 176 | |
| 177 | size_t node_index() const { return node_index_; } |
| 178 | |
| 179 | // For implementing reboots. |
| 180 | void Shutdown(); |
| 181 | void Startup(); |
| 182 | |
Alex Perry | cb7da4b | 2019-08-28 19:35:56 -0700 | [diff] [blame] | 183 | private: |
Austin Schuh | 8bd9632 | 2020-02-13 21:18:22 -0800 | [diff] [blame] | 184 | friend class EventSchedulerScheduler; |
Austin Schuh | 58646e2 | 2021-08-23 23:51:46 -0700 | [diff] [blame^] | 185 | |
Alex Perry | cb7da4b | 2019-08-28 19:35:56 -0700 | [diff] [blame] | 186 | // Current execution time. |
Austin Schuh | be69cf3 | 2020-08-27 11:38:33 -0700 | [diff] [blame] | 187 | monotonic_clock::time_point monotonic_now_ = monotonic_clock::epoch(); |
Alex Perry | cb7da4b | 2019-08-28 19:35:56 -0700 | [diff] [blame] | 188 | |
Austin Schuh | 58646e2 | 2021-08-23 23:51:46 -0700 | [diff] [blame^] | 189 | size_t boot_count_ = 0; |
| 190 | |
Austin Schuh | 8bd9632 | 2020-02-13 21:18:22 -0800 | [diff] [blame] | 191 | // List of functions to run (once) when running. |
Austin Schuh | 39788ff | 2019-12-01 18:22:57 -0800 | [diff] [blame] | 192 | std::vector<std::function<void()>> on_run_; |
Austin Schuh | 057d29f | 2021-08-21 23:05:15 -0700 | [diff] [blame] | 193 | std::vector<std::function<void()>> on_startup_; |
Austin Schuh | 39788ff | 2019-12-01 18:22:57 -0800 | [diff] [blame] | 194 | |
Alex Perry | cb7da4b | 2019-08-28 19:35:56 -0700 | [diff] [blame] | 195 | // Multimap holding times to run functions. These are stored in order, and |
| 196 | // the order is the callback tree. |
| 197 | ChannelType events_list_; |
Austin Schuh | 8bd9632 | 2020-02-13 21:18:22 -0800 | [diff] [blame] | 198 | |
| 199 | // Pointer to the actual scheduler. |
| 200 | EventSchedulerScheduler *scheduler_scheduler_ = nullptr; |
Austin Schuh | 87dd383 | 2021-01-01 23:07:31 -0800 | [diff] [blame] | 201 | |
| 202 | // Node index handle to be handed back to the TimeConverter. This lets the |
| 203 | // same time converter be used for all the nodes, and the node index |
| 204 | // distinguish which one. |
| 205 | size_t node_index_ = 0; |
| 206 | |
| 207 | // Converts time by doing nothing to it. |
| 208 | class UnityConverter final : public TimeConverter { |
| 209 | public: |
| 210 | distributed_clock::time_point ToDistributedClock( |
Austin Schuh | 58646e2 | 2021-08-23 23:51:46 -0700 | [diff] [blame^] | 211 | size_t /*node_index*/, logger::BootTimestamp time) override { |
| 212 | CHECK_EQ(time.boot, 0u) << ": Reboots unsupported by default."; |
| 213 | return distributed_clock::epoch() + time.time.time_since_epoch(); |
Austin Schuh | 87dd383 | 2021-01-01 23:07:31 -0800 | [diff] [blame] | 214 | } |
| 215 | |
Austin Schuh | 58646e2 | 2021-08-23 23:51:46 -0700 | [diff] [blame^] | 216 | logger::BootTimestamp FromDistributedClock( |
| 217 | size_t /*node_index*/, distributed_clock::time_point time, |
| 218 | size_t boot_count) override { |
| 219 | CHECK_EQ(boot_count, 0u); |
| 220 | return logger::BootTimestamp{ |
| 221 | .boot = boot_count, |
| 222 | .time = monotonic_clock::epoch() + time.time_since_epoch()}; |
Austin Schuh | 87dd383 | 2021-01-01 23:07:31 -0800 | [diff] [blame] | 223 | } |
Austin Schuh | b7c8d2a | 2021-07-19 19:22:12 -0700 | [diff] [blame] | 224 | |
| 225 | void ObserveTimePassed(distributed_clock::time_point /*time*/) override {} |
Austin Schuh | 58646e2 | 2021-08-23 23:51:46 -0700 | [diff] [blame^] | 226 | |
| 227 | UUID boot_uuid(size_t /*node_index*/, size_t boot_count) override { |
| 228 | CHECK_EQ(boot_count, 0u); |
| 229 | return uuid_; |
| 230 | } |
| 231 | |
| 232 | private: |
| 233 | const UUID uuid_ = UUID::Random(); |
Austin Schuh | 87dd383 | 2021-01-01 23:07:31 -0800 | [diff] [blame] | 234 | }; |
| 235 | |
| 236 | UnityConverter unity_converter_; |
| 237 | |
| 238 | TimeConverter *converter_ = &unity_converter_; |
Alex Perry | cb7da4b | 2019-08-28 19:35:56 -0700 | [diff] [blame] | 239 | }; |
| 240 | |
Austin Schuh | 8bd9632 | 2020-02-13 21:18:22 -0800 | [diff] [blame] | 241 | // We need a heap of heaps... |
| 242 | // |
| 243 | // Events in a node have a very well defined progression of time. It is linear |
| 244 | // and well represented by the monotonic clock. |
| 245 | // |
| 246 | // Events across nodes don't follow this well. Time skews between the two nodes |
| 247 | // all the time. We also don't know the function ahead of time which converts |
| 248 | // from each node's monotonic clock to the distributed clock (our unified base |
| 249 | // time which is likely the average time between nodes). |
| 250 | // |
| 251 | // This pushes us towards merge sort. Sorting each node's events with a heap |
| 252 | // like we used to be doing, and then sorting each of those nodes independently. |
| 253 | class EventSchedulerScheduler { |
| 254 | public: |
| 255 | // Adds an event scheduler to the list. |
| 256 | void AddEventScheduler(EventScheduler *scheduler); |
| 257 | |
| 258 | // Runs until there are no more events or Exit is called. |
| 259 | void Run(); |
| 260 | |
| 261 | // Stops running. |
| 262 | void Exit() { is_running_ = false; } |
| 263 | |
| 264 | bool is_running() const { return is_running_; } |
| 265 | |
| 266 | // Runs for a duration on the distributed clock. Time on the distributed |
| 267 | // clock should be very representative of time on each node, but won't be |
| 268 | // exactly the same. |
| 269 | void RunFor(distributed_clock::duration duration); |
| 270 | |
| 271 | // Returns the current distributed time. |
| 272 | distributed_clock::time_point distributed_now() const { return now_; } |
| 273 | |
Austin Schuh | 057d29f | 2021-08-21 23:05:15 -0700 | [diff] [blame] | 274 | void RunOnStartup() { |
| 275 | CHECK(!is_running_); |
| 276 | for (EventScheduler *scheduler : schedulers_) { |
| 277 | scheduler->RunOnStartup(); |
| 278 | } |
Austin Schuh | 58646e2 | 2021-08-23 23:51:46 -0700 | [diff] [blame^] | 279 | for (EventScheduler *scheduler : schedulers_) { |
| 280 | scheduler->RunStarted(); |
| 281 | } |
| 282 | } |
| 283 | |
| 284 | void SetTimeConverter(TimeConverter *time_converter) { |
| 285 | time_converter->set_reboot_found( |
| 286 | [this](distributed_clock::time_point reboot_time, |
| 287 | const std::vector<logger::BootTimestamp> &node_times) { |
| 288 | if (!reboots_.empty()) { |
| 289 | CHECK_GT(reboot_time, std::get<0>(reboots_.back())); |
| 290 | } |
| 291 | reboots_.emplace_back(reboot_time, node_times); |
| 292 | }); |
Austin Schuh | 057d29f | 2021-08-21 23:05:15 -0700 | [diff] [blame] | 293 | } |
| 294 | |
Austin Schuh | 8bd9632 | 2020-02-13 21:18:22 -0800 | [diff] [blame] | 295 | private: |
| 296 | // Handles running the OnRun functions. |
| 297 | void RunOnRun() { |
| 298 | CHECK(!is_running_); |
| 299 | is_running_ = true; |
| 300 | for (EventScheduler *scheduler : schedulers_) { |
| 301 | scheduler->RunOnRun(); |
| 302 | } |
| 303 | } |
| 304 | |
Austin Schuh | 58646e2 | 2021-08-23 23:51:46 -0700 | [diff] [blame^] | 305 | void Reboot(); |
| 306 | |
Austin Schuh | 8bd9632 | 2020-02-13 21:18:22 -0800 | [diff] [blame] | 307 | // Returns the next event time and scheduler on which to run it. |
| 308 | std::tuple<distributed_clock::time_point, EventScheduler *> OldestEvent(); |
| 309 | |
| 310 | // True if we are running. |
| 311 | bool is_running_ = false; |
| 312 | // The current time. |
| 313 | distributed_clock::time_point now_ = distributed_clock::epoch(); |
| 314 | // List of schedulers to run in sync. |
| 315 | std::vector<EventScheduler *> schedulers_; |
Austin Schuh | 58646e2 | 2021-08-23 23:51:46 -0700 | [diff] [blame^] | 316 | |
| 317 | // List of when to reboot each node. |
| 318 | std::vector<std::tuple<distributed_clock::time_point, |
| 319 | std::vector<logger::BootTimestamp>>> |
| 320 | reboots_; |
Austin Schuh | 8bd9632 | 2020-02-13 21:18:22 -0800 | [diff] [blame] | 321 | }; |
| 322 | |
Austin Schuh | 58646e2 | 2021-08-23 23:51:46 -0700 | [diff] [blame^] | 323 | inline distributed_clock::time_point EventScheduler::distributed_now() const { |
| 324 | return scheduler_scheduler_->distributed_now(); |
| 325 | } |
Austin Schuh | 8bd9632 | 2020-02-13 21:18:22 -0800 | [diff] [blame] | 326 | inline monotonic_clock::time_point EventScheduler::monotonic_now() const { |
Austin Schuh | 58646e2 | 2021-08-23 23:51:46 -0700 | [diff] [blame^] | 327 | const logger::BootTimestamp t = |
| 328 | FromDistributedClock(scheduler_scheduler_->distributed_now()); |
| 329 | CHECK_EQ(t.boot, boot_count_) << ": " << " " << t << " d " |
| 330 | << scheduler_scheduler_->distributed_now(); |
| 331 | return t.time; |
Austin Schuh | 8bd9632 | 2020-02-13 21:18:22 -0800 | [diff] [blame] | 332 | } |
| 333 | |
| 334 | inline bool EventScheduler::is_running() const { |
| 335 | return scheduler_scheduler_->is_running(); |
| 336 | } |
| 337 | |
Alex Perry | cb7da4b | 2019-08-28 19:35:56 -0700 | [diff] [blame] | 338 | } // namespace aos |
| 339 | |
| 340 | #endif // AOS_EVENTS_EVENT_SCHEDULER_H_ |