Alex Perry | cb7da4b | 2019-08-28 19:35:56 -0700 | [diff] [blame] | 1 | #include "aos/events/event_scheduler.h" |
| 2 | |
| 3 | #include <algorithm> |
| 4 | #include <deque> |
| 5 | |
| 6 | #include "aos/events/event_loop.h" |
Tyler Chatow | 67ddb03 | 2020-01-12 14:30:04 -0800 | [diff] [blame] | 7 | #include "aos/logging/implementations.h" |
Alex Perry | cb7da4b | 2019-08-28 19:35:56 -0700 | [diff] [blame] | 8 | |
| 9 | namespace aos { |
| 10 | |
Austin Schuh | ef8f1ae | 2021-12-11 12:35:05 -0800 | [diff] [blame] | 11 | EventScheduler::Token EventScheduler::Schedule(monotonic_clock::time_point time, |
| 12 | Event *callback) { |
James Kuszmaul | 86e86c3 | 2022-07-21 17:39:47 -0700 | [diff] [blame] | 13 | CHECK_LE(monotonic_clock::epoch(), time); |
Alex Perry | cb7da4b | 2019-08-28 19:35:56 -0700 | [diff] [blame] | 14 | return events_list_.emplace(time, callback); |
| 15 | } |
| 16 | |
| 17 | void EventScheduler::Deschedule(EventScheduler::Token token) { |
Brian Silverman | 7026e2d | 2021-11-11 16:15:35 -0800 | [diff] [blame] | 18 | // We basically want to DCHECK some nontrivial logic. Guard it with NDEBUG to |
| 19 | // ensure the compiler realizes it's all unnecessary when not doing debug |
| 20 | // checks. |
Brian Silverman | bd405c0 | 2020-06-23 16:25:23 -0700 | [diff] [blame] | 21 | #ifndef NDEBUG |
| 22 | { |
| 23 | bool found = false; |
| 24 | auto i = events_list_.begin(); |
| 25 | while (i != events_list_.end()) { |
| 26 | if (i == token) { |
| 27 | CHECK(!found) << ": The same iterator is in the multimap twice??"; |
| 28 | found = true; |
| 29 | } |
| 30 | ++i; |
| 31 | } |
| 32 | CHECK(found) << ": Trying to deschedule an event which is not scheduled"; |
| 33 | } |
| 34 | #endif |
Alex Perry | cb7da4b | 2019-08-28 19:35:56 -0700 | [diff] [blame] | 35 | events_list_.erase(token); |
| 36 | } |
| 37 | |
Austin Schuh | e12b5eb | 2022-08-29 12:39:27 -0700 | [diff] [blame] | 38 | std::pair<distributed_clock::time_point, monotonic_clock::time_point> |
| 39 | EventScheduler::OldestEvent() { |
James Kuszmaul | 86e86c3 | 2022-07-21 17:39:47 -0700 | [diff] [blame] | 40 | // If we haven't started yet, schedule a special event for the epoch to allow |
| 41 | // ourselves to boot. |
| 42 | if (!called_started_) { |
Austin Schuh | e12b5eb | 2022-08-29 12:39:27 -0700 | [diff] [blame] | 43 | if (!cached_epoch_) { |
| 44 | cached_epoch_ = ToDistributedClock(monotonic_clock::epoch()); |
| 45 | } |
| 46 | return std::make_pair(*cached_epoch_, monotonic_clock::epoch()); |
James Kuszmaul | 86e86c3 | 2022-07-21 17:39:47 -0700 | [diff] [blame] | 47 | } |
| 48 | |
Austin Schuh | 8bd9632 | 2020-02-13 21:18:22 -0800 | [diff] [blame] | 49 | if (events_list_.empty()) { |
Austin Schuh | e12b5eb | 2022-08-29 12:39:27 -0700 | [diff] [blame] | 50 | return std::make_pair(distributed_clock::max_time, |
| 51 | monotonic_clock::max_time); |
Austin Schuh | 39788ff | 2019-12-01 18:22:57 -0800 | [diff] [blame] | 52 | } |
Austin Schuh | 8bd9632 | 2020-02-13 21:18:22 -0800 | [diff] [blame] | 53 | |
Philipp Schrader | 790cb54 | 2023-07-05 21:06:52 -0700 | [diff] [blame] | 54 | const monotonic_clock::time_point monotonic_time = |
| 55 | events_list_.begin()->first; |
Austin Schuh | e12b5eb | 2022-08-29 12:39:27 -0700 | [diff] [blame] | 56 | if (cached_event_list_monotonic_time_ != monotonic_time) { |
| 57 | cached_event_list_time_ = ToDistributedClock(monotonic_time); |
| 58 | cached_event_list_monotonic_time_ = monotonic_time; |
| 59 | } |
| 60 | |
| 61 | return std::make_pair(cached_event_list_time_, monotonic_time); |
Alex Perry | cb7da4b | 2019-08-28 19:35:56 -0700 | [diff] [blame] | 62 | } |
| 63 | |
James Kuszmaul | 86e86c3 | 2022-07-21 17:39:47 -0700 | [diff] [blame] | 64 | void EventScheduler::Shutdown() { |
| 65 | CHECK(!is_running_); |
| 66 | on_shutdown_(); |
| 67 | } |
Austin Schuh | 58646e2 | 2021-08-23 23:51:46 -0700 | [diff] [blame] | 68 | |
| 69 | void EventScheduler::Startup() { |
| 70 | ++boot_count_; |
James Kuszmaul | 86e86c3 | 2022-07-21 17:39:47 -0700 | [diff] [blame] | 71 | CHECK(!is_running_); |
| 72 | MaybeRunOnStartup(); |
| 73 | CHECK(called_started_); |
Austin Schuh | 58646e2 | 2021-08-23 23:51:46 -0700 | [diff] [blame] | 74 | } |
| 75 | |
Austin Schuh | 8bd9632 | 2020-02-13 21:18:22 -0800 | [diff] [blame] | 76 | void EventScheduler::CallOldestEvent() { |
James Kuszmaul | 86e86c3 | 2022-07-21 17:39:47 -0700 | [diff] [blame] | 77 | if (!called_started_) { |
| 78 | // If we haven't started, start. |
| 79 | MaybeRunOnStartup(); |
| 80 | MaybeRunOnRun(); |
| 81 | CHECK(called_started_); |
| 82 | return; |
| 83 | } |
| 84 | CHECK(is_running_); |
Austin Schuh | 8bd9632 | 2020-02-13 21:18:22 -0800 | [diff] [blame] | 85 | CHECK_GT(events_list_.size(), 0u); |
| 86 | auto iter = events_list_.begin(); |
Austin Schuh | 58646e2 | 2021-08-23 23:51:46 -0700 | [diff] [blame] | 87 | const logger::BootTimestamp t = |
| 88 | FromDistributedClock(scheduler_scheduler_->distributed_now()); |
Austin Schuh | c1ee1b6 | 2022-03-22 17:09:52 -0700 | [diff] [blame] | 89 | VLOG(2) << "Got time back " << t; |
Austin Schuh | 58646e2 | 2021-08-23 23:51:46 -0700 | [diff] [blame] | 90 | CHECK_EQ(t.boot, boot_count_); |
| 91 | CHECK_EQ(t.time, iter->first) << ": Time is wrong on node " << node_index_; |
Austin Schuh | 8bd9632 | 2020-02-13 21:18:22 -0800 | [diff] [blame] | 92 | |
Austin Schuh | ef8f1ae | 2021-12-11 12:35:05 -0800 | [diff] [blame] | 93 | Event *callback = iter->second; |
Austin Schuh | 8bd9632 | 2020-02-13 21:18:22 -0800 | [diff] [blame] | 94 | events_list_.erase(iter); |
Austin Schuh | ef8f1ae | 2021-12-11 12:35:05 -0800 | [diff] [blame] | 95 | callback->Handle(); |
Austin Schuh | b7c8d2a | 2021-07-19 19:22:12 -0700 | [diff] [blame] | 96 | |
| 97 | converter_->ObserveTimePassed(scheduler_scheduler_->distributed_now()); |
Austin Schuh | 8bd9632 | 2020-02-13 21:18:22 -0800 | [diff] [blame] | 98 | } |
| 99 | |
| 100 | void EventScheduler::RunOnRun() { |
James Kuszmaul | 86e86c3 | 2022-07-21 17:39:47 -0700 | [diff] [blame] | 101 | CHECK(is_running_); |
Austin Schuh | e33c08d | 2022-02-03 18:15:21 -0800 | [diff] [blame] | 102 | while (!on_run_.empty()) { |
| 103 | std::function<void()> fn = std::move(*on_run_.begin()); |
| 104 | on_run_.erase(on_run_.begin()); |
| 105 | fn(); |
Austin Schuh | 39788ff | 2019-12-01 18:22:57 -0800 | [diff] [blame] | 106 | } |
Alex Perry | cb7da4b | 2019-08-28 19:35:56 -0700 | [diff] [blame] | 107 | } |
| 108 | |
Austin Schuh | e33c08d | 2022-02-03 18:15:21 -0800 | [diff] [blame] | 109 | void EventScheduler::RunOnStartup() noexcept { |
| 110 | while (!on_startup_.empty()) { |
James Kuszmaul | 86e86c3 | 2022-07-21 17:39:47 -0700 | [diff] [blame] | 111 | CHECK(!is_running_); |
Austin Schuh | e33c08d | 2022-02-03 18:15:21 -0800 | [diff] [blame] | 112 | std::function<void()> fn = std::move(*on_startup_.begin()); |
| 113 | on_startup_.erase(on_startup_.begin()); |
| 114 | fn(); |
Austin Schuh | 057d29f | 2021-08-21 23:05:15 -0700 | [diff] [blame] | 115 | } |
Austin Schuh | 057d29f | 2021-08-21 23:05:15 -0700 | [diff] [blame] | 116 | } |
| 117 | |
Austin Schuh | e33c08d | 2022-02-03 18:15:21 -0800 | [diff] [blame] | 118 | void EventScheduler::RunStarted() { |
James Kuszmaul | 86e86c3 | 2022-07-21 17:39:47 -0700 | [diff] [blame] | 119 | CHECK(!is_running_); |
Austin Schuh | e33c08d | 2022-02-03 18:15:21 -0800 | [diff] [blame] | 120 | if (started_) { |
| 121 | started_(); |
| 122 | } |
James Kuszmaul | 86e86c3 | 2022-07-21 17:39:47 -0700 | [diff] [blame] | 123 | is_running_ = true; |
Austin Schuh | e33c08d | 2022-02-03 18:15:21 -0800 | [diff] [blame] | 124 | } |
| 125 | |
James Kuszmaul | 86e86c3 | 2022-07-21 17:39:47 -0700 | [diff] [blame] | 126 | void EventScheduler::MaybeRunStopped() { |
| 127 | CHECK(is_running_); |
| 128 | is_running_ = false; |
| 129 | if (called_started_) { |
| 130 | called_started_ = false; |
| 131 | if (stopped_) { |
| 132 | stopped_(); |
| 133 | } |
| 134 | } |
| 135 | } |
| 136 | |
| 137 | void EventScheduler::MaybeRunOnStartup() { |
| 138 | CHECK(!called_started_); |
| 139 | CHECK(!is_running_); |
| 140 | const logger::BootTimestamp t = |
| 141 | FromDistributedClock(scheduler_scheduler_->distributed_now()); |
| 142 | if (t.boot == boot_count_ && t.time >= monotonic_clock::epoch()) { |
| 143 | called_started_ = true; |
| 144 | RunOnStartup(); |
| 145 | } |
| 146 | } |
| 147 | |
| 148 | void EventScheduler::MaybeRunOnRun() { |
| 149 | if (called_started_) { |
| 150 | RunStarted(); |
| 151 | RunOnRun(); |
Austin Schuh | e33c08d | 2022-02-03 18:15:21 -0800 | [diff] [blame] | 152 | } |
| 153 | } |
Austin Schuh | 58646e2 | 2021-08-23 23:51:46 -0700 | [diff] [blame] | 154 | |
Austin Schuh | ac0771c | 2020-01-07 18:36:30 -0800 | [diff] [blame] | 155 | std::ostream &operator<<(std::ostream &stream, |
| 156 | const aos::distributed_clock::time_point &now) { |
| 157 | // Print it the same way we print a monotonic time. Literally. |
| 158 | stream << monotonic_clock::time_point(now.time_since_epoch()); |
| 159 | return stream; |
| 160 | } |
| 161 | |
Austin Schuh | 8bd9632 | 2020-02-13 21:18:22 -0800 | [diff] [blame] | 162 | void EventSchedulerScheduler::AddEventScheduler(EventScheduler *scheduler) { |
| 163 | CHECK(std::find(schedulers_.begin(), schedulers_.end(), scheduler) == |
| 164 | schedulers_.end()); |
| 165 | CHECK(scheduler->scheduler_scheduler_ == nullptr); |
Austin Schuh | 58646e2 | 2021-08-23 23:51:46 -0700 | [diff] [blame] | 166 | CHECK_EQ(scheduler->node_index(), schedulers_.size()); |
Austin Schuh | 8bd9632 | 2020-02-13 21:18:22 -0800 | [diff] [blame] | 167 | |
| 168 | schedulers_.emplace_back(scheduler); |
| 169 | scheduler->scheduler_scheduler_ = this; |
| 170 | } |
| 171 | |
James Kuszmaul | 86e86c3 | 2022-07-21 17:39:47 -0700 | [diff] [blame] | 172 | void EventSchedulerScheduler::MaybeRunStopped() { |
| 173 | CHECK(!is_running_); |
| 174 | for (EventScheduler *scheduler : schedulers_) { |
| 175 | if (scheduler->is_running()) { |
| 176 | scheduler->MaybeRunStopped(); |
| 177 | } |
| 178 | } |
| 179 | } |
| 180 | |
| 181 | bool EventSchedulerScheduler::RunUntil( |
| 182 | realtime_clock::time_point end_time, EventScheduler *scheduler, |
| 183 | std::function<std::chrono::nanoseconds()> fn_realtime_offset) { |
| 184 | logging::ScopedLogRestorer prev_logger; |
| 185 | MaybeRunOnStartup(); |
| 186 | |
| 187 | bool reached_end_time = false; |
| 188 | |
| 189 | RunMaybeRealtimeLoop([this, scheduler, end_time, fn_realtime_offset, |
| 190 | &reached_end_time]() { |
| 191 | std::tuple<distributed_clock::time_point, EventScheduler *> oldest_event = |
| 192 | OldestEvent(); |
| 193 | aos::distributed_clock::time_point oldest_event_time_distributed = |
| 194 | std::get<0>(oldest_event); |
| 195 | logger::BootTimestamp test_time_monotonic = |
| 196 | scheduler->FromDistributedClock(oldest_event_time_distributed); |
| 197 | realtime_clock::time_point oldest_event_realtime( |
| 198 | test_time_monotonic.time_since_epoch() + fn_realtime_offset()); |
| 199 | |
| 200 | if ((std::get<0>(oldest_event) == distributed_clock::max_time) || |
| 201 | (oldest_event_realtime > end_time && |
| 202 | (reboots_.empty() || |
| 203 | std::get<0>(reboots_.front()) > oldest_event_time_distributed))) { |
| 204 | is_running_ = false; |
| 205 | reached_end_time = true; |
| 206 | |
| 207 | // We have to nudge our time back to the distributed time |
| 208 | // corresponding to our desired realtime time. |
Austin Schuh | e12b5eb | 2022-08-29 12:39:27 -0700 | [diff] [blame] | 209 | const monotonic_clock::time_point end_monotonic = |
| 210 | monotonic_clock::epoch() + end_time.time_since_epoch() - |
James Kuszmaul | 86e86c3 | 2022-07-21 17:39:47 -0700 | [diff] [blame] | 211 | fn_realtime_offset(); |
| 212 | const aos::distributed_clock::time_point end_time_distributed = |
| 213 | scheduler->ToDistributedClock(end_monotonic); |
| 214 | |
| 215 | now_ = end_time_distributed; |
| 216 | |
| 217 | return; |
| 218 | } |
| 219 | |
| 220 | if (!reboots_.empty() && |
| 221 | std::get<0>(reboots_.front()) <= std::get<0>(oldest_event)) { |
| 222 | // Reboot is next. |
| 223 | CHECK_LE(now_, |
| 224 | std::get<0>(reboots_.front()) + std::chrono::nanoseconds(1)) |
| 225 | << ": Simulated time went backwards by too much. Please " |
| 226 | "investigate."; |
| 227 | now_ = std::get<0>(reboots_.front()); |
| 228 | Reboot(); |
| 229 | reboots_.erase(reboots_.begin()); |
| 230 | return; |
| 231 | } |
| 232 | |
| 233 | // We get to pick our tradeoffs here. Either we assume that there are |
| 234 | // no backward step changes in our time function for each node, or we |
| 235 | // have to let time go backwards. We currently only really see this |
| 236 | // happen when 2 events are scheduled for "now", time changes, and |
| 237 | // there is a nanosecond or two of rounding due to integer math. |
| 238 | // |
| 239 | // //aos/events/logging:logger_test triggers this. |
| 240 | CHECK_LE(now_, std::get<0>(oldest_event) + std::chrono::nanoseconds(1)) |
| 241 | << ": Simulated time went backwards by too much. Please " |
| 242 | "investigate."; |
| 243 | |
| 244 | now_ = std::get<0>(oldest_event); |
| 245 | |
| 246 | std::get<1>(oldest_event)->CallOldestEvent(); |
| 247 | }); |
| 248 | |
| 249 | MaybeRunStopped(); |
| 250 | |
| 251 | return reached_end_time; |
| 252 | } |
| 253 | |
Austin Schuh | 58646e2 | 2021-08-23 23:51:46 -0700 | [diff] [blame] | 254 | void EventSchedulerScheduler::Reboot() { |
| 255 | const std::vector<logger::BootTimestamp> × = |
| 256 | std::get<1>(reboots_.front()); |
| 257 | CHECK_EQ(times.size(), schedulers_.size()); |
| 258 | |
| 259 | VLOG(1) << "Rebooting at " << now_; |
| 260 | for (const auto &time : times) { |
| 261 | VLOG(1) << " " << time; |
| 262 | } |
| 263 | |
| 264 | is_running_ = false; |
| 265 | |
| 266 | // Shut everything down. |
| 267 | std::vector<size_t> rebooted; |
| 268 | for (size_t node_index = 0; node_index < schedulers_.size(); ++node_index) { |
| 269 | if (schedulers_[node_index]->boot_count() == times[node_index].boot) { |
| 270 | continue; |
| 271 | } else { |
| 272 | rebooted.emplace_back(node_index); |
| 273 | CHECK_EQ(schedulers_[node_index]->boot_count() + 1, |
| 274 | times[node_index].boot); |
James Kuszmaul | 86e86c3 | 2022-07-21 17:39:47 -0700 | [diff] [blame] | 275 | schedulers_[node_index]->MaybeRunStopped(); |
Austin Schuh | 58646e2 | 2021-08-23 23:51:46 -0700 | [diff] [blame] | 276 | schedulers_[node_index]->Shutdown(); |
| 277 | } |
| 278 | } |
| 279 | |
| 280 | // And start it back up again to reboot. When something starts back up |
| 281 | // (especially message_bridge), it could try to send stuff out. We want |
| 282 | // to move everything over to the new boot before doing that. |
| 283 | for (const size_t node_index : rebooted) { |
Austin Schuh | 58646e2 | 2021-08-23 23:51:46 -0700 | [diff] [blame] | 284 | schedulers_[node_index]->Startup(); |
| 285 | } |
Austin Schuh | 58646e2 | 2021-08-23 23:51:46 -0700 | [diff] [blame] | 286 | for (const size_t node_index : rebooted) { |
James Kuszmaul | 86e86c3 | 2022-07-21 17:39:47 -0700 | [diff] [blame] | 287 | schedulers_[node_index]->MaybeRunOnRun(); |
Austin Schuh | 58646e2 | 2021-08-23 23:51:46 -0700 | [diff] [blame] | 288 | } |
| 289 | is_running_ = true; |
| 290 | } |
| 291 | |
Austin Schuh | 8bd9632 | 2020-02-13 21:18:22 -0800 | [diff] [blame] | 292 | void EventSchedulerScheduler::RunFor(distributed_clock::duration duration) { |
| 293 | distributed_clock::time_point end_time = now_ + duration; |
| 294 | logging::ScopedLogRestorer prev_logger; |
James Kuszmaul | 86e86c3 | 2022-07-21 17:39:47 -0700 | [diff] [blame] | 295 | MaybeRunOnStartup(); |
Austin Schuh | 8bd9632 | 2020-02-13 21:18:22 -0800 | [diff] [blame] | 296 | |
| 297 | // Run all the sub-event-schedulers. |
James Kuszmaul | b67409b | 2022-06-20 16:25:03 -0700 | [diff] [blame] | 298 | RunMaybeRealtimeLoop([this, end_time]() { |
Austin Schuh | 8bd9632 | 2020-02-13 21:18:22 -0800 | [diff] [blame] | 299 | std::tuple<distributed_clock::time_point, EventScheduler *> oldest_event = |
| 300 | OldestEvent(); |
Austin Schuh | 58646e2 | 2021-08-23 23:51:46 -0700 | [diff] [blame] | 301 | if (!reboots_.empty() && |
| 302 | std::get<0>(reboots_.front()) <= std::get<0>(oldest_event)) { |
| 303 | // Reboot is next. |
| 304 | if (std::get<0>(reboots_.front()) > end_time) { |
| 305 | // Reboot is after our end time, give up. |
| 306 | is_running_ = false; |
James Kuszmaul | b67409b | 2022-06-20 16:25:03 -0700 | [diff] [blame] | 307 | return; |
Austin Schuh | 58646e2 | 2021-08-23 23:51:46 -0700 | [diff] [blame] | 308 | } |
| 309 | |
| 310 | CHECK_LE(now_, |
| 311 | std::get<0>(reboots_.front()) + std::chrono::nanoseconds(1)) |
| 312 | << ": Simulated time went backwards by too much. Please " |
| 313 | "investigate."; |
| 314 | now_ = std::get<0>(reboots_.front()); |
| 315 | Reboot(); |
| 316 | reboots_.erase(reboots_.begin()); |
James Kuszmaul | b67409b | 2022-06-20 16:25:03 -0700 | [diff] [blame] | 317 | return; |
Austin Schuh | 58646e2 | 2021-08-23 23:51:46 -0700 | [diff] [blame] | 318 | } |
| 319 | |
Austin Schuh | 8bd9632 | 2020-02-13 21:18:22 -0800 | [diff] [blame] | 320 | // No events left, bail. |
| 321 | if (std::get<0>(oldest_event) == distributed_clock::max_time || |
| 322 | std::get<0>(oldest_event) > end_time) { |
| 323 | is_running_ = false; |
James Kuszmaul | b67409b | 2022-06-20 16:25:03 -0700 | [diff] [blame] | 324 | return; |
Austin Schuh | 8bd9632 | 2020-02-13 21:18:22 -0800 | [diff] [blame] | 325 | } |
| 326 | |
| 327 | // We get to pick our tradeoffs here. Either we assume that there are no |
| 328 | // backward step changes in our time function for each node, or we have to |
Austin Schuh | 2f8fd75 | 2020-09-01 22:38:28 -0700 | [diff] [blame] | 329 | // let time go backwards. We currently only really see this happen when 2 |
| 330 | // events are scheduled for "now", time changes, and there is a nanosecond |
| 331 | // or two of rounding due to integer math. |
| 332 | // |
| 333 | // //aos/events/logging:logger_test triggers this. |
| 334 | CHECK_LE(now_, std::get<0>(oldest_event) + std::chrono::nanoseconds(1)) |
Austin Schuh | 8bd9632 | 2020-02-13 21:18:22 -0800 | [diff] [blame] | 335 | << ": Simulated time went backwards by too much. Please investigate."; |
James Kuszmaul | 86e86c3 | 2022-07-21 17:39:47 -0700 | [diff] [blame] | 336 | // push time forwards |
Austin Schuh | 8bd9632 | 2020-02-13 21:18:22 -0800 | [diff] [blame] | 337 | now_ = std::get<0>(oldest_event); |
| 338 | |
| 339 | std::get<1>(oldest_event)->CallOldestEvent(); |
James Kuszmaul | b67409b | 2022-06-20 16:25:03 -0700 | [diff] [blame] | 340 | }); |
Austin Schuh | 8bd9632 | 2020-02-13 21:18:22 -0800 | [diff] [blame] | 341 | |
| 342 | now_ = end_time; |
Austin Schuh | e33c08d | 2022-02-03 18:15:21 -0800 | [diff] [blame] | 343 | |
James Kuszmaul | 86e86c3 | 2022-07-21 17:39:47 -0700 | [diff] [blame] | 344 | MaybeRunStopped(); |
Austin Schuh | 8bd9632 | 2020-02-13 21:18:22 -0800 | [diff] [blame] | 345 | } |
| 346 | |
| 347 | void EventSchedulerScheduler::Run() { |
| 348 | logging::ScopedLogRestorer prev_logger; |
James Kuszmaul | 86e86c3 | 2022-07-21 17:39:47 -0700 | [diff] [blame] | 349 | MaybeRunOnStartup(); |
| 350 | |
| 351 | // Run all the sub-event-schedulers. |
James Kuszmaul | b67409b | 2022-06-20 16:25:03 -0700 | [diff] [blame] | 352 | RunMaybeRealtimeLoop([this]() { |
Austin Schuh | 8bd9632 | 2020-02-13 21:18:22 -0800 | [diff] [blame] | 353 | std::tuple<distributed_clock::time_point, EventScheduler *> oldest_event = |
| 354 | OldestEvent(); |
Austin Schuh | 58646e2 | 2021-08-23 23:51:46 -0700 | [diff] [blame] | 355 | if (!reboots_.empty() && |
| 356 | std::get<0>(reboots_.front()) <= std::get<0>(oldest_event)) { |
| 357 | // Reboot is next. |
| 358 | CHECK_LE(now_, |
| 359 | std::get<0>(reboots_.front()) + std::chrono::nanoseconds(1)) |
| 360 | << ": Simulated time went backwards by too much. Please " |
| 361 | "investigate."; |
| 362 | now_ = std::get<0>(reboots_.front()); |
| 363 | Reboot(); |
| 364 | reboots_.erase(reboots_.begin()); |
James Kuszmaul | b67409b | 2022-06-20 16:25:03 -0700 | [diff] [blame] | 365 | return; |
Austin Schuh | 58646e2 | 2021-08-23 23:51:46 -0700 | [diff] [blame] | 366 | } |
Austin Schuh | 8bd9632 | 2020-02-13 21:18:22 -0800 | [diff] [blame] | 367 | // No events left, bail. |
| 368 | if (std::get<0>(oldest_event) == distributed_clock::max_time) { |
James Kuszmaul | b67409b | 2022-06-20 16:25:03 -0700 | [diff] [blame] | 369 | is_running_ = false; |
| 370 | return; |
Austin Schuh | 8bd9632 | 2020-02-13 21:18:22 -0800 | [diff] [blame] | 371 | } |
| 372 | |
| 373 | // We get to pick our tradeoffs here. Either we assume that there are no |
| 374 | // backward step changes in our time function for each node, or we have to |
Austin Schuh | 2f8fd75 | 2020-09-01 22:38:28 -0700 | [diff] [blame] | 375 | // let time go backwards. We currently only really see this happen when 2 |
| 376 | // events are scheduled for "now", time changes, and there is a nanosecond |
| 377 | // or two of rounding due to integer math. |
| 378 | // |
| 379 | // //aos/events/logging:logger_test triggers this. |
| 380 | CHECK_LE(now_, std::get<0>(oldest_event) + std::chrono::nanoseconds(1)) |
Austin Schuh | 8bd9632 | 2020-02-13 21:18:22 -0800 | [diff] [blame] | 381 | << ": Simulated time went backwards by too much. Please investigate."; |
| 382 | now_ = std::get<0>(oldest_event); |
| 383 | |
| 384 | std::get<1>(oldest_event)->CallOldestEvent(); |
James Kuszmaul | b67409b | 2022-06-20 16:25:03 -0700 | [diff] [blame] | 385 | }); |
Austin Schuh | e33c08d | 2022-02-03 18:15:21 -0800 | [diff] [blame] | 386 | |
James Kuszmaul | 86e86c3 | 2022-07-21 17:39:47 -0700 | [diff] [blame] | 387 | MaybeRunStopped(); |
Austin Schuh | 8bd9632 | 2020-02-13 21:18:22 -0800 | [diff] [blame] | 388 | } |
| 389 | |
James Kuszmaul | b67409b | 2022-06-20 16:25:03 -0700 | [diff] [blame] | 390 | template <typename F> |
| 391 | void EventSchedulerScheduler::RunMaybeRealtimeLoop(F loop_body) { |
| 392 | internal::TimerFd timerfd; |
| 393 | CHECK_LT(0.0, replay_rate_) << "Replay rate must be positive."; |
| 394 | distributed_clock::time_point last_distributed_clock = |
| 395 | std::get<0>(OldestEvent()); |
| 396 | monotonic_clock::time_point last_monotonic_clock = monotonic_clock::now(); |
| 397 | timerfd.SetTime(last_monotonic_clock, std::chrono::seconds(0)); |
| 398 | epoll_.OnReadable( |
| 399 | timerfd.fd(), [this, &last_distributed_clock, &last_monotonic_clock, |
| 400 | &timerfd, loop_body]() { |
| 401 | const uint64_t read_result = timerfd.Read(); |
| 402 | if (!is_running_) { |
| 403 | epoll_.Quit(); |
| 404 | return; |
| 405 | } |
| 406 | CHECK_EQ(read_result, 1u); |
| 407 | // Call loop_body() at least once; if we are in infinite-speed replay, |
| 408 | // we don't actually want/need the context switches from the epoll |
| 409 | // setup, so just loop. |
| 410 | // Note: The performance impacts of this code have not been carefully |
| 411 | // inspected (e.g., how much does avoiding the context-switch help; does |
| 412 | // the timerfd_settime call matter). |
| 413 | // This is deliberately written to support the user changing replay |
| 414 | // rates dynamically. |
| 415 | do { |
| 416 | loop_body(); |
| 417 | if (is_running_) { |
| 418 | const monotonic_clock::time_point next_trigger = |
| 419 | last_monotonic_clock + |
| 420 | std::chrono::duration_cast<std::chrono::nanoseconds>( |
| 421 | (now_ - last_distributed_clock) / replay_rate_); |
| 422 | timerfd.SetTime(next_trigger, std::chrono::seconds(0)); |
| 423 | last_monotonic_clock = next_trigger; |
| 424 | last_distributed_clock = now_; |
| 425 | } else { |
| 426 | epoll_.Quit(); |
| 427 | } |
| 428 | } while (replay_rate_ == std::numeric_limits<double>::infinity() && |
| 429 | is_running_); |
| 430 | }); |
| 431 | |
| 432 | epoll_.Run(); |
| 433 | epoll_.DeleteFd(timerfd.fd()); |
| 434 | } |
| 435 | |
Austin Schuh | 8bd9632 | 2020-02-13 21:18:22 -0800 | [diff] [blame] | 436 | std::tuple<distributed_clock::time_point, EventScheduler *> |
| 437 | EventSchedulerScheduler::OldestEvent() { |
| 438 | distributed_clock::time_point min_event_time = distributed_clock::max_time; |
| 439 | EventScheduler *min_scheduler = nullptr; |
| 440 | |
| 441 | // TODO(austin): Don't linearly search... But for N=3, it is probably the |
| 442 | // fastest way to do this. |
| 443 | for (EventScheduler *scheduler : schedulers_) { |
Austin Schuh | e12b5eb | 2022-08-29 12:39:27 -0700 | [diff] [blame] | 444 | const std::pair<distributed_clock::time_point, monotonic_clock::time_point> |
| 445 | event_time = scheduler->OldestEvent(); |
| 446 | if (event_time.second != monotonic_clock::max_time) { |
| 447 | if (event_time.first < min_event_time) { |
| 448 | min_event_time = event_time.first; |
Austin Schuh | 8bd9632 | 2020-02-13 21:18:22 -0800 | [diff] [blame] | 449 | min_scheduler = scheduler; |
| 450 | } |
| 451 | } |
| 452 | } |
| 453 | |
Austin Schuh | 87dd383 | 2021-01-01 23:07:31 -0800 | [diff] [blame] | 454 | if (min_scheduler) { |
Austin Schuh | c1ee1b6 | 2022-03-22 17:09:52 -0700 | [diff] [blame] | 455 | VLOG(2) << "Oldest event " << min_event_time << " on scheduler " |
Austin Schuh | 87dd383 | 2021-01-01 23:07:31 -0800 | [diff] [blame] | 456 | << min_scheduler->node_index_; |
| 457 | } |
Austin Schuh | 8bd9632 | 2020-02-13 21:18:22 -0800 | [diff] [blame] | 458 | return std::make_tuple(min_event_time, min_scheduler); |
| 459 | } |
| 460 | |
Austin Schuh | e33c08d | 2022-02-03 18:15:21 -0800 | [diff] [blame] | 461 | void EventSchedulerScheduler::TemporarilyStopAndRun(std::function<void()> fn) { |
Austin Schuh | 3e31f91 | 2023-08-21 21:29:10 -0700 | [diff] [blame^] | 462 | if (in_on_run_) { |
| 463 | LOG(FATAL) |
| 464 | << "Can't call AllowApplicationCreationDuring from an OnRun callback."; |
| 465 | } |
Austin Schuh | e33c08d | 2022-02-03 18:15:21 -0800 | [diff] [blame] | 466 | const bool was_running = is_running_; |
| 467 | if (is_running_) { |
| 468 | is_running_ = false; |
James Kuszmaul | 86e86c3 | 2022-07-21 17:39:47 -0700 | [diff] [blame] | 469 | MaybeRunStopped(); |
Austin Schuh | e33c08d | 2022-02-03 18:15:21 -0800 | [diff] [blame] | 470 | } |
| 471 | fn(); |
| 472 | if (was_running) { |
James Kuszmaul | 86e86c3 | 2022-07-21 17:39:47 -0700 | [diff] [blame] | 473 | MaybeRunOnStartup(); |
| 474 | } |
| 475 | } |
| 476 | |
| 477 | void EventSchedulerScheduler::MaybeRunOnStartup() { |
| 478 | is_running_ = true; |
| 479 | for (EventScheduler *scheduler : schedulers_) { |
| 480 | scheduler->MaybeRunOnStartup(); |
| 481 | } |
Austin Schuh | 3e31f91 | 2023-08-21 21:29:10 -0700 | [diff] [blame^] | 482 | in_on_run_ = true; |
James Kuszmaul | 86e86c3 | 2022-07-21 17:39:47 -0700 | [diff] [blame] | 483 | // We must trigger all the OnRun's *after* all the OnStartup callbacks are |
| 484 | // triggered because that is the contract that we have stated. |
| 485 | for (EventScheduler *scheduler : schedulers_) { |
| 486 | scheduler->MaybeRunOnRun(); |
Austin Schuh | e33c08d | 2022-02-03 18:15:21 -0800 | [diff] [blame] | 487 | } |
Austin Schuh | 3e31f91 | 2023-08-21 21:29:10 -0700 | [diff] [blame^] | 488 | in_on_run_ = false; |
Austin Schuh | e33c08d | 2022-02-03 18:15:21 -0800 | [diff] [blame] | 489 | } |
| 490 | |
Alex Perry | cb7da4b | 2019-08-28 19:35:56 -0700 | [diff] [blame] | 491 | } // namespace aos |