Alex Perry | cb7da4b | 2019-08-28 19:35:56 -0700 | [diff] [blame] | 1 | #include "aos/events/event_scheduler.h" |
| 2 | |
| 3 | #include <algorithm> |
| 4 | #include <deque> |
| 5 | |
| 6 | #include "aos/events/event_loop.h" |
Tyler Chatow | 67ddb03 | 2020-01-12 14:30:04 -0800 | [diff] [blame] | 7 | #include "aos/logging/implementations.h" |
Alex Perry | cb7da4b | 2019-08-28 19:35:56 -0700 | [diff] [blame] | 8 | |
| 9 | namespace aos { |
| 10 | |
Austin Schuh | ef8f1ae | 2021-12-11 12:35:05 -0800 | [diff] [blame] | 11 | EventScheduler::Token EventScheduler::Schedule(monotonic_clock::time_point time, |
| 12 | Event *callback) { |
James Kuszmaul | 86e86c3 | 2022-07-21 17:39:47 -0700 | [diff] [blame] | 13 | CHECK_LE(monotonic_clock::epoch(), time); |
Alex Perry | cb7da4b | 2019-08-28 19:35:56 -0700 | [diff] [blame] | 14 | return events_list_.emplace(time, callback); |
| 15 | } |
| 16 | |
| 17 | void EventScheduler::Deschedule(EventScheduler::Token token) { |
Brian Silverman | 7026e2d | 2021-11-11 16:15:35 -0800 | [diff] [blame] | 18 | // We basically want to DCHECK some nontrivial logic. Guard it with NDEBUG to |
| 19 | // ensure the compiler realizes it's all unnecessary when not doing debug |
| 20 | // checks. |
Brian Silverman | bd405c0 | 2020-06-23 16:25:23 -0700 | [diff] [blame] | 21 | #ifndef NDEBUG |
| 22 | { |
| 23 | bool found = false; |
| 24 | auto i = events_list_.begin(); |
| 25 | while (i != events_list_.end()) { |
| 26 | if (i == token) { |
| 27 | CHECK(!found) << ": The same iterator is in the multimap twice??"; |
| 28 | found = true; |
| 29 | } |
| 30 | ++i; |
| 31 | } |
| 32 | CHECK(found) << ": Trying to deschedule an event which is not scheduled"; |
| 33 | } |
| 34 | #endif |
Alex Perry | cb7da4b | 2019-08-28 19:35:56 -0700 | [diff] [blame] | 35 | events_list_.erase(token); |
| 36 | } |
| 37 | |
Austin Schuh | e12b5eb | 2022-08-29 12:39:27 -0700 | [diff] [blame^] | 38 | std::pair<distributed_clock::time_point, monotonic_clock::time_point> |
| 39 | EventScheduler::OldestEvent() { |
James Kuszmaul | 86e86c3 | 2022-07-21 17:39:47 -0700 | [diff] [blame] | 40 | // If we haven't started yet, schedule a special event for the epoch to allow |
| 41 | // ourselves to boot. |
| 42 | if (!called_started_) { |
Austin Schuh | e12b5eb | 2022-08-29 12:39:27 -0700 | [diff] [blame^] | 43 | if (!cached_epoch_) { |
| 44 | cached_epoch_ = ToDistributedClock(monotonic_clock::epoch()); |
| 45 | } |
| 46 | return std::make_pair(*cached_epoch_, monotonic_clock::epoch()); |
James Kuszmaul | 86e86c3 | 2022-07-21 17:39:47 -0700 | [diff] [blame] | 47 | } |
| 48 | |
Austin Schuh | 8bd9632 | 2020-02-13 21:18:22 -0800 | [diff] [blame] | 49 | if (events_list_.empty()) { |
Austin Schuh | e12b5eb | 2022-08-29 12:39:27 -0700 | [diff] [blame^] | 50 | return std::make_pair(distributed_clock::max_time, |
| 51 | monotonic_clock::max_time); |
Austin Schuh | 39788ff | 2019-12-01 18:22:57 -0800 | [diff] [blame] | 52 | } |
Austin Schuh | 8bd9632 | 2020-02-13 21:18:22 -0800 | [diff] [blame] | 53 | |
Austin Schuh | e12b5eb | 2022-08-29 12:39:27 -0700 | [diff] [blame^] | 54 | const monotonic_clock::time_point monotonic_time = events_list_.begin()->first; |
| 55 | if (cached_event_list_monotonic_time_ != monotonic_time) { |
| 56 | cached_event_list_time_ = ToDistributedClock(monotonic_time); |
| 57 | cached_event_list_monotonic_time_ = monotonic_time; |
| 58 | } |
| 59 | |
| 60 | return std::make_pair(cached_event_list_time_, monotonic_time); |
Alex Perry | cb7da4b | 2019-08-28 19:35:56 -0700 | [diff] [blame] | 61 | } |
| 62 | |
James Kuszmaul | 86e86c3 | 2022-07-21 17:39:47 -0700 | [diff] [blame] | 63 | void EventScheduler::Shutdown() { |
| 64 | CHECK(!is_running_); |
| 65 | on_shutdown_(); |
| 66 | } |
Austin Schuh | 58646e2 | 2021-08-23 23:51:46 -0700 | [diff] [blame] | 67 | |
| 68 | void EventScheduler::Startup() { |
| 69 | ++boot_count_; |
James Kuszmaul | 86e86c3 | 2022-07-21 17:39:47 -0700 | [diff] [blame] | 70 | CHECK(!is_running_); |
| 71 | MaybeRunOnStartup(); |
| 72 | CHECK(called_started_); |
Austin Schuh | 58646e2 | 2021-08-23 23:51:46 -0700 | [diff] [blame] | 73 | } |
| 74 | |
Austin Schuh | 8bd9632 | 2020-02-13 21:18:22 -0800 | [diff] [blame] | 75 | void EventScheduler::CallOldestEvent() { |
James Kuszmaul | 86e86c3 | 2022-07-21 17:39:47 -0700 | [diff] [blame] | 76 | if (!called_started_) { |
| 77 | // If we haven't started, start. |
| 78 | MaybeRunOnStartup(); |
| 79 | MaybeRunOnRun(); |
| 80 | CHECK(called_started_); |
| 81 | return; |
| 82 | } |
| 83 | CHECK(is_running_); |
Austin Schuh | 8bd9632 | 2020-02-13 21:18:22 -0800 | [diff] [blame] | 84 | CHECK_GT(events_list_.size(), 0u); |
| 85 | auto iter = events_list_.begin(); |
Austin Schuh | 58646e2 | 2021-08-23 23:51:46 -0700 | [diff] [blame] | 86 | const logger::BootTimestamp t = |
| 87 | FromDistributedClock(scheduler_scheduler_->distributed_now()); |
Austin Schuh | c1ee1b6 | 2022-03-22 17:09:52 -0700 | [diff] [blame] | 88 | VLOG(2) << "Got time back " << t; |
Austin Schuh | 58646e2 | 2021-08-23 23:51:46 -0700 | [diff] [blame] | 89 | CHECK_EQ(t.boot, boot_count_); |
| 90 | CHECK_EQ(t.time, iter->first) << ": Time is wrong on node " << node_index_; |
Austin Schuh | 8bd9632 | 2020-02-13 21:18:22 -0800 | [diff] [blame] | 91 | |
Austin Schuh | ef8f1ae | 2021-12-11 12:35:05 -0800 | [diff] [blame] | 92 | Event *callback = iter->second; |
Austin Schuh | 8bd9632 | 2020-02-13 21:18:22 -0800 | [diff] [blame] | 93 | events_list_.erase(iter); |
Austin Schuh | ef8f1ae | 2021-12-11 12:35:05 -0800 | [diff] [blame] | 94 | callback->Handle(); |
Austin Schuh | b7c8d2a | 2021-07-19 19:22:12 -0700 | [diff] [blame] | 95 | |
| 96 | converter_->ObserveTimePassed(scheduler_scheduler_->distributed_now()); |
Austin Schuh | 8bd9632 | 2020-02-13 21:18:22 -0800 | [diff] [blame] | 97 | } |
| 98 | |
| 99 | void EventScheduler::RunOnRun() { |
James Kuszmaul | 86e86c3 | 2022-07-21 17:39:47 -0700 | [diff] [blame] | 100 | CHECK(is_running_); |
Austin Schuh | e33c08d | 2022-02-03 18:15:21 -0800 | [diff] [blame] | 101 | while (!on_run_.empty()) { |
| 102 | std::function<void()> fn = std::move(*on_run_.begin()); |
| 103 | on_run_.erase(on_run_.begin()); |
| 104 | fn(); |
Austin Schuh | 39788ff | 2019-12-01 18:22:57 -0800 | [diff] [blame] | 105 | } |
Alex Perry | cb7da4b | 2019-08-28 19:35:56 -0700 | [diff] [blame] | 106 | } |
| 107 | |
Austin Schuh | e33c08d | 2022-02-03 18:15:21 -0800 | [diff] [blame] | 108 | void EventScheduler::RunOnStartup() noexcept { |
| 109 | while (!on_startup_.empty()) { |
James Kuszmaul | 86e86c3 | 2022-07-21 17:39:47 -0700 | [diff] [blame] | 110 | CHECK(!is_running_); |
Austin Schuh | e33c08d | 2022-02-03 18:15:21 -0800 | [diff] [blame] | 111 | std::function<void()> fn = std::move(*on_startup_.begin()); |
| 112 | on_startup_.erase(on_startup_.begin()); |
| 113 | fn(); |
Austin Schuh | 057d29f | 2021-08-21 23:05:15 -0700 | [diff] [blame] | 114 | } |
Austin Schuh | 057d29f | 2021-08-21 23:05:15 -0700 | [diff] [blame] | 115 | } |
| 116 | |
Austin Schuh | e33c08d | 2022-02-03 18:15:21 -0800 | [diff] [blame] | 117 | void EventScheduler::RunStarted() { |
James Kuszmaul | 86e86c3 | 2022-07-21 17:39:47 -0700 | [diff] [blame] | 118 | CHECK(!is_running_); |
Austin Schuh | e33c08d | 2022-02-03 18:15:21 -0800 | [diff] [blame] | 119 | if (started_) { |
| 120 | started_(); |
| 121 | } |
James Kuszmaul | 86e86c3 | 2022-07-21 17:39:47 -0700 | [diff] [blame] | 122 | is_running_ = true; |
Austin Schuh | e33c08d | 2022-02-03 18:15:21 -0800 | [diff] [blame] | 123 | } |
| 124 | |
James Kuszmaul | 86e86c3 | 2022-07-21 17:39:47 -0700 | [diff] [blame] | 125 | void EventScheduler::MaybeRunStopped() { |
| 126 | CHECK(is_running_); |
| 127 | is_running_ = false; |
| 128 | if (called_started_) { |
| 129 | called_started_ = false; |
| 130 | if (stopped_) { |
| 131 | stopped_(); |
| 132 | } |
| 133 | } |
| 134 | } |
| 135 | |
| 136 | void EventScheduler::MaybeRunOnStartup() { |
| 137 | CHECK(!called_started_); |
| 138 | CHECK(!is_running_); |
| 139 | const logger::BootTimestamp t = |
| 140 | FromDistributedClock(scheduler_scheduler_->distributed_now()); |
| 141 | if (t.boot == boot_count_ && t.time >= monotonic_clock::epoch()) { |
| 142 | called_started_ = true; |
| 143 | RunOnStartup(); |
| 144 | } |
| 145 | } |
| 146 | |
| 147 | void EventScheduler::MaybeRunOnRun() { |
| 148 | if (called_started_) { |
| 149 | RunStarted(); |
| 150 | RunOnRun(); |
Austin Schuh | e33c08d | 2022-02-03 18:15:21 -0800 | [diff] [blame] | 151 | } |
| 152 | } |
Austin Schuh | 58646e2 | 2021-08-23 23:51:46 -0700 | [diff] [blame] | 153 | |
Austin Schuh | ac0771c | 2020-01-07 18:36:30 -0800 | [diff] [blame] | 154 | std::ostream &operator<<(std::ostream &stream, |
| 155 | const aos::distributed_clock::time_point &now) { |
| 156 | // Print it the same way we print a monotonic time. Literally. |
| 157 | stream << monotonic_clock::time_point(now.time_since_epoch()); |
| 158 | return stream; |
| 159 | } |
| 160 | |
Austin Schuh | 8bd9632 | 2020-02-13 21:18:22 -0800 | [diff] [blame] | 161 | void EventSchedulerScheduler::AddEventScheduler(EventScheduler *scheduler) { |
| 162 | CHECK(std::find(schedulers_.begin(), schedulers_.end(), scheduler) == |
| 163 | schedulers_.end()); |
| 164 | CHECK(scheduler->scheduler_scheduler_ == nullptr); |
Austin Schuh | 58646e2 | 2021-08-23 23:51:46 -0700 | [diff] [blame] | 165 | CHECK_EQ(scheduler->node_index(), schedulers_.size()); |
Austin Schuh | 8bd9632 | 2020-02-13 21:18:22 -0800 | [diff] [blame] | 166 | |
| 167 | schedulers_.emplace_back(scheduler); |
| 168 | scheduler->scheduler_scheduler_ = this; |
| 169 | } |
| 170 | |
James Kuszmaul | 86e86c3 | 2022-07-21 17:39:47 -0700 | [diff] [blame] | 171 | void EventSchedulerScheduler::MaybeRunStopped() { |
| 172 | CHECK(!is_running_); |
| 173 | for (EventScheduler *scheduler : schedulers_) { |
| 174 | if (scheduler->is_running()) { |
| 175 | scheduler->MaybeRunStopped(); |
| 176 | } |
| 177 | } |
| 178 | } |
| 179 | |
| 180 | bool EventSchedulerScheduler::RunUntil( |
| 181 | realtime_clock::time_point end_time, EventScheduler *scheduler, |
| 182 | std::function<std::chrono::nanoseconds()> fn_realtime_offset) { |
| 183 | logging::ScopedLogRestorer prev_logger; |
| 184 | MaybeRunOnStartup(); |
| 185 | |
| 186 | bool reached_end_time = false; |
| 187 | |
| 188 | RunMaybeRealtimeLoop([this, scheduler, end_time, fn_realtime_offset, |
| 189 | &reached_end_time]() { |
| 190 | std::tuple<distributed_clock::time_point, EventScheduler *> oldest_event = |
| 191 | OldestEvent(); |
| 192 | aos::distributed_clock::time_point oldest_event_time_distributed = |
| 193 | std::get<0>(oldest_event); |
| 194 | logger::BootTimestamp test_time_monotonic = |
| 195 | scheduler->FromDistributedClock(oldest_event_time_distributed); |
| 196 | realtime_clock::time_point oldest_event_realtime( |
| 197 | test_time_monotonic.time_since_epoch() + fn_realtime_offset()); |
| 198 | |
| 199 | if ((std::get<0>(oldest_event) == distributed_clock::max_time) || |
| 200 | (oldest_event_realtime > end_time && |
| 201 | (reboots_.empty() || |
| 202 | std::get<0>(reboots_.front()) > oldest_event_time_distributed))) { |
| 203 | is_running_ = false; |
| 204 | reached_end_time = true; |
| 205 | |
| 206 | // We have to nudge our time back to the distributed time |
| 207 | // corresponding to our desired realtime time. |
Austin Schuh | e12b5eb | 2022-08-29 12:39:27 -0700 | [diff] [blame^] | 208 | const monotonic_clock::time_point end_monotonic = |
| 209 | monotonic_clock::epoch() + end_time.time_since_epoch() - |
James Kuszmaul | 86e86c3 | 2022-07-21 17:39:47 -0700 | [diff] [blame] | 210 | fn_realtime_offset(); |
| 211 | const aos::distributed_clock::time_point end_time_distributed = |
| 212 | scheduler->ToDistributedClock(end_monotonic); |
| 213 | |
| 214 | now_ = end_time_distributed; |
| 215 | |
| 216 | return; |
| 217 | } |
| 218 | |
| 219 | if (!reboots_.empty() && |
| 220 | std::get<0>(reboots_.front()) <= std::get<0>(oldest_event)) { |
| 221 | // Reboot is next. |
| 222 | CHECK_LE(now_, |
| 223 | std::get<0>(reboots_.front()) + std::chrono::nanoseconds(1)) |
| 224 | << ": Simulated time went backwards by too much. Please " |
| 225 | "investigate."; |
| 226 | now_ = std::get<0>(reboots_.front()); |
| 227 | Reboot(); |
| 228 | reboots_.erase(reboots_.begin()); |
| 229 | return; |
| 230 | } |
| 231 | |
| 232 | // We get to pick our tradeoffs here. Either we assume that there are |
| 233 | // no backward step changes in our time function for each node, or we |
| 234 | // have to let time go backwards. We currently only really see this |
| 235 | // happen when 2 events are scheduled for "now", time changes, and |
| 236 | // there is a nanosecond or two of rounding due to integer math. |
| 237 | // |
| 238 | // //aos/events/logging:logger_test triggers this. |
| 239 | CHECK_LE(now_, std::get<0>(oldest_event) + std::chrono::nanoseconds(1)) |
| 240 | << ": Simulated time went backwards by too much. Please " |
| 241 | "investigate."; |
| 242 | |
| 243 | now_ = std::get<0>(oldest_event); |
| 244 | |
| 245 | std::get<1>(oldest_event)->CallOldestEvent(); |
| 246 | }); |
| 247 | |
| 248 | MaybeRunStopped(); |
| 249 | |
| 250 | return reached_end_time; |
| 251 | } |
| 252 | |
Austin Schuh | 58646e2 | 2021-08-23 23:51:46 -0700 | [diff] [blame] | 253 | void EventSchedulerScheduler::Reboot() { |
| 254 | const std::vector<logger::BootTimestamp> × = |
| 255 | std::get<1>(reboots_.front()); |
| 256 | CHECK_EQ(times.size(), schedulers_.size()); |
| 257 | |
| 258 | VLOG(1) << "Rebooting at " << now_; |
| 259 | for (const auto &time : times) { |
| 260 | VLOG(1) << " " << time; |
| 261 | } |
| 262 | |
| 263 | is_running_ = false; |
| 264 | |
| 265 | // Shut everything down. |
| 266 | std::vector<size_t> rebooted; |
| 267 | for (size_t node_index = 0; node_index < schedulers_.size(); ++node_index) { |
| 268 | if (schedulers_[node_index]->boot_count() == times[node_index].boot) { |
| 269 | continue; |
| 270 | } else { |
| 271 | rebooted.emplace_back(node_index); |
| 272 | CHECK_EQ(schedulers_[node_index]->boot_count() + 1, |
| 273 | times[node_index].boot); |
James Kuszmaul | 86e86c3 | 2022-07-21 17:39:47 -0700 | [diff] [blame] | 274 | schedulers_[node_index]->MaybeRunStopped(); |
Austin Schuh | 58646e2 | 2021-08-23 23:51:46 -0700 | [diff] [blame] | 275 | schedulers_[node_index]->Shutdown(); |
| 276 | } |
| 277 | } |
| 278 | |
| 279 | // And start it back up again to reboot. When something starts back up |
| 280 | // (especially message_bridge), it could try to send stuff out. We want |
| 281 | // to move everything over to the new boot before doing that. |
| 282 | for (const size_t node_index : rebooted) { |
Austin Schuh | 58646e2 | 2021-08-23 23:51:46 -0700 | [diff] [blame] | 283 | schedulers_[node_index]->Startup(); |
| 284 | } |
Austin Schuh | 58646e2 | 2021-08-23 23:51:46 -0700 | [diff] [blame] | 285 | for (const size_t node_index : rebooted) { |
James Kuszmaul | 86e86c3 | 2022-07-21 17:39:47 -0700 | [diff] [blame] | 286 | schedulers_[node_index]->MaybeRunOnRun(); |
Austin Schuh | 58646e2 | 2021-08-23 23:51:46 -0700 | [diff] [blame] | 287 | } |
| 288 | is_running_ = true; |
| 289 | } |
| 290 | |
Austin Schuh | 8bd9632 | 2020-02-13 21:18:22 -0800 | [diff] [blame] | 291 | void EventSchedulerScheduler::RunFor(distributed_clock::duration duration) { |
| 292 | distributed_clock::time_point end_time = now_ + duration; |
| 293 | logging::ScopedLogRestorer prev_logger; |
James Kuszmaul | 86e86c3 | 2022-07-21 17:39:47 -0700 | [diff] [blame] | 294 | MaybeRunOnStartup(); |
Austin Schuh | 8bd9632 | 2020-02-13 21:18:22 -0800 | [diff] [blame] | 295 | |
| 296 | // Run all the sub-event-schedulers. |
James Kuszmaul | b67409b | 2022-06-20 16:25:03 -0700 | [diff] [blame] | 297 | RunMaybeRealtimeLoop([this, end_time]() { |
Austin Schuh | 8bd9632 | 2020-02-13 21:18:22 -0800 | [diff] [blame] | 298 | std::tuple<distributed_clock::time_point, EventScheduler *> oldest_event = |
| 299 | OldestEvent(); |
Austin Schuh | 58646e2 | 2021-08-23 23:51:46 -0700 | [diff] [blame] | 300 | if (!reboots_.empty() && |
| 301 | std::get<0>(reboots_.front()) <= std::get<0>(oldest_event)) { |
| 302 | // Reboot is next. |
| 303 | if (std::get<0>(reboots_.front()) > end_time) { |
| 304 | // Reboot is after our end time, give up. |
| 305 | is_running_ = false; |
James Kuszmaul | b67409b | 2022-06-20 16:25:03 -0700 | [diff] [blame] | 306 | return; |
Austin Schuh | 58646e2 | 2021-08-23 23:51:46 -0700 | [diff] [blame] | 307 | } |
| 308 | |
| 309 | CHECK_LE(now_, |
| 310 | std::get<0>(reboots_.front()) + std::chrono::nanoseconds(1)) |
| 311 | << ": Simulated time went backwards by too much. Please " |
| 312 | "investigate."; |
| 313 | now_ = std::get<0>(reboots_.front()); |
| 314 | Reboot(); |
| 315 | reboots_.erase(reboots_.begin()); |
James Kuszmaul | b67409b | 2022-06-20 16:25:03 -0700 | [diff] [blame] | 316 | return; |
Austin Schuh | 58646e2 | 2021-08-23 23:51:46 -0700 | [diff] [blame] | 317 | } |
| 318 | |
Austin Schuh | 8bd9632 | 2020-02-13 21:18:22 -0800 | [diff] [blame] | 319 | // No events left, bail. |
| 320 | if (std::get<0>(oldest_event) == distributed_clock::max_time || |
| 321 | std::get<0>(oldest_event) > end_time) { |
| 322 | is_running_ = false; |
James Kuszmaul | b67409b | 2022-06-20 16:25:03 -0700 | [diff] [blame] | 323 | return; |
Austin Schuh | 8bd9632 | 2020-02-13 21:18:22 -0800 | [diff] [blame] | 324 | } |
| 325 | |
| 326 | // We get to pick our tradeoffs here. Either we assume that there are no |
| 327 | // backward step changes in our time function for each node, or we have to |
Austin Schuh | 2f8fd75 | 2020-09-01 22:38:28 -0700 | [diff] [blame] | 328 | // let time go backwards. We currently only really see this happen when 2 |
| 329 | // events are scheduled for "now", time changes, and there is a nanosecond |
| 330 | // or two of rounding due to integer math. |
| 331 | // |
| 332 | // //aos/events/logging:logger_test triggers this. |
| 333 | CHECK_LE(now_, std::get<0>(oldest_event) + std::chrono::nanoseconds(1)) |
Austin Schuh | 8bd9632 | 2020-02-13 21:18:22 -0800 | [diff] [blame] | 334 | << ": Simulated time went backwards by too much. Please investigate."; |
James Kuszmaul | 86e86c3 | 2022-07-21 17:39:47 -0700 | [diff] [blame] | 335 | // push time forwards |
Austin Schuh | 8bd9632 | 2020-02-13 21:18:22 -0800 | [diff] [blame] | 336 | now_ = std::get<0>(oldest_event); |
| 337 | |
| 338 | std::get<1>(oldest_event)->CallOldestEvent(); |
James Kuszmaul | b67409b | 2022-06-20 16:25:03 -0700 | [diff] [blame] | 339 | }); |
Austin Schuh | 8bd9632 | 2020-02-13 21:18:22 -0800 | [diff] [blame] | 340 | |
| 341 | now_ = end_time; |
Austin Schuh | e33c08d | 2022-02-03 18:15:21 -0800 | [diff] [blame] | 342 | |
James Kuszmaul | 86e86c3 | 2022-07-21 17:39:47 -0700 | [diff] [blame] | 343 | MaybeRunStopped(); |
Austin Schuh | 8bd9632 | 2020-02-13 21:18:22 -0800 | [diff] [blame] | 344 | } |
| 345 | |
| 346 | void EventSchedulerScheduler::Run() { |
| 347 | logging::ScopedLogRestorer prev_logger; |
James Kuszmaul | 86e86c3 | 2022-07-21 17:39:47 -0700 | [diff] [blame] | 348 | MaybeRunOnStartup(); |
| 349 | |
| 350 | // Run all the sub-event-schedulers. |
James Kuszmaul | b67409b | 2022-06-20 16:25:03 -0700 | [diff] [blame] | 351 | RunMaybeRealtimeLoop([this]() { |
Austin Schuh | 8bd9632 | 2020-02-13 21:18:22 -0800 | [diff] [blame] | 352 | std::tuple<distributed_clock::time_point, EventScheduler *> oldest_event = |
| 353 | OldestEvent(); |
Austin Schuh | 58646e2 | 2021-08-23 23:51:46 -0700 | [diff] [blame] | 354 | if (!reboots_.empty() && |
| 355 | std::get<0>(reboots_.front()) <= std::get<0>(oldest_event)) { |
| 356 | // Reboot is next. |
| 357 | CHECK_LE(now_, |
| 358 | std::get<0>(reboots_.front()) + std::chrono::nanoseconds(1)) |
| 359 | << ": Simulated time went backwards by too much. Please " |
| 360 | "investigate."; |
| 361 | now_ = std::get<0>(reboots_.front()); |
| 362 | Reboot(); |
| 363 | reboots_.erase(reboots_.begin()); |
James Kuszmaul | b67409b | 2022-06-20 16:25:03 -0700 | [diff] [blame] | 364 | return; |
Austin Schuh | 58646e2 | 2021-08-23 23:51:46 -0700 | [diff] [blame] | 365 | } |
Austin Schuh | 8bd9632 | 2020-02-13 21:18:22 -0800 | [diff] [blame] | 366 | // No events left, bail. |
| 367 | if (std::get<0>(oldest_event) == distributed_clock::max_time) { |
James Kuszmaul | b67409b | 2022-06-20 16:25:03 -0700 | [diff] [blame] | 368 | is_running_ = false; |
| 369 | return; |
Austin Schuh | 8bd9632 | 2020-02-13 21:18:22 -0800 | [diff] [blame] | 370 | } |
| 371 | |
| 372 | // We get to pick our tradeoffs here. Either we assume that there are no |
| 373 | // backward step changes in our time function for each node, or we have to |
Austin Schuh | 2f8fd75 | 2020-09-01 22:38:28 -0700 | [diff] [blame] | 374 | // let time go backwards. We currently only really see this happen when 2 |
| 375 | // events are scheduled for "now", time changes, and there is a nanosecond |
| 376 | // or two of rounding due to integer math. |
| 377 | // |
| 378 | // //aos/events/logging:logger_test triggers this. |
| 379 | CHECK_LE(now_, std::get<0>(oldest_event) + std::chrono::nanoseconds(1)) |
Austin Schuh | 8bd9632 | 2020-02-13 21:18:22 -0800 | [diff] [blame] | 380 | << ": Simulated time went backwards by too much. Please investigate."; |
| 381 | now_ = std::get<0>(oldest_event); |
| 382 | |
| 383 | std::get<1>(oldest_event)->CallOldestEvent(); |
James Kuszmaul | b67409b | 2022-06-20 16:25:03 -0700 | [diff] [blame] | 384 | }); |
Austin Schuh | e33c08d | 2022-02-03 18:15:21 -0800 | [diff] [blame] | 385 | |
James Kuszmaul | 86e86c3 | 2022-07-21 17:39:47 -0700 | [diff] [blame] | 386 | MaybeRunStopped(); |
Austin Schuh | 8bd9632 | 2020-02-13 21:18:22 -0800 | [diff] [blame] | 387 | } |
| 388 | |
James Kuszmaul | b67409b | 2022-06-20 16:25:03 -0700 | [diff] [blame] | 389 | template <typename F> |
| 390 | void EventSchedulerScheduler::RunMaybeRealtimeLoop(F loop_body) { |
| 391 | internal::TimerFd timerfd; |
| 392 | CHECK_LT(0.0, replay_rate_) << "Replay rate must be positive."; |
| 393 | distributed_clock::time_point last_distributed_clock = |
| 394 | std::get<0>(OldestEvent()); |
| 395 | monotonic_clock::time_point last_monotonic_clock = monotonic_clock::now(); |
| 396 | timerfd.SetTime(last_monotonic_clock, std::chrono::seconds(0)); |
| 397 | epoll_.OnReadable( |
| 398 | timerfd.fd(), [this, &last_distributed_clock, &last_monotonic_clock, |
| 399 | &timerfd, loop_body]() { |
| 400 | const uint64_t read_result = timerfd.Read(); |
| 401 | if (!is_running_) { |
| 402 | epoll_.Quit(); |
| 403 | return; |
| 404 | } |
| 405 | CHECK_EQ(read_result, 1u); |
| 406 | // Call loop_body() at least once; if we are in infinite-speed replay, |
| 407 | // we don't actually want/need the context switches from the epoll |
| 408 | // setup, so just loop. |
| 409 | // Note: The performance impacts of this code have not been carefully |
| 410 | // inspected (e.g., how much does avoiding the context-switch help; does |
| 411 | // the timerfd_settime call matter). |
| 412 | // This is deliberately written to support the user changing replay |
| 413 | // rates dynamically. |
| 414 | do { |
| 415 | loop_body(); |
| 416 | if (is_running_) { |
| 417 | const monotonic_clock::time_point next_trigger = |
| 418 | last_monotonic_clock + |
| 419 | std::chrono::duration_cast<std::chrono::nanoseconds>( |
| 420 | (now_ - last_distributed_clock) / replay_rate_); |
| 421 | timerfd.SetTime(next_trigger, std::chrono::seconds(0)); |
| 422 | last_monotonic_clock = next_trigger; |
| 423 | last_distributed_clock = now_; |
| 424 | } else { |
| 425 | epoll_.Quit(); |
| 426 | } |
| 427 | } while (replay_rate_ == std::numeric_limits<double>::infinity() && |
| 428 | is_running_); |
| 429 | }); |
| 430 | |
| 431 | epoll_.Run(); |
| 432 | epoll_.DeleteFd(timerfd.fd()); |
| 433 | } |
| 434 | |
Austin Schuh | 8bd9632 | 2020-02-13 21:18:22 -0800 | [diff] [blame] | 435 | std::tuple<distributed_clock::time_point, EventScheduler *> |
| 436 | EventSchedulerScheduler::OldestEvent() { |
| 437 | distributed_clock::time_point min_event_time = distributed_clock::max_time; |
| 438 | EventScheduler *min_scheduler = nullptr; |
| 439 | |
| 440 | // TODO(austin): Don't linearly search... But for N=3, it is probably the |
| 441 | // fastest way to do this. |
| 442 | for (EventScheduler *scheduler : schedulers_) { |
Austin Schuh | e12b5eb | 2022-08-29 12:39:27 -0700 | [diff] [blame^] | 443 | const std::pair<distributed_clock::time_point, monotonic_clock::time_point> |
| 444 | event_time = scheduler->OldestEvent(); |
| 445 | if (event_time.second != monotonic_clock::max_time) { |
| 446 | if (event_time.first < min_event_time) { |
| 447 | min_event_time = event_time.first; |
Austin Schuh | 8bd9632 | 2020-02-13 21:18:22 -0800 | [diff] [blame] | 448 | min_scheduler = scheduler; |
| 449 | } |
| 450 | } |
| 451 | } |
| 452 | |
Austin Schuh | 87dd383 | 2021-01-01 23:07:31 -0800 | [diff] [blame] | 453 | if (min_scheduler) { |
Austin Schuh | c1ee1b6 | 2022-03-22 17:09:52 -0700 | [diff] [blame] | 454 | VLOG(2) << "Oldest event " << min_event_time << " on scheduler " |
Austin Schuh | 87dd383 | 2021-01-01 23:07:31 -0800 | [diff] [blame] | 455 | << min_scheduler->node_index_; |
| 456 | } |
Austin Schuh | 8bd9632 | 2020-02-13 21:18:22 -0800 | [diff] [blame] | 457 | return std::make_tuple(min_event_time, min_scheduler); |
| 458 | } |
| 459 | |
Austin Schuh | e33c08d | 2022-02-03 18:15:21 -0800 | [diff] [blame] | 460 | void EventSchedulerScheduler::TemporarilyStopAndRun(std::function<void()> fn) { |
| 461 | const bool was_running = is_running_; |
| 462 | if (is_running_) { |
| 463 | is_running_ = false; |
James Kuszmaul | 86e86c3 | 2022-07-21 17:39:47 -0700 | [diff] [blame] | 464 | MaybeRunStopped(); |
Austin Schuh | e33c08d | 2022-02-03 18:15:21 -0800 | [diff] [blame] | 465 | } |
| 466 | fn(); |
| 467 | if (was_running) { |
James Kuszmaul | 86e86c3 | 2022-07-21 17:39:47 -0700 | [diff] [blame] | 468 | MaybeRunOnStartup(); |
| 469 | } |
| 470 | } |
| 471 | |
| 472 | void EventSchedulerScheduler::MaybeRunOnStartup() { |
| 473 | is_running_ = true; |
| 474 | for (EventScheduler *scheduler : schedulers_) { |
| 475 | scheduler->MaybeRunOnStartup(); |
| 476 | } |
| 477 | // We must trigger all the OnRun's *after* all the OnStartup callbacks are |
| 478 | // triggered because that is the contract that we have stated. |
| 479 | for (EventScheduler *scheduler : schedulers_) { |
| 480 | scheduler->MaybeRunOnRun(); |
Austin Schuh | e33c08d | 2022-02-03 18:15:21 -0800 | [diff] [blame] | 481 | } |
| 482 | } |
| 483 | |
Alex Perry | cb7da4b | 2019-08-28 19:35:56 -0700 | [diff] [blame] | 484 | } // namespace aos |