Fix crash when simulated event loop gets destroyed before OnRun called

We were saving a std::function for each OnRun callback inside the
EventScheduler, and continuing to call it regardless of if the EventLoop
which added it was still alive.  This was triggering segfaults when
trying to access the destroyed event loop.  Instead, save a pointer to
the event loop, and clean it up when the event loop is destroyed so this
works as the user would expect.

While we are here, and since the behavior needs to change as part of
this change, only let OnRun callbacks be scheduled before the event loop
has run any.  Test this too.

Change-Id: I467a00c9f0981669e77fd03927ceeb149a79e6c4
Signed-off-by: James Kuszmaul <james.kuszmaul@bluerivertech.com>
diff --git a/aos/events/event_scheduler.cc b/aos/events/event_scheduler.cc
index df6ab81..03abdd6 100644
--- a/aos/events/event_scheduler.cc
+++ b/aos/events/event_scheduler.cc
@@ -101,9 +101,9 @@
 void EventScheduler::RunOnRun() {
   CHECK(is_running_);
   while (!on_run_.empty()) {
-    std::function<void()> fn = std::move(*on_run_.begin());
+    Event *event = *on_run_.begin();
     on_run_.erase(on_run_.begin());
-    fn();
+    event->Handle();
   }
 }
 
diff --git a/aos/events/event_scheduler.h b/aos/events/event_scheduler.h
index 84201ed..68fcf28 100644
--- a/aos/events/event_scheduler.h
+++ b/aos/events/event_scheduler.h
@@ -114,8 +114,13 @@
   // entered the running state. Callbacks are cleared after being called once.
   // Will not get called until a node starts (a node does not start until its
   // monotonic clock has reached at least monotonic_clock::epoch()).
-  void ScheduleOnRun(std::function<void()> callback) {
-    on_run_.emplace_back(std::move(callback));
+  void ScheduleOnRun(Event *callback) { on_run_.emplace_back(callback); }
+
+  // Removes an event from the OnRun list without running it.
+  void DeleteOnRun(Event *callback) {
+    auto it = std::find(on_run_.begin(), on_run_.end(), callback);
+    CHECK(it != on_run_.end());
+    on_run_.erase(it);
   }
 
   // Schedules a callback whenever the event scheduler starts, before we have
@@ -220,7 +225,7 @@
   size_t boot_count_ = 0;
 
   // List of functions to run (once) when running.
-  std::vector<std::function<void()>> on_run_;
+  std::vector<Event *> on_run_;
   std::vector<std::function<void()>> on_startup_;
 
   // Multimap holding times to run functions.  These are stored in order, and
diff --git a/aos/events/event_scheduler_test.cc b/aos/events/event_scheduler_test.cc
index d328552..45a0dd0 100644
--- a/aos/events/event_scheduler_test.cc
+++ b/aos/events/event_scheduler_test.cc
@@ -264,7 +264,7 @@
               schedulers_.at(0).monotonic_now());
     EXPECT_EQ(monotonic_clock::epoch(), schedulers_.at(1).monotonic_now());
   });
-  schedulers_.at(1).ScheduleOnRun([this, &observed_on_run_1]() {
+  FunctionEvent on_run_event([this, &observed_on_run_1]() {
     observed_on_run_1 = true;
     // Note that we do not *stop* execution on node zero just to get 1 started.
     EXPECT_TRUE(schedulers_.at(0).is_running());
@@ -275,6 +275,7 @@
               schedulers_.at(0).monotonic_now());
     EXPECT_EQ(monotonic_clock::epoch(), schedulers_.at(1).monotonic_now());
   });
+  schedulers_.at(1).ScheduleOnRun(&on_run_event);
 
   FunctionEvent e([]() {});
   schedulers_.at(0).Schedule(monotonic_clock::epoch() + chrono::seconds(1), &e);
@@ -303,8 +304,9 @@
   });
   schedulers_.at(1).ScheduleOnStartup(
       []() { FAIL() << "Should never have hit startup handlers for node 1."; });
-  schedulers_.at(1).ScheduleOnRun(
+  FunctionEvent fail(
       []() { FAIL() << "Should never have hit OnRun handlers for node 1."; });
+  schedulers_.at(1).ScheduleOnRun(&fail);
   schedulers_.at(1).set_stopped(
       []() { FAIL() << "Should never have hit stopped handlers for node 1."; });
 
@@ -363,25 +365,25 @@
     ++startup_counter_b;
   };
 
-  auto on_run_handler_a = [this, &on_run_counter_a]() {
+  auto on_run_handler_a = FunctionEvent([this, &on_run_counter_a]() {
     EXPECT_TRUE(schedulers_.at(0).is_running());
     ++on_run_counter_a;
-  };
+  });
 
-  auto on_run_handler_b = [this, &on_run_counter_b]() {
+  auto on_run_handler_b = FunctionEvent([this, &on_run_counter_b]() {
     EXPECT_TRUE(schedulers_.at(0).is_running());
     ++on_run_counter_b;
-  };
+  });
 
   schedulers_.at(0).set_stopped([this, &stopped_counter]() {
     EXPECT_FALSE(schedulers_.at(0).is_running());
     ++stopped_counter;
   });
   schedulers_.at(0).set_on_shutdown(
-      [this, &shutdown_counter, startup_handler_a, on_run_handler_a]() {
+      [this, &shutdown_counter, startup_handler_a, &on_run_handler_a]() {
         EXPECT_FALSE(schedulers_.at(0).is_running());
         schedulers_.at(0).ScheduleOnStartup(startup_handler_a);
-        schedulers_.at(0).ScheduleOnRun(on_run_handler_a);
+        schedulers_.at(0).ScheduleOnRun(&on_run_handler_a);
         ++shutdown_counter;
       });
   schedulers_.at(0).ScheduleOnStartup(startup_handler_a);
@@ -389,7 +391,7 @@
     EXPECT_FALSE(schedulers_.at(0).is_running());
     ++started_counter;
   });
-  schedulers_.at(0).ScheduleOnRun(on_run_handler_a);
+  schedulers_.at(0).ScheduleOnRun(&on_run_handler_a);
 
   FunctionEvent e([]() {});
   schedulers_.at(0).Schedule(monotonic_clock::epoch() + chrono::seconds(1), &e);
@@ -405,12 +407,12 @@
   // In the middle, execute a TemporarilyStopAndRun. Use it to re-register the
   // startup handlers.
   schedulers_.at(0).ScheduleOnStartup(startup_handler_b);
-  schedulers_.at(0).ScheduleOnRun(on_run_handler_b);
-  FunctionEvent stop_and_run([this, startup_handler_a, on_run_handler_a]() {
+  schedulers_.at(0).ScheduleOnRun(&on_run_handler_b);
+  FunctionEvent stop_and_run([this, startup_handler_a, &on_run_handler_a]() {
     scheduler_scheduler_.TemporarilyStopAndRun(
-        [this, startup_handler_a, on_run_handler_a]() {
+        [this, startup_handler_a, &on_run_handler_a]() {
           schedulers_.at(0).ScheduleOnStartup(startup_handler_a);
-          schedulers_.at(0).ScheduleOnRun(on_run_handler_a);
+          schedulers_.at(0).ScheduleOnRun(&on_run_handler_a);
         });
   });
   schedulers_.at(1).Schedule(monotonic_clock::epoch() + chrono::seconds(2),
@@ -496,7 +498,8 @@
        BootTimestamp{0, monotonic_clock::epoch() - chrono::seconds(10)}});
   int counter = 0;
 
-  schedulers_[1].ScheduleOnRun([]() { FAIL(); });
+  FunctionEvent fail_event([]() { FAIL(); });
+  schedulers_[1].ScheduleOnRun(&fail_event);
   schedulers_[1].ScheduleOnStartup([]() { FAIL(); });
   schedulers_[1].set_on_shutdown([]() { FAIL(); });
   schedulers_[1].set_started([]() { FAIL(); });
diff --git a/aos/events/simulated_event_loop.cc b/aos/events/simulated_event_loop.cc
index 1b05841..d698b7d 100644
--- a/aos/events/simulated_event_loop.cc
+++ b/aos/events/simulated_event_loop.cc
@@ -605,9 +605,17 @@
     });
 
     event_loops_->push_back(this);
+    scheduler_->ScheduleOnRun(&on_run_event_);
+    on_run_scheduled_ = true;
   }
 
   ~SimulatedEventLoop() override {
+    // Unregister any on_run callbacks to handle cleanup before they run
+    // correctly.
+    if (on_run_scheduled_) {
+      scheduler_->DeleteOnRun(&on_run_event_);
+    }
+
     // Trigger any remaining senders or fetchers to be cleared before destroying
     // the event loop so the book keeping matches.
     timing_report_sender_.reset();
@@ -683,18 +691,41 @@
             scheduler_, this, callback, interval, offset)));
   }
 
+  class OnRunEvent : public EventScheduler::Event {
+   public:
+    OnRunEvent(SimulatedEventLoop *loop) : loop_(loop) {}
+
+    virtual void Handle() noexcept { loop_->DoOnRun(); }
+
+   private:
+    SimulatedEventLoop *loop_;
+  };
+
   void OnRun(::std::function<void()> on_run) override {
     CHECK(!is_running()) << ": Cannot register OnRun callback while running.";
-    scheduler_->ScheduleOnRun([this, on_run = std::move(on_run)]() {
+    CHECK(on_run_scheduled_)
+        << "Registering OnRun callback after running on " << name();
+    on_run_.emplace_back(std::move(on_run));
+  }
+
+  // Called by OnRunEvent when we need to process OnRun callbacks.
+  void DoOnRun() {
+    VLOG(1) << distributed_now() << " " << NodeName(node()) << monotonic_now()
+            << " " << name() << " OnRun()";
+    on_run_scheduled_ = false;
+    while (!on_run_.empty()) {
+      std::function<void()> fn = std::move(*on_run_.begin());
+      on_run_.erase(on_run_.begin());
+
       logging::ScopedLogRestorer prev_logger;
       if (log_impl_) {
         prev_logger.Swap(log_impl_);
       }
       ScopedMarkRealtimeRestorer rt(runtime_realtime_priority() > 0);
       SetTimerContext(monotonic_now());
-      on_run();
+      fn();
       ClearContext();
-    });
+    }
   }
 
   const Node *node() const override { return node_; }
@@ -787,6 +818,15 @@
   std::shared_ptr<StartupTracker> startup_tracker_;
 
   EventLoopOptions options_;
+
+  // Event used by EventScheduler to run OnRun callbacks.
+  OnRunEvent on_run_event_{this};
+
+  // True if the on run callbacks have been scheduled in the scheduler, and
+  // false if they have been executed.
+  bool on_run_scheduled_;
+
+  std::vector<std::function<void()>> on_run_;
 };
 
 void SimulatedEventLoopFactory::set_send_delay(
@@ -1308,8 +1348,9 @@
 void SimulatedPhasedLoopHandler::HandleEvent() noexcept {
   monotonic_clock::time_point monotonic_now =
       simulated_event_loop_->monotonic_now();
-  VLOG(1) << monotonic_now << " Phased loop " << simulated_event_loop_->name()
-          << ", " << name();
+  VLOG(1) << simulated_event_loop_->scheduler_->distributed_now() << " "
+          << NodeName(simulated_event_loop_->node()) << monotonic_now << " "
+          << simulated_event_loop_->name() << " Phased loop '" << name() << "'";
   logging::ScopedLogRestorer prev_logger;
   if (simulated_event_loop_->log_impl_) {
     prev_logger.Swap(simulated_event_loop_->log_impl_);
diff --git a/aos/events/simulated_event_loop_test.cc b/aos/events/simulated_event_loop_test.cc
index 745a273..429d39c 100644
--- a/aos/events/simulated_event_loop_test.cc
+++ b/aos/events/simulated_event_loop_test.cc
@@ -181,6 +181,66 @@
   EXPECT_EQ(test_message_counter2.count(), 0u);
 }
 
+// Test that OnRun callbacks get deleted if the event loop gets deleted.
+TEST(SimulatedEventLoopTest, DestructEventLoopBeforeOnRun) {
+  SimulatedEventLoopTestFactory factory;
+
+  SimulatedEventLoopFactory simulated_event_loop_factory(
+      factory.configuration());
+
+  {
+    ::std::unique_ptr<EventLoop> test_event_loop =
+        simulated_event_loop_factory.MakeEventLoop("test");
+    test_event_loop->OnRun([]() { LOG(FATAL) << "Don't run this"; });
+  }
+
+  simulated_event_loop_factory.RunFor(chrono::seconds(1));
+}
+
+// Tests that the order event loops are created is the order that the OnRun
+// callbacks are run.
+TEST(SimulatedEventLoopTest, OnRunOrderFollowsConstructionOrder) {
+  SimulatedEventLoopTestFactory factory;
+
+  SimulatedEventLoopFactory simulated_event_loop_factory(
+      factory.configuration());
+
+  int count = 0;
+
+  std::unique_ptr<EventLoop> test1_event_loop =
+      simulated_event_loop_factory.MakeEventLoop("test1");
+  std::unique_ptr<EventLoop> test2_event_loop =
+      simulated_event_loop_factory.MakeEventLoop("test2");
+  test2_event_loop->OnRun([&count]() {
+    EXPECT_EQ(count, 1u);
+    ++count;
+  });
+  test1_event_loop->OnRun([&count]() {
+    EXPECT_EQ(count, 0u);
+    ++count;
+  });
+
+  simulated_event_loop_factory.RunFor(chrono::seconds(1));
+
+  EXPECT_EQ(count, 2u);
+}
+
+// Test that we can't register OnRun callbacks after starting.
+TEST(SimulatedEventLoopDeathTest, OnRunAfterRunning) {
+  SimulatedEventLoopTestFactory factory;
+
+  SimulatedEventLoopFactory simulated_event_loop_factory(
+      factory.configuration());
+
+  std::unique_ptr<EventLoop> test_event_loop =
+      simulated_event_loop_factory.MakeEventLoop("test");
+  test_event_loop->OnRun([]() {});
+
+  simulated_event_loop_factory.RunFor(chrono::seconds(1));
+
+  EXPECT_DEATH(test_event_loop->OnRun([]() {}), "OnRun");
+}
+
 // Test that if we configure an event loop to be able to send too fast that we
 // do allow it to do so.
 TEST(SimulatedEventLoopTest, AllowSendTooFast) {