Pipe through exit statuses in AOS Run() methods

This makes it so that the sundry Run() methods can return exit statuses
when exited using the ExitHandle. This enables certain categories of
error-handling that were hard to do previously---in particular, this
enables future modifications to LogReader to propagate log reading
faults through this interface.

Change-Id: I238e7aa6b82dc3d7938630b2b0ab96eb30b573eb
Signed-off-by: James Kuszmaul <james.kuszmaul@bluerivertech.com>
diff --git a/aos/events/BUILD b/aos/events/BUILD
index d799000..ace634c 100644
--- a/aos/events/BUILD
+++ b/aos/events/BUILD
@@ -122,7 +122,9 @@
         "//aos/logging",
         "//aos/time",
         "//aos/util:phased_loop",
+        "//aos/util:status",
         "@com_github_google_flatbuffers//:flatbuffers",
+        "@com_github_tartanllama_expected",
         "@com_google_absl//absl/container:btree",
     ],
 )
diff --git a/aos/events/event_loop.h b/aos/events/event_loop.h
index 93c5663..d45dddf 100644
--- a/aos/events/event_loop.h
+++ b/aos/events/event_loop.h
@@ -1,6 +1,5 @@
 #ifndef AOS_EVENTS_EVENT_LOOP_H_
 #define AOS_EVENTS_EVENT_LOOP_H_
-
 #include <sched.h>
 
 #include <atomic>
@@ -11,6 +10,7 @@
 #include "absl/container/btree_set.h"
 #include "flatbuffers/flatbuffers.h"
 #include "glog/logging.h"
+#include "tl/expected.hpp"
 
 #include "aos/configuration.h"
 #include "aos/configuration_generated.h"
@@ -26,6 +26,7 @@
 #include "aos/json_to_flatbuffer.h"
 #include "aos/time/time.h"
 #include "aos/util/phased_loop.h"
+#include "aos/util/status.h"
 #include "aos/uuid.h"
 
 DECLARE_bool(timing_reports);
@@ -1040,7 +1041,12 @@
   //
   // This means no more events will be processed, but any currently being
   // processed will finish.
-  virtual void Exit() = 0;
+  virtual void Exit(Result<void> result) = 0;
+  // Overload for a successful exit---equivalent to if we specified a default
+  // parameter for Exit(), except that autocxx does not understand default
+  // arguments and so needs an explicit overload to keep rust happy
+  // (https://github.com/google/autocxx/issues/563).
+  void Exit() { Exit({}); }
 };
 
 }  // namespace aos
diff --git a/aos/events/event_loop_param_test.cc b/aos/events/event_loop_param_test.cc
index 4885f5c..4975540 100644
--- a/aos/events/event_loop_param_test.cc
+++ b/aos/events/event_loop_param_test.cc
@@ -1,6 +1,7 @@
 #include "aos/events/event_loop_param_test.h"
 
 #include <chrono>
+#include <filesystem>
 #include <unordered_map>
 #include <unordered_set>
 
@@ -3797,4 +3798,47 @@
   EXPECT_EQ(SendTestMessage(sender1), RawSender::Error::kMessagesSentTooFast);
 }
 
+// Tests that we can exit with a default constructor and that Run() will
+// indicate a successful exit.
+TEST_P(AbstractEventLoopTest, ExitHandleExitSuccessful) {
+  auto loop = MakePrimary();
+  std::unique_ptr<ExitHandle> exit_handle = MakeExitHandle();
+  bool happened = false;
+
+  loop->OnRun([&exit_handle, &happened]() {
+    happened = true;
+    exit_handle->Exit();
+  });
+
+  EXPECT_TRUE(Run().has_value());
+  EXPECT_TRUE(happened);
+}
+
+// Tests that we can exit with an error Status and have that returned via the
+// Run() method.
+TEST_P(AbstractEventLoopTest, ExitHandleExitFailure) {
+  auto loop = MakePrimary();
+  std::unique_ptr<ExitHandle> exit_handle = MakeExitHandle();
+  bool happened = false;
+
+  loop->OnRun([&exit_handle, &happened]() {
+    happened = true;
+    exit_handle->Exit(Error::MakeUnexpectedError("Hello, World!"));
+    // The second Exit() should not affect the final return value.
+    exit_handle->Exit(Error::MakeUnexpectedError("Hello, World! 2"));
+  });
+  const int line = __LINE__ - 4;
+
+  Result<void> status = Run();
+
+  EXPECT_TRUE(happened);
+  EXPECT_FALSE(status.has_value());
+  EXPECT_EQ(std::string("Hello, World!"), status.error().message());
+  ASSERT_TRUE(status.error().source_location().has_value());
+  EXPECT_EQ(std::string("event_loop_param_test.cc"),
+            std::filesystem::path(status.error().source_location()->file_name())
+                .filename());
+  EXPECT_EQ(line, status.error().source_location()->line());
+}
+
 }  // namespace aos::testing
diff --git a/aos/events/event_loop_param_test.h b/aos/events/event_loop_param_test.h
index f2b16c5..bbba5b7 100644
--- a/aos/events/event_loop_param_test.h
+++ b/aos/events/event_loop_param_test.h
@@ -68,7 +68,9 @@
   virtual std::unique_ptr<EventLoop> MakePrimary(std::string_view name) = 0;
 
   // Runs the loops until they quit.
-  virtual void Run() = 0;
+  virtual Result<void> Run() = 0;
+
+  virtual std::unique_ptr<ExitHandle> MakeExitHandle() = 0;
 
   // Quits the loops.
   virtual void Exit() = 0;
@@ -350,7 +352,11 @@
 
   void EnableNodes(std::string_view my_node) { factory_->EnableNodes(my_node); }
 
-  void Run() { return factory_->Run(); }
+  Result<void> Run() { return factory_->Run(); }
+
+  std::unique_ptr<ExitHandle> MakeExitHandle() {
+    return factory_->MakeExitHandle();
+  }
 
   void Exit() { return factory_->Exit(); }
 
diff --git a/aos/events/shm_event_loop.cc b/aos/events/shm_event_loop.cc
index e15c014..8035235 100644
--- a/aos/events/shm_event_loop.cc
+++ b/aos/events/shm_event_loop.cc
@@ -411,8 +411,15 @@
     CHECK_GT(event_loop_->exit_handle_count_, 0);
     --event_loop_->exit_handle_count_;
   }
+  // Because of how we handle reference counting, we either need to implement
+  // reference counting in the copy/move constructors or just not support them.
+  // If we ever develop a need for this object to be movable/copyable,
+  // supporting it should be straightforwards.
+  DISALLOW_COPY_AND_ASSIGN(ShmExitHandle);
 
-  void Exit() override { event_loop_->Exit(); }
+  void Exit(Result<void> status) override {
+    event_loop_->ExitWithStatus(status);
+  }
 
  private:
   ShmEventLoop *const event_loop_;
@@ -1029,7 +1036,7 @@
   struct sigaction old_action_term_;
 };
 
-void ShmEventLoop::Run() {
+Result<void> ShmEventLoop::Run() {
   CheckCurrentThread();
   SignalHandler::global()->Register(this);
 
@@ -1119,9 +1126,30 @@
   // created the timing reporter.
   timing_report_sender_.reset();
   ClearContext();
+  std::unique_lock<aos::stl_mutex> locker(exit_status_mutex_);
+  std::optional<Result<void>> exit_status;
+  // Clear the stored exit_status_ and extract it to be returned.
+  exit_status_.swap(exit_status);
+  return exit_status.value_or(Result<void>{});
 }
 
-void ShmEventLoop::Exit() { epoll_.Quit(); }
+void ShmEventLoop::Exit() {
+  observed_exit_.test_and_set();
+  // Implicitly defaults exit_status_ to success by not setting it.
+
+  epoll_.Quit();
+}
+
+void ShmEventLoop::ExitWithStatus(Result<void> status) {
+  // Only set the exit status if no other Exit*() call got here first.
+  if (!observed_exit_.test_and_set()) {
+    std::unique_lock<aos::stl_mutex> locker(exit_status_mutex_);
+    exit_status_ = std::move(status);
+  } else {
+    VLOG(1) << "Exit status is already set; not setting it again.";
+  }
+  Exit();
+}
 
 std::unique_ptr<ExitHandle> ShmEventLoop::MakeExitHandle() {
   return std::make_unique<ShmExitHandle>(this);
diff --git a/aos/events/shm_event_loop.h b/aos/events/shm_event_loop.h
index 0e71f96..5da1632 100644
--- a/aos/events/shm_event_loop.h
+++ b/aos/events/shm_event_loop.h
@@ -46,10 +46,18 @@
   void operator=(ShmEventLoop const &) = delete;
 
   // Runs the event loop until Exit is called, or ^C is caught.
-  void Run();
-  // Exits the event loop.  Async safe.
+  Result<void> Run();
+  // Exits the event loop.  async-signal-safe (see
+  // https://man7.org/linux/man-pages/man7/signal-safety.7.html).
+  // Will result in Run() returning a successful result when called.
   void Exit();
 
+  // Exits the event loop with the provided status. Thread-safe, but not
+  // async-safe.
+  void ExitWithStatus(Result<void> status = {});
+
+  // Constructs an exit handle for the EventLoop. The provided ExitHandle uses
+  // ExitWithStatus().
   std::unique_ptr<ExitHandle> MakeExitHandle();
 
   aos::monotonic_clock::time_point monotonic_now() const override {
@@ -183,6 +191,21 @@
 
   // Only set during Run().
   std::unique_ptr<ipc_lib::SignalFd> signalfd_;
+
+  // Calls to Exit() are guaranteed to be thread-safe, so the exit_status_mutex_
+  // guards access to the exit_status_.
+  aos::stl_mutex exit_status_mutex_;
+  // Once exit_status_ is set once, we will not set it again until we have
+  // actually exited. This is to try to provide consistent behavior in cases
+  // where Exit() is called multiple times before Run() is aactually terminates
+  // execution.
+  std::optional<Result<void>> exit_status_{};
+  // Used by the Exit() call to provide an async-safe way of indicating that
+  // Exit() was called.
+  // Will be set once Exit() or ExitWithStatus() has been called.
+  // Note: std::atomic<> is not necessarily guaranteed to be lock-free, although
+  // std::atomic_flag is, and so is safe to use in Exit().
+  std::atomic_flag observed_exit_ = ATOMIC_FLAG_INIT;
 };
 
 }  // namespace aos
diff --git a/aos/events/shm_event_loop_test.cc b/aos/events/shm_event_loop_test.cc
index bae834a..2fbe735 100644
--- a/aos/events/shm_event_loop_test.cc
+++ b/aos/events/shm_event_loop_test.cc
@@ -57,7 +57,13 @@
     return loop;
   }
 
-  void Run() override { CHECK_NOTNULL(primary_event_loop_)->Run(); }
+  Result<void> Run() override {
+    return CHECK_NOTNULL(primary_event_loop_)->Run();
+  }
+
+  std::unique_ptr<ExitHandle> MakeExitHandle() override {
+    return CHECK_NOTNULL(primary_event_loop_)->MakeExitHandle();
+  }
 
   void Exit() override { CHECK_NOTNULL(primary_event_loop_)->Exit(); }
 
@@ -66,7 +72,7 @@
   }
 
  private:
-  ::aos::ShmEventLoop *primary_event_loop_;
+  ::aos::ShmEventLoop *primary_event_loop_ = nullptr;
 };
 
 auto CommonParameters() {
@@ -296,6 +302,21 @@
   EXPECT_EQ(times.size(), 2u);
 }
 
+// Tests that the ShmEventLoop::Exit() method causes the ShmEventLoop to return
+// with a successful status.
+TEST_P(ShmEventLoopTest, SuccessfulExitTest) {
+  auto loop1 = factory()->MakePrimary("primary");
+  auto exit_handle = factory()->MakeExitHandle();
+
+  loop1->OnRun([this, &exit_handle]() {
+    factory()->Exit();
+    // The second Exit() call should get ignored.
+    exit_handle->Exit(aos::Error::MakeUnexpectedError("Hello, World!"));
+  });
+
+  EXPECT_TRUE(factory()->Run().has_value());
+}
+
 // Test GetWatcherSharedMemory in a few basic scenarios.
 TEST_P(ShmEventLoopDeathTest, GetWatcherSharedMemory) {
   auto generic_loop1 = factory()->MakePrimary("primary");
diff --git a/aos/events/simulated_event_loop.cc b/aos/events/simulated_event_loop.cc
index 1b193db..3c84cfb 100644
--- a/aos/events/simulated_event_loop.cc
+++ b/aos/events/simulated_event_loop.cc
@@ -135,7 +135,7 @@
     --factory_->exit_handle_count_;
   }
 
-  void Exit() override { factory_->Exit(); }
+  void Exit(Result<void> status) override { factory_->Exit(status); }
 
  private:
   SimulatedEventLoopFactory *const factory_;
@@ -1559,7 +1559,15 @@
   channels_.clear();
 }
 
-void SimulatedEventLoopFactory::RunFor(monotonic_clock::duration duration) {
+Result<void> SimulatedEventLoopFactory::GetAndClearExitStatus() {
+  std::optional<Result<void>> exit_status;
+  // Clear the stored exit_status_ and extract it to be returned.
+  exit_status_.swap(exit_status);
+  return exit_status.value_or(Result<void>{});
+}
+
+Result<void> SimulatedEventLoopFactory::RunFor(
+    monotonic_clock::duration duration) {
   // This sets running to true too.
   scheduler_scheduler_.RunFor(duration);
   for (std::unique_ptr<NodeEventLoopFactory> &node : node_factories_) {
@@ -1569,14 +1577,20 @@
       }
     }
   }
+  return GetAndClearExitStatus();
 }
 
-bool SimulatedEventLoopFactory::RunUntil(realtime_clock::time_point now,
-                                         const aos::Node *node) {
-  bool ran_until_time = scheduler_scheduler_.RunUntil(
-      now, &GetNodeEventLoopFactory(node)->scheduler_, [this, &node](void) {
-        return GetNodeEventLoopFactory(node)->realtime_offset();
-      });
+Result<SimulatedEventLoopFactory::RunEndState>
+SimulatedEventLoopFactory::RunUntil(realtime_clock::time_point now,
+                                    const aos::Node *node) {
+  RunEndState ran_until_time =
+      scheduler_scheduler_.RunUntil(
+          now, &GetNodeEventLoopFactory(node)->scheduler_,
+          [this, &node](void) {
+            return GetNodeEventLoopFactory(node)->realtime_offset();
+          })
+          ? RunEndState::kEventsRemaining
+          : RunEndState::kFinishedEventProcessing;
   for (std::unique_ptr<NodeEventLoopFactory> &node : node_factories_) {
     if (node) {
       for (SimulatedEventLoop *loop : node->event_loops_) {
@@ -1584,9 +1598,11 @@
       }
     }
   }
-  return ran_until_time;
+  return GetAndClearExitStatus().transform(
+      [ran_until_time]() { return ran_until_time; });
 }
-void SimulatedEventLoopFactory::Run() {
+
+Result<void> SimulatedEventLoopFactory::Run() {
   // This sets running to true too.
   scheduler_scheduler_.Run();
   for (std::unique_ptr<NodeEventLoopFactory> &node : node_factories_) {
@@ -1596,9 +1612,17 @@
       }
     }
   }
+  return GetAndClearExitStatus();
 }
 
-void SimulatedEventLoopFactory::Exit() { scheduler_scheduler_.Exit(); }
+void SimulatedEventLoopFactory::Exit(Result<void> status) {
+  if (!exit_status_.has_value()) {
+    exit_status_ = std::move(status);
+  } else {
+    VLOG(1) << "Exit status is already set; not setting it again.";
+  }
+  scheduler_scheduler_.Exit();
+}
 
 std::unique_ptr<ExitHandle> SimulatedEventLoopFactory::MakeExitHandle() {
   return std::make_unique<SimulatedFactoryExitHandle>(this);
diff --git a/aos/events/simulated_event_loop.h b/aos/events/simulated_event_loop.h
index 5fad005..3bcd639 100644
--- a/aos/events/simulated_event_loop.h
+++ b/aos/events/simulated_event_loop.h
@@ -88,17 +88,25 @@
 
   // Starts executing the event loops unconditionally until Exit is called or
   // all the nodes have shut down.
-  void Run();
+  // All Run*() methods return an unexpected value if there is either an
+  // internal fault that can still be recovered from gracefully or if a user
+  // application called Exit() with a Status.
+  Result<void> Run();
   // Executes the event loops for a duration.
-  void RunFor(distributed_clock::duration duration);
+  Result<void> RunFor(distributed_clock::duration duration);
   // Executes the event loops until a time.
-  // Returns true if there are still events remaining.
-  bool RunUntil(aos::realtime_clock::time_point time,
-                const aos::Node *node = nullptr);
+  // Returns kEventsRemaining if there are still events remaining.
+  // Returns an unexpected value if there was an error.
+  enum class RunEndState {
+    kEventsRemaining,
+    kFinishedEventProcessing,
+  };
+  Result<RunEndState> RunUntil(aos::realtime_clock::time_point time,
+                               const aos::Node *node = nullptr);
 
   // Stops executing all event loops.  Meant to be called from within an event
   // loop handler.
-  void Exit();
+  void Exit(Result<void> status = {});
 
   std::unique_ptr<ExitHandle> MakeExitHandle();
 
@@ -157,6 +165,10 @@
   friend class NodeEventLoopFactory;
   friend class SimulatedFactoryExitHandle;
 
+  // Returns the contents of exit_status_ (or a successful Result<> if
+  // exit_status_ is nullopt), and clears the exit status.
+  Result<void> GetAndClearExitStatus();
+
   const Configuration *const configuration_;
   EventSchedulerScheduler scheduler_scheduler_;
 
@@ -170,6 +182,12 @@
   std::vector<const Node *> nodes_;
 
   int exit_handle_count_ = 0;
+
+  // Once exit_status_ is set once, we will not set it again until we have
+  // actually exited. This is to try to provide consistent behavior in cases
+  // where Exit() is called multiple times before Run() is aactually terminates
+  // execution.
+  std::optional<Result<void>> exit_status_{};
 };
 
 // This class holds all the state required to be a single node.
diff --git a/aos/events/simulated_event_loop_test.cc b/aos/events/simulated_event_loop_test.cc
index a317db8..1588ede 100644
--- a/aos/events/simulated_event_loop_test.cc
+++ b/aos/events/simulated_event_loop_test.cc
@@ -42,7 +42,13 @@
     return event_loop_factory_->MakeEventLoop(name, my_node());
   }
 
-  void Run() override { event_loop_factory_->Run(); }
+  Result<void> Run() override { return event_loop_factory_->Run(); }
+
+  std::unique_ptr<ExitHandle> MakeExitHandle() override {
+    MaybeMake();
+    return event_loop_factory_->MakeExitHandle();
+  }
+
   void Exit() override { event_loop_factory_->Exit(); }
 
   // TODO(austin): Implement this.  It's used currently for a phased loop test.