Merge "Hint on a couple of NPE crashes during log reading."
diff --git a/aos/events/event_loop.cc b/aos/events/event_loop.cc
index c29d820..2d932cc 100644
--- a/aos/events/event_loop.cc
+++ b/aos/events/event_loop.cc
@@ -34,6 +34,25 @@
 }
 }  // namespace
 
+std::pair<SharedSpan, absl::Span<uint8_t>> MakeSharedSpan(size_t size) {
+  AlignedOwningSpan *const span = reinterpret_cast<AlignedOwningSpan *>(
+      malloc(sizeof(AlignedOwningSpan) + size + kChannelDataAlignment - 1));
+
+  absl::Span<uint8_t> mutable_span(
+      reinterpret_cast<uint8_t *>(RoundChannelData(span->data(), size)), size);
+  // Use the placement new operator to construct the AlignedOwningSpan in place.
+  new (span) AlignedOwningSpan(mutable_span);
+
+  return std::make_pair(
+      SharedSpan(std::shared_ptr<AlignedOwningSpan>(span,
+                                                    [](AlignedOwningSpan *s) {
+                                                      s->~AlignedOwningSpan();
+                                                      free(s);
+                                                    }),
+                 &span->span),
+      mutable_span);
+}
+
 std::ostream &operator<<(std::ostream &os, const RawSender::Error err) {
   os << ErrorToString(err);
   return os;
diff --git a/aos/events/event_loop.h b/aos/events/event_loop.h
index 23250e1..8825464 100644
--- a/aos/events/event_loop.h
+++ b/aos/events/event_loop.h
@@ -133,6 +133,25 @@
   Ftrace ftrace_;
 };
 
+using SharedSpan = std::shared_ptr<const absl::Span<const uint8_t>>;
+
+// Holds storage for a span object and the data referenced by that span for
+// compatibility with SharedSpan users. If constructed with MakeSharedSpan, span
+// points to only the aligned segment of the entire data.
+struct AlignedOwningSpan {
+  AlignedOwningSpan(absl::Span<const uint8_t> new_span) : span(new_span) {}
+
+  AlignedOwningSpan(const AlignedOwningSpan &) = delete;
+  AlignedOwningSpan &operator=(const AlignedOwningSpan &) = delete;
+  absl::Span<const uint8_t> span;
+  char *data() { return reinterpret_cast<char *>(this + 1); }
+};
+
+// Constructs a span which owns its data through a shared_ptr. The owning span
+// points to a const view of the data; also returns a temporary mutable span
+// which is only valid while the const shared span is kept alive.
+std::pair<SharedSpan, absl::Span<uint8_t>> MakeSharedSpan(size_t size);
+
 // Raw version of sender.  Sends a block of data.  This is used for reflection
 // and as a building block to implement typed senders.
 class RawSender {
diff --git a/aos/events/event_loop_param_test.cc b/aos/events/event_loop_param_test.cc
index 07bd6d4..d3d21fd 100644
--- a/aos/events/event_loop_param_test.cc
+++ b/aos/events/event_loop_param_test.cc
@@ -2389,6 +2389,68 @@
   }
 }
 
+// Tests that the RawSender::Send(SharedSpan) overload works.
+TEST_P(AbstractEventLoopTest, SharedSenderTimingReport) {
+  gflags::FlagSaver flag_saver;
+  FLAGS_timing_report_ms = 1000;
+  auto loop1 = Make();
+  auto loop2 = MakePrimary();
+
+  const FlatbufferDetachedBuffer<TestMessage> kMessage =
+      JsonToFlatbuffer<TestMessage>("{}");
+
+  std::unique_ptr<aos::RawSender> sender =
+      loop2->MakeRawSender(configuration::GetChannel(
+          loop2->configuration(), "/test", "aos.TestMessage", "", nullptr));
+
+  Fetcher<timing::Report> report_fetcher =
+      loop1->MakeFetcher<timing::Report>("/aos");
+  EXPECT_FALSE(report_fetcher.Fetch());
+
+  loop2->OnRun([&]() {
+    for (int ii = 0; ii < TestChannelQueueSize(loop2.get()); ++ii) {
+      auto shared_span = MakeSharedSpan(kMessage.span().size());
+      memcpy(shared_span.second.data(), kMessage.span().data(),
+             kMessage.span().size());
+      EXPECT_EQ(sender->Send(std::move(shared_span.first)),
+                RawSender::Error::kOk);
+    }
+    auto shared_span = MakeSharedSpan(kMessage.span().size());
+    memcpy(shared_span.second.data(), kMessage.span().data(),
+           kMessage.span().size());
+    EXPECT_EQ(sender->Send(std::move(shared_span.first)),
+              RawSender::Error::kMessagesSentTooFast);
+  });
+  // Quit after 1 timing report, midway through the next cycle.
+  EndEventLoop(loop2.get(), chrono::milliseconds(1500));
+
+  Run();
+
+  if (do_timing_reports() == DoTimingReports::kYes) {
+    // Check that the sent too fast actually got recorded by the timing report.
+    FlatbufferDetachedBuffer<timing::Report> primary_report =
+        FlatbufferDetachedBuffer<timing::Report>::Empty();
+    while (report_fetcher.FetchNext()) {
+      if (report_fetcher->name()->string_view() == "primary") {
+        primary_report = CopyFlatBuffer(report_fetcher.get());
+      }
+    }
+
+    EXPECT_EQ(primary_report.message().name()->string_view(), "primary");
+
+    ASSERT_NE(primary_report.message().senders(), nullptr);
+    EXPECT_EQ(primary_report.message().senders()->size(), 3);
+    EXPECT_EQ(
+        primary_report.message()
+            .senders()
+            ->Get(0)
+            ->error_counts()
+            ->Get(static_cast<size_t>(timing::SendError::MESSAGE_SENT_TOO_FAST))
+            ->count(),
+        1);
+  }
+}
+
 // Tests that senders count correctly in the timing report.
 TEST_P(AbstractEventLoopTest, WatcherTimingReport) {
   FLAGS_timing_report_ms = 1000;
@@ -2619,9 +2681,10 @@
           loop3->configuration(), "/test", "aos.TestMessage", "", nullptr));
 
   loop2->OnRun([&]() {
-    EXPECT_EQ(sender->Send(std::make_shared<absl::Span<const uint8_t>>(
-                  kMessage.span().data(), kMessage.span().size())),
-              RawSender::Error::kOk);
+    auto shared_span = MakeSharedSpan(kMessage.span().size());
+    memcpy(shared_span.second.data(), kMessage.span().data(),
+           kMessage.span().size());
+    sender->CheckOk(sender->Send(std::move(shared_span.first)));
   });
 
   bool happened = false;
diff --git a/aos/events/logging/log_reader.cc b/aos/events/logging/log_reader.cc
index 1d8cf18..a299c61 100644
--- a/aos/events/logging/log_reader.cc
+++ b/aos/events/logging/log_reader.cc
@@ -1,5 +1,6 @@
 #include "aos/events/logging/log_reader.h"
 
+#include <dirent.h>
 #include <fcntl.h>
 #include <sys/stat.h>
 #include <sys/types.h>
@@ -1978,9 +1979,8 @@
 
   // Send!  Use the replayed queue index here instead of the logged queue index
   // for the remote queue index.  This makes re-logging work.
-  const auto err = sender->Send(
-      RawSender::SharedSpan(timestamped_message.data,
-                            &timestamped_message.data->span),
+  const RawSender::Error err = sender->Send(
+      SharedSpan(timestamped_message.data, &timestamped_message.data->span),
       timestamped_message.monotonic_remote_time.time,
       timestamped_message.realtime_remote_time, remote_queue_index,
       (channel_source_state_[timestamped_message.channel_index] != nullptr
diff --git a/aos/events/simulated_event_loop.cc b/aos/events/simulated_event_loop.cc
index c679b21..c021a84 100644
--- a/aos/events/simulated_event_loop.cc
+++ b/aos/events/simulated_event_loop.cc
@@ -49,41 +49,6 @@
   const bool prior_;
 };
 
-// Holds storage for a span object and the data referenced by that span for
-// compatibility with RawSender::SharedSpan users. If constructed with
-// MakeSharedSpan, span points to only the aligned segment of the entire data.
-struct AlignedOwningSpan {
-  AlignedOwningSpan(const AlignedOwningSpan &) = delete;
-  AlignedOwningSpan &operator=(const AlignedOwningSpan &) = delete;
-  absl::Span<const uint8_t> span;
-  char data[];
-};
-
-// Constructs a span which owns its data through a shared_ptr. The owning span
-// points to a const view of the data; also returns a temporary mutable span
-// which is only valid while the const shared span is kept alive.
-std::pair<RawSender::SharedSpan, absl::Span<uint8_t>> MakeSharedSpan(
-    size_t size) {
-  AlignedOwningSpan *const span = reinterpret_cast<AlignedOwningSpan *>(
-      malloc(sizeof(AlignedOwningSpan) + size + kChannelDataAlignment - 1));
-
-  absl::Span<uint8_t> mutable_span(
-      reinterpret_cast<uint8_t *>(RoundChannelData(&span->data[0], size)),
-      size);
-  // Use the placement new operator to construct an actual absl::Span in place.
-  new (&span->span) absl::Span(mutable_span);
-
-  return std::make_pair(
-      RawSender::SharedSpan(
-          std::shared_ptr<AlignedOwningSpan>(span,
-                                             [](AlignedOwningSpan *s) {
-                                               s->~AlignedOwningSpan();
-                                               free(s);
-                                             }),
-          &span->span),
-      mutable_span);
-}
-
 // Container for both a message, and the context for it for simulation.  This
 // makes tracking the timestamps associated with the data easy.
 struct SimulatedMessage final {
@@ -93,8 +58,8 @@
 
   // Creates a SimulatedMessage with size bytes of storage.
   // This is a shared_ptr so we don't have to implement refcounting or copying.
-  static std::shared_ptr<SimulatedMessage> Make(
-      SimulatedChannel *channel, const RawSender::SharedSpan data);
+  static std::shared_ptr<SimulatedMessage> Make(SimulatedChannel *channel,
+                                                const SharedSpan data);
 
   // Context for the data.
   Context context;
@@ -103,7 +68,7 @@
 
   // Owning span to this message's data. Depending on the sender may either
   // represent the data of just the flatbuffer, or max channel size.
-  RawSender::SharedSpan data;
+  SharedSpan data;
 
   // Mutable view of above data. If empty, this message is not mutable.
   absl::Span<uint8_t> mutable_data;
@@ -336,7 +301,7 @@
 namespace {
 
 std::shared_ptr<SimulatedMessage> SimulatedMessage::Make(
-    SimulatedChannel *channel, RawSender::SharedSpan data) {
+    SimulatedChannel *channel, SharedSpan data) {
   // The allocations in here are due to infrastructure and don't count in the no
   // mallocs in RT code.
   ScopedNotRealtime nrt;
@@ -1165,8 +1130,7 @@
 }
 
 RawSender::Error SimulatedSender::DoSend(
-    const RawSender::SharedSpan data,
-    monotonic_clock::time_point monotonic_remote_time,
+    const SharedSpan data, monotonic_clock::time_point monotonic_remote_time,
     realtime_clock::time_point realtime_remote_time,
     uint32_t remote_queue_index, const UUID &source_boot_uuid) {
   CHECK_LE(data->size(), this->size())
diff --git a/aos/libc/aos_strsignal.cc b/aos/libc/aos_strsignal.cc
index 0f9b065..cf1aad1 100644
--- a/aos/libc/aos_strsignal.cc
+++ b/aos/libc/aos_strsignal.cc
@@ -15,9 +15,23 @@
     return buffer;
   }
 
+// sys_siglist is deprecated as of glibc 2.32; use sigdescr_np() there instead.
+#ifdef __GLIBC__
+  #if __GLIBC_PREREQ(2, 32)
+  if (signal > 0 && signal < NSIG && sigdescr_np(signal) != nullptr) {
+    return sigdescr_np(signal);
+  }
+  #else
   if (signal > 0 && signal < NSIG && sys_siglist[signal] != nullptr) {
     return sys_siglist[signal];
   }
+  #endif
+// If not using GLIBC assume we can use sys_siglist
+#else
+  if (signal > 0 && signal < NSIG && sys_siglist[signal] != nullptr) {
+    return sys_siglist[signal];
+  }
+#endif
 
   CHECK_GT(snprintf(buffer, sizeof(buffer), "Unknown signal %d", signal), 0);
   return buffer;
diff --git a/aos/network/sctp_client.cc b/aos/network/sctp_client.cc
index e3da03a..ab70c20 100644
--- a/aos/network/sctp_client.cc
+++ b/aos/network/sctp_client.cc
@@ -51,7 +51,9 @@
   message_bridge::LogSctpStatus(fd(), assoc_id);
 }
 
-void SctpClient::SetPriorityScheduler(sctp_assoc_t assoc_id) {
+void SctpClient::SetPriorityScheduler([[maybe_unused]] sctp_assoc_t assoc_id) {
+// Kernel 4.9 does not have SCTP_SS_PRIO
+#ifdef SCTP_SS_PRIO
   struct sctp_assoc_value scheduler;
   memset(&scheduler, 0, sizeof(scheduler));
   scheduler.assoc_id = assoc_id;
@@ -61,6 +63,7 @@
     LOG_FIRST_N(WARNING, 1) << "Failed to set scheduler: " << strerror(errno)
                             << " [" << errno << "]";
   }
+#endif
 }
 
 }  // namespace message_bridge
diff --git a/aos/network/sctp_server.cc b/aos/network/sctp_server.cc
index 2f6a041..0bac4c7 100644
--- a/aos/network/sctp_server.cc
+++ b/aos/network/sctp_server.cc
@@ -69,7 +69,9 @@
   }
 }
 
-void SctpServer::SetPriorityScheduler(sctp_assoc_t assoc_id) {
+void SctpServer::SetPriorityScheduler([[maybe_unused]] sctp_assoc_t assoc_id) {
+// Kernel 4.9 does not have SCTP_SS_PRIO
+#ifdef SCTP_SS_PRIO
   struct sctp_assoc_value scheduler;
   memset(&scheduler, 0, sizeof(scheduler));
   scheduler.assoc_id = assoc_id;
@@ -79,10 +81,14 @@
     LOG_FIRST_N(WARNING, 1) << "Failed to set scheduler: " << strerror(errno)
                             << " [" << errno << "]";
   }
+#endif
 }
 
-void SctpServer::SetStreamPriority(sctp_assoc_t assoc_id, int stream_id,
-                                   uint16_t priority) {
+void SctpServer::SetStreamPriority([[maybe_unused]] sctp_assoc_t assoc_id,
+                                   [[maybe_unused]] int stream_id,
+                                   [[maybe_unused]] uint16_t priority) {
+// Kernel 4.9 does not have SCTP_STREAM_SCHEDULER_VALUE
+#ifdef SCTP_STREAM_SCHEDULER_VALUE
   struct sctp_stream_value sctp_priority;
   memset(&sctp_priority, 0, sizeof(sctp_priority));
   sctp_priority.assoc_id = assoc_id;
@@ -93,6 +99,7 @@
     LOG_FIRST_N(WARNING, 1) << "Failed to set scheduler: " << strerror(errno)
                             << " [" << errno << "]";
   }
+#endif
 }
 
 }  // namespace message_bridge
diff --git a/aos/starter/BUILD b/aos/starter/BUILD
index 9068caa..7ef3777 100644
--- a/aos/starter/BUILD
+++ b/aos/starter/BUILD
@@ -118,6 +118,7 @@
         "//aos/events:ping_fbs",
         "//aos/events:pong_fbs",
         "//aos/events:simulated_event_loop",
+        "//aos/ipc_lib:event",
         "//aos/testing:googletest",
         "//aos/testing:path",
         "//aos/testing:tmpdir",
diff --git a/aos/starter/starter_rpc_lib.cc b/aos/starter/starter_rpc_lib.cc
index 15132ec..5f14e21 100644
--- a/aos/starter/starter_rpc_lib.cc
+++ b/aos/starter/starter_rpc_lib.cc
@@ -225,10 +225,12 @@
   // Clear commands prior to calling handlers to allow the handler to call
   // SendCommands() again if desired.
   current_commands_.clear();
+  // Disable the timer before calling the success handler, in case the success
+  // handler needs to modify the timeout handler.
+  timeout_timer_->Disable();
   if (success_handler_) {
     success_handler_();
   }
-  timeout_timer_->Disable();
 }
 
 bool SendCommandBlocking(aos::starter::Command command, std::string_view name,
diff --git a/aos/starter/starter_test.cc b/aos/starter/starter_test.cc
index 87cb544..79880f7 100644
--- a/aos/starter/starter_test.cc
+++ b/aos/starter/starter_test.cc
@@ -1,9 +1,11 @@
+#include <chrono>
 #include <csignal>
 #include <future>
 #include <thread>
 
 #include "aos/events/ping_generated.h"
 #include "aos/events/pong_generated.h"
+#include "aos/ipc_lib/event.h"
 #include "aos/network/team_number.h"
 #include "aos/testing/path.h"
 #include "aos/testing/tmpdir.h"
@@ -19,11 +21,9 @@
 
 class StarterdTest : public ::testing::Test {
  public:
-  StarterdTest() : shm_dir_(aos::testing::TestTmpDir() + "/aos") {
-    FLAGS_shm_base = shm_dir_;
-
+  StarterdTest() {
     // Nuke the shm dir:
-    aos::util::UnlinkRecursive(shm_dir_);
+    aos::util::UnlinkRecursive(FLAGS_shm_base);
   }
 
  protected:
@@ -35,11 +35,10 @@
           }
         })
         ->Setup(starter->event_loop()->monotonic_now(),
-                std::chrono::seconds(1));
+                std::chrono::milliseconds(100));
   }
 
   gflags::FlagSaver flag_saver_;
-  std::string shm_dir_;
   // Used to track when the test completes so that we can clean up the starter
   // in its thread.
   std::atomic<bool> test_done_{false};
@@ -79,8 +78,8 @@
                                     "args": ["--shm_base", "%s", "--config", "%s", "--override_hostname", "%s"]
                                   }
                                 ]})",
-          ArtifactPath("aos/events/ping"), shm_dir_, config_file,
-          GetParam().hostname, ArtifactPath("aos/events/pong"), shm_dir_,
+          ArtifactPath("aos/events/ping"), FLAGS_shm_base, config_file,
+          GetParam().hostname, ArtifactPath("aos/events/pong"), FLAGS_shm_base,
           config_file, GetParam().hostname));
 
   const aos::Configuration *config_msg = &new_config.message();
@@ -161,10 +160,23 @@
 
   SetupStarterCleanup(&starter);
 
-  std::thread starterd_thread([&starter] { starter.Run(); });
-  std::thread client_thread([&client_loop] { client_loop.Run(); });
-  watcher_loop.Run();
+  Event starter_started;
+  std::thread starterd_thread([&starter, &starter_started] {
+    starter.event_loop()->OnRun(
+        [&starter_started]() { starter_started.Set(); });
+    starter.Run();
+  });
+  starter_started.Wait();
 
+  Event client_started;
+  std::thread client_thread([&client_loop, &client_started] {
+    client_loop.OnRun([&client_started]() { client_started.Set(); });
+    client_loop.Run();
+  });
+  client_started.Wait();
+
+  watcher_loop.Run();
+  ASSERT_TRUE(success);
   test_done_ = true;
   client_thread.join();
   starterd_thread.join();
@@ -197,8 +209,8 @@
                                     "args": ["--shm_base", "%s"]
                                   }
                                 ]})",
-                             ArtifactPath("aos/events/ping"), shm_dir_,
-                             ArtifactPath("aos/events/pong"), shm_dir_));
+                             ArtifactPath("aos/events/ping"), FLAGS_shm_base,
+                             ArtifactPath("aos/events/pong"), FLAGS_shm_base));
 
   const aos::Configuration *config_msg = &new_config.message();
 
@@ -257,7 +269,13 @@
 
   SetupStarterCleanup(&starter);
 
-  std::thread starterd_thread([&starter] { starter.Run(); });
+  Event starter_started;
+  std::thread starterd_thread([&starter, &starter_started] {
+    starter.event_loop()->OnRun(
+        [&starter_started]() { starter_started.Set(); });
+    starter.Run();
+  });
+  starter_started.Wait();
   watcher_loop.Run();
 
   test_done_ = true;
@@ -287,8 +305,8 @@
                                     "args": ["--shm_base", "%s"]
                                   }
                                 ]})",
-                             ArtifactPath("aos/events/ping"), shm_dir_,
-                             ArtifactPath("aos/events/pong"), shm_dir_));
+                             ArtifactPath("aos/events/ping"), FLAGS_shm_base,
+                             ArtifactPath("aos/events/pong"), FLAGS_shm_base));
 
   const aos::Configuration *config_msg = &new_config.message();
 
@@ -346,7 +364,13 @@
 
   SetupStarterCleanup(&starter);
 
-  std::thread starterd_thread([&starter] { starter.Run(); });
+  Event starter_started;
+  std::thread starterd_thread([&starter, &starter_started] {
+    starter.event_loop()->OnRun(
+        [&starter_started]() { starter_started.Set(); });
+    starter.Run();
+  });
+  starter_started.Wait();
   watcher_loop.Run();
 
   test_done_ = true;
@@ -362,25 +386,23 @@
   aos::FlatbufferDetachedBuffer<aos::Configuration> config =
       aos::configuration::ReadConfig(config_file);
 
-  const std::string test_dir = aos::testing::TestTmpDir();
-
   auto new_config = aos::configuration::MergeWithConfig(
       &config.message(), absl::StrFormat(
                              R"({"applications": [
                                   {
                                     "name": "ping",
                                     "executable_name": "%s",
-                                    "args": ["--shm_base", "%s/aos"],
+                                    "args": ["--shm_base", "%s"],
                                     "autorestart": false
                                   },
                                   {
                                     "name": "pong",
                                     "executable_name": "%s",
-                                    "args": ["--shm_base", "%s/aos"]
+                                    "args": ["--shm_base", "%s"]
                                   }
                                 ]})",
-                             ArtifactPath("aos/events/ping"), test_dir,
-                             ArtifactPath("aos/events/pong"), test_dir));
+                             ArtifactPath("aos/events/ping"), FLAGS_shm_base,
+                             ArtifactPath("aos/events/pong"), FLAGS_shm_base));
 
   const aos::Configuration *config_msg = &new_config.message();
 
@@ -439,7 +461,13 @@
 
   SetupStarterCleanup(&starter);
 
-  std::thread starterd_thread([&starter] { starter.Run(); });
+  Event starter_started;
+  std::thread starterd_thread([&starter, &starter_started] {
+    starter.event_loop()->OnRun(
+        [&starter_started]() { starter_started.Set(); });
+    starter.Run();
+  });
+  starter_started.Wait();
   watcher_loop.Run();
 
   test_done_ = true;
@@ -447,5 +475,121 @@
   starterd_thread.join();
 }
 
+TEST_F(StarterdTest, StarterChainTest) {
+  // This test was written in response to a bug that was found
+  // in StarterClient::Succeed. The bug caused the timeout timer
+  // to be disabled after the success handler was called, so a
+  // timeout set up by the success handler was lost. The bug has
+  // been fixed, and this test will ensure it does not regress.
+  const std::string config_file =
+      ArtifactPath("aos/events/pingpong_config.json");
+  aos::FlatbufferDetachedBuffer<aos::Configuration> config =
+      aos::configuration::ReadConfig(config_file);
+  auto new_config = aos::configuration::MergeWithConfig(
+      &config.message(), absl::StrFormat(
+                             R"({"applications": [
+                                {
+                                  "name": "ping",
+                                  "executable_name": "%s",
+                                  "args": ["--shm_base", "%s"],
+                                  "autorestart": false
+                                },
+                                {
+                                  "name": "pong",
+                                  "executable_name": "%s",
+                                  "args": ["--shm_base", "%s"]
+                                }
+                              ]})",
+                             ArtifactPath("aos/events/ping"), FLAGS_shm_base,
+                             ArtifactPath("aos/events/pong"), FLAGS_shm_base));
+
+  const aos::Configuration *config_msg = &new_config.message();
+  // Set up starter with config file
+  aos::starter::Starter starter(config_msg);
+  aos::ShmEventLoop client_loop(config_msg);
+  client_loop.SkipAosLog();
+  StarterClient client(&client_loop);
+  bool success = false;
+  auto client_node = client_loop.node();
+
+  // Limit the amount of time we will wait for the test to finish.
+  client_loop
+      .AddTimer([&client_loop] {
+        client_loop.Exit();
+        FAIL() << "ERROR: The test has failed, the watcher has timed out. "
+                  "The chain of stages defined below did not complete "
+                  "within the time limit.";
+      })
+      ->Setup(client_loop.monotonic_now() + std::chrono::seconds(20));
+
+  // With the variables defined, here we define the body of the test.
+  // We want stage1 to succeed, triggering stage2.
+  // We want stage2 to timeout, triggering stage3.
+
+  auto stage3 = [&client_loop, &success]() {
+    LOG(INFO) << "Begin stage3.";
+    SUCCEED();
+    success = true;
+    client_loop.Exit();
+    LOG(INFO) << "End stage3.";
+  };
+  auto stage2 = [this, &starter, &client, &client_node, &stage3] {
+    LOG(INFO) << "Begin stage2";
+    test_done_ = true;  // trigger `starter` to exit.
+
+    // Wait for the starter event loop to close, so we can
+    // intentionally trigger a timeout.
+    int attempts = 0;
+    while (starter.event_loop()->is_running()) {
+      ++attempts;
+      if (attempts > 5) {
+        LOG(INFO) << "Timeout while waiting for starter to exit";
+        return;
+      }
+      LOG(INFO) << "Waiting for starter to close.";
+      std::this_thread::sleep_for(std::chrono::seconds(1));
+    }
+    client.SetTimeoutHandler(stage3);
+    client.SetSuccessHandler([]() {
+      LOG(INFO) << "stage3 success handler called.";
+      FAIL() << ": Command should not have succeeded here.";
+    });
+    // We want this command to time out.
+    client.SendCommands({{Command::START, "ping", {client_node}}},
+                        std::chrono::seconds(5));
+    LOG(INFO) << "End stage2";
+  };
+  auto stage1 = [&client, &client_node, &stage2] {
+    LOG(INFO) << "Begin stage1";
+    client.SetTimeoutHandler(
+        []() { FAIL() << ": Command should not have timed out."; });
+    client.SetSuccessHandler(stage2);
+    client.SendCommands({{Command::STOP, "ping", {client_node}}},
+                        std::chrono::seconds(5));
+    LOG(INFO) << "End stage1";
+  };
+  // Start the test body.
+  client_loop.AddTimer(stage1)->Setup(client_loop.monotonic_now() +
+                                      std::chrono::milliseconds(1));
+
+  // Prepare the cleanup for starter. This will finish when we set
+  // `test_done_ = true;`.
+  SetupStarterCleanup(&starter);
+
+  // Run `starter.Run()` in a thread to simulate it running in
+  // another process.
+  Event started;
+  std::thread starterd_thread([&starter, &started] {
+    starter.event_loop()->OnRun([&started]() { started.Set(); });
+    starter.Run();
+  });
+
+  started.Wait();
+  client_loop.Run();
+  EXPECT_TRUE(success);
+  ASSERT_FALSE(starter.event_loop()->is_running());
+  starterd_thread.join();
+}
+
 }  // namespace starter
 }  // namespace aos
diff --git a/aos/starter/starterd_lib.cc b/aos/starter/starterd_lib.cc
index b8b7343..30e0887 100644
--- a/aos/starter/starterd_lib.cc
+++ b/aos/starter/starterd_lib.cc
@@ -35,7 +35,10 @@
         SendStatus();
         status_count_ = 0;
       })),
-      cleanup_timer_(event_loop_.AddTimer([this] { event_loop_.Exit(); })),
+      cleanup_timer_(event_loop_.AddTimer([this] {
+        event_loop_.Exit();
+        LOG(INFO) << "Starter event loop exit finished.";
+      })),
       max_status_count_(
           event_loop_.GetChannel<aos::starter::Status>("/aos")->frequency() -
           1),