Merge "Fault in all the SHM pages during startup"
diff --git a/README.md b/README.md
index d54d963..f81d423 100644
--- a/README.md
+++ b/README.md
@@ -26,7 +26,8 @@
   1. Install the required packages:
 ```console
 apt-get update
-apt-get install bazel python
+# TODO(james): Ideally, we shouldn't need to be installing libtinfo5...
+apt-get install bazel python libtinfo5
 ```
   2. Allow Bazel's sandboxing to work:
      Follow the direction in `doc/frc971.conf`.
diff --git a/WORKSPACE b/WORKSPACE
index 9f087bc..dced1f0 100644
--- a/WORKSPACE
+++ b/WORKSPACE
@@ -60,11 +60,11 @@
 )
 load(
     "//debian:gstreamer_amd64.bzl",
-    gstreamer_amd64_debs = "files"
+    gstreamer_amd64_debs = "files",
 )
 load(
     "//debian:gstreamer_armhf.bzl",
-    gstreamer_armhf_debs = "files"
+    gstreamer_armhf_debs = "files",
 )
 load("//debian:packages.bzl", "generate_repositories_for_debs")
 
@@ -681,6 +681,7 @@
 http_archive(
     name = "halide_k8",
     build_file = "@//debian:halide.BUILD",
+    sha256 = "c67185d50a99adba86f6b2cc43c7e2cf11bcdfba9052d05e764a89b456a50446",
     strip_prefix = "halide/",
     url = "http://www.frc971.org/Build-Dependencies/halide-linux-64-gcc53-800-65c26cba6a3eca2d08a0bccf113ca28746012cc3.tgz",
 )
@@ -691,6 +692,7 @@
 http_archive(
     name = "halide_armhf",
     build_file = "@//debian:halide.BUILD",
+    sha256 = "10564c559c9e04a173823413916d05fadd6e697d91bab21ddc5041190fa8f0f0",
     strip_prefix = "halide/",
     url = "http://www.frc971.org/Build-Dependencies/halide-arm32-linux-32-trunk-65c26cba6a3eca2d08a0bccf113ca28746012cc3.tgz",
 )
@@ -718,3 +720,23 @@
     sha256 = "c5ac4c604952c274a50636e244f0d091bd1de302032446f24f0e9e03ae9c76f7",
     url = "http://www.frc971.org/Build-Dependencies/gstreamer_armhf.tar.gz",
 )
+
+# Downloaded from:
+# https://files.pythonhosted.org/packages/64/a7/45e11eebf2f15bf987c3bc11d37dcc838d9dc81250e67e4c5968f6008b6c/Jinja2-2.11.2.tar.gz
+http_archive(
+    name = "python_jinja2",
+    build_file = "@//debian:python_jinja2.BUILD",
+    sha256 = "89aab215427ef59c34ad58735269eb58b1a5808103067f7bb9d5836c651b3bb0",
+    strip_prefix = "Jinja2-2.11.2",
+    url = "http://www.frc971.org/Build-Dependencies/Jinja2-2.11.2.tar.gz",
+)
+
+# Downloaded from:
+# https://files.pythonhosted.org/packages/b9/2e/64db92e53b86efccfaea71321f597fa2e1b2bd3853d8ce658568f7a13094/MarkupSafe-1.1.1.tar.gz
+http_archive(
+    name = "python_markupsafe",
+    build_file = "@//debian:python_markupsafe.BUILD",
+    sha256 = "29872e92839765e546828bb7754a68c418d927cd064fd4708fab9fe9c8bb116b",
+    strip_prefix = "MarkupSafe-1.1.1",
+    url = "http://www.frc971.org/Build-Dependencies/MarkupSafe-1.1.1.tar.gz",
+)
diff --git a/aos/BUILD b/aos/BUILD
index da558a4..d35e74c 100644
--- a/aos/BUILD
+++ b/aos/BUILD
@@ -526,3 +526,17 @@
         "@com_github_google_glog//:glog",
     ],
 )
+
+cc_library(
+    name = "ftrace",
+    srcs = [
+        "ftrace.cc",
+    ],
+    hdrs = [
+        "ftrace.h",
+    ],
+    visibility = ["//visibility:public"],
+    deps = [
+        "@com_github_google_glog//:glog",
+    ],
+)
diff --git a/aos/aos_dump.cc b/aos/aos_dump.cc
index fd33eca..1aee018 100644
--- a/aos/aos_dump.cc
+++ b/aos/aos_dump.cc
@@ -10,6 +10,38 @@
 DEFINE_string(config, "./config.json", "File path of aos configuration");
 DEFINE_int32(max_vector_size, 100,
              "If positive, vectors longer than this will not be printed");
+DEFINE_bool(fetch, false,
+            "If true, fetch the current message on the channel first");
+
+namespace {
+
+void PrintMessage(const aos::Channel *channel, const aos::Context &context) {
+  // Print the flatbuffer out to stdout, both to remove the
+  // unnecessary cruft from glog and to allow the user to readily
+  // redirect just the logged output independent of any debugging
+  // information on stderr.
+  if (context.monotonic_remote_time != context.monotonic_event_time) {
+    std::cout << context.realtime_remote_time << " ("
+              << context.monotonic_remote_time << ") delivered "
+              << context.realtime_event_time << " ("
+              << context.monotonic_event_time << "): "
+              << aos::FlatbufferToJson(
+                     channel->schema(),
+                     static_cast<const uint8_t *>(context.data),
+                     FLAGS_max_vector_size)
+              << '\n';
+  } else {
+    std::cout << context.realtime_event_time << " ("
+              << context.monotonic_event_time << "): "
+              << aos::FlatbufferToJson(
+                     channel->schema(),
+                     static_cast<const uint8_t *>(context.data),
+                     FLAGS_max_vector_size)
+              << '\n';
+  }
+}
+
+}  // namespace
 
 int main(int argc, char **argv) {
   aos::InitGoogle(&argc, &argv);
@@ -46,32 +78,19 @@
   for (const aos::Channel *channel : *channels) {
     if (channel->name()->c_str() == channel_name &&
         channel->type()->str().find(message_type) != std::string::npos) {
-      event_loop.MakeRawWatcher(
-          channel, [channel](const aos::Context &context, const void *message) {
-            // Print the flatbuffer out to stdout, both to remove the
-            // unnecessary cruft from glog and to allow the user to readily
-            // redirect just the logged output independent of any debugging
-            // information on stderr.
-            if (context.monotonic_remote_time != context.monotonic_event_time) {
-              std::cout << context.realtime_remote_time << " ("
-                        << context.monotonic_remote_time << ") delivered "
-                        << context.realtime_event_time << " ("
-                        << context.monotonic_event_time << "): "
-                        << aos::FlatbufferToJson(
-                               channel->schema(),
-                               static_cast<const uint8_t *>(message),
-                               FLAGS_max_vector_size)
-                        << '\n';
-            } else {
-              std::cout << context.realtime_event_time << " ("
-                        << context.monotonic_event_time << "): "
-                        << aos::FlatbufferToJson(
-                               channel->schema(),
-                               static_cast<const uint8_t *>(message),
-                               FLAGS_max_vector_size)
-                        << '\n';
-            }
-          });
+      if (FLAGS_fetch) {
+        const std::unique_ptr<aos::RawFetcher> fetcher =
+            event_loop.MakeRawFetcher(channel);
+        if (fetcher->Fetch()) {
+          PrintMessage(channel, fetcher->context());
+        }
+      }
+
+      event_loop.MakeRawWatcher(channel, [channel](const aos::Context &context,
+                                                   const void * /*message*/) {
+        PrintMessage(channel, context);
+      });
+
       found_channels++;
     }
   }
diff --git a/aos/configuration.cc b/aos/configuration.cc
index cef3876..43e44f2 100644
--- a/aos/configuration.cc
+++ b/aos/configuration.cc
@@ -12,6 +12,7 @@
 #include <string_view>
 
 #include "absl/container/btree_set.h"
+#include "absl/strings/str_cat.h"
 #include "aos/configuration_generated.h"
 #include "aos/flatbuffer_merge.h"
 #include "aos/json_to_flatbuffer.h"
@@ -539,6 +540,12 @@
   return FlatbufferToJson(cleaned_channel);
 }
 
+std::string StrippedChannelToString(const Channel *channel) {
+  return absl::StrCat("{ \"name\": \"", channel->name()->string_view(),
+                      "\", \"type\": \"", channel->type()->string_view(),
+                      "\" }");
+}
+
 FlatbufferDetachedBuffer<Configuration> MergeConfiguration(
     const Flatbuffer<Configuration> &config,
     const std::vector<aos::FlatbufferString<reflection::Schema>> &schemas) {
diff --git a/aos/configuration.h b/aos/configuration.h
index 9193fd5..ccc6246 100644
--- a/aos/configuration.h
+++ b/aos/configuration.h
@@ -113,6 +113,8 @@
 
 // Prints a channel to json, but without the schema.
 std::string CleanedChannelToString(const Channel *channel);
+// Prints out a channel to json, but only with the name and type.
+std::string StrippedChannelToString(const Channel *channel);
 
 // Returns the node names that this node should be forwarding to.
 std::vector<std::string_view> DestinationNodeNames(const Configuration *config,
diff --git a/aos/events/BUILD b/aos/events/BUILD
index d35f206..39a6a54 100644
--- a/aos/events/BUILD
+++ b/aos/events/BUILD
@@ -74,6 +74,7 @@
         "//aos:configuration",
         "//aos:configuration_fbs",
         "//aos:flatbuffers",
+        "//aos:ftrace",
         "//aos/ipc_lib:data_alignment",
         "//aos/logging:implementations",
         "//aos/time",
diff --git a/aos/events/event_loop.cc b/aos/events/event_loop.cc
index c03425e..da89d9b 100644
--- a/aos/events/event_loop.cc
+++ b/aos/events/event_loop.cc
@@ -14,6 +14,7 @@
 RawSender::RawSender(EventLoop *event_loop, const Channel *channel)
     : event_loop_(event_loop),
       channel_(channel),
+      ftrace_prefix_(configuration::StrippedChannelToString(channel)),
       timing_(event_loop_->ChannelIndex(channel)) {
   event_loop_->NewSender(this);
 }
@@ -23,6 +24,7 @@
 RawFetcher::RawFetcher(EventLoop *event_loop, const Channel *channel)
     : event_loop_(event_loop),
       channel_(channel),
+      ftrace_prefix_(configuration::StrippedChannelToString(channel)),
       timing_(event_loop_->ChannelIndex(channel)) {
   context_.monotonic_event_time = monotonic_clock::min_time;
   context_.monotonic_remote_time = monotonic_clock::min_time;
diff --git a/aos/events/event_loop.h b/aos/events/event_loop.h
index bc5a5ae..c36b195 100644
--- a/aos/events/event_loop.h
+++ b/aos/events/event_loop.h
@@ -1,6 +1,8 @@
 #ifndef AOS_EVENTS_EVENT_LOOP_H_
 #define AOS_EVENTS_EVENT_LOOP_H_
 
+#include <sched.h>
+
 #include <atomic>
 #include <string>
 #include <string_view>
@@ -12,6 +14,7 @@
 #include "aos/events/event_loop_generated.h"
 #include "aos/events/timing_statistics.h"
 #include "aos/flatbuffers.h"
+#include "aos/ftrace.h"
 #include "aos/ipc_lib/data_alignment.h"
 #include "aos/json_to_flatbuffer.h"
 #include "aos/time/time.h"
@@ -90,10 +93,12 @@
   virtual std::pair<bool, monotonic_clock::time_point> DoFetchNext() = 0;
   virtual std::pair<bool, monotonic_clock::time_point> DoFetch() = 0;
 
-  EventLoop *event_loop_;
-  const Channel *channel_;
+  EventLoop *const event_loop_;
+  const Channel *const channel_;
+  const std::string ftrace_prefix_;
 
   internal::RawFetcherTiming timing_;
+  Ftrace ftrace_;
 };
 
 // Raw version of sender.  Sends a block of data.  This is used for reflection
@@ -172,10 +177,12 @@
                       aos::realtime_clock::time_point realtime_remote_time,
                       uint32_t remote_queue_index) = 0;
 
-  EventLoop *event_loop_;
-  const Channel *channel_;
+  EventLoop *const event_loop_;
+  const Channel *const channel_;
+  const std::string ftrace_prefix_;
 
   internal::RawSenderTiming timing_;
+  Ftrace ftrace_;
 
   ChannelPreallocatedAllocator fbb_allocator_{nullptr, 0, nullptr};
 };
@@ -219,6 +226,9 @@
                : nullptr;
   }
 
+  // Returns the channel this fetcher uses
+  const Channel *channel() const { return fetcher_->channel(); }
+
   // Returns the context holding timestamps and other metadata about the
   // message.
   const Context &context() const { return fetcher_->context(); }
@@ -226,6 +236,9 @@
   const T &operator*() const { return *get(); }
   const T *operator->() const { return get(); }
 
+  // Returns true if this fetcher is valid and connected to a channel.
+  operator bool() const { return static_cast<bool>(fetcher_); }
+
  private:
   friend class EventLoop;
   Fetcher(::std::unique_ptr<RawFetcher> fetcher)
@@ -356,6 +369,7 @@
   std::string name_;
 
   internal::TimerTiming timing_;
+  Ftrace ftrace_;
 };
 
 // Interface for phased loops.  They are built on timers.
@@ -402,8 +416,18 @@
   int cycles_elapsed_ = 0;
 
   internal::TimerTiming timing_;
+  Ftrace ftrace_;
 };
 
+inline cpu_set_t MakeCpusetFromCpus(std::initializer_list<int> cpus) {
+  cpu_set_t result;
+  CPU_ZERO(&result);
+  for (int cpu : cpus) {
+    CPU_SET(cpu, &result);
+  }
+  return result;
+}
+
 class EventLoop {
  public:
   EventLoop(const Configuration *configuration);
@@ -414,6 +438,14 @@
   virtual monotonic_clock::time_point monotonic_now() = 0;
   virtual realtime_clock::time_point realtime_now() = 0;
 
+  // Returns true if the channel exists in the configuration.
+  template <typename T>
+  bool HasChannel(const std::string_view channel_name) {
+    return configuration::GetChannel(configuration_, channel_name,
+                                     T::GetFullyQualifiedName(), name(),
+                                     node()) != nullptr;
+  }
+
   // Note, it is supported to create:
   //   multiple fetchers, and (one sender or one watcher) per <name, type>
   //   tuple.
@@ -524,6 +556,10 @@
   virtual void SetRuntimeRealtimePriority(int priority) = 0;
   virtual int priority() const = 0;
 
+  // Sets the scheduler affinity to run the event loop with. This may only be
+  // called before Run().
+  virtual void SetRuntimeAffinity(const cpu_set_t &cpuset) = 0;
+
   // Fetches new messages from the provided channel (path, type).
   //
   // Note: this channel must be a member of the exact configuration object this
@@ -579,12 +615,18 @@
   // watcher must exist before calling this.
   WatcherState *GetWatcherState(const Channel *channel);
 
-  // Returns a Sender's protected RawSender
+  // Returns a Sender's protected RawSender.
   template <typename T>
   static RawSender *GetRawSender(aos::Sender<T> *sender) {
     return sender->sender_.get();
   }
 
+  // Returns a Fetcher's protected RawFetcher.
+  template <typename T>
+  static RawFetcher *GetRawFetcher(aos::Fetcher<T> *fetcher) {
+    return fetcher->fetcher_.get();
+  }
+
   // Context available for watchers, timers, and phased loops.
   Context context_;
 
diff --git a/aos/events/event_loop_param_test.cc b/aos/events/event_loop_param_test.cc
index f0fb3d3..71bef37 100644
--- a/aos/events/event_loop_param_test.cc
+++ b/aos/events/event_loop_param_test.cc
@@ -585,6 +585,17 @@
   EXPECT_DEATH(Run(), "realtime");
 }
 
+// Verify that SetRuntimeAffinity fails while running.
+TEST_P(AbstractEventLoopDeathTest, SetRuntimeAffinity) {
+  auto loop = MakePrimary();
+  // Confirm that runtime priority calls work when not running.
+  loop->SetRuntimeAffinity(MakeCpusetFromCpus({0}));
+
+  loop->OnRun([&]() { loop->SetRuntimeAffinity(MakeCpusetFromCpus({1})); });
+
+  EXPECT_DEATH(Run(), "Cannot set affinity while running");
+}
+
 // Verify that registering a watcher and a sender for "/test" fails.
 TEST_P(AbstractEventLoopDeathTest, WatcherAndSender) {
   auto loop = Make();
@@ -593,6 +604,18 @@
                "/test");
 }
 
+// Verify that creating too many senders fails.
+TEST_P(AbstractEventLoopDeathTest, TooManySenders) {
+  auto loop = Make();
+  std::vector<aos::Sender<TestMessage>> senders;
+  for (int i = 0; i < 10; ++i) {
+    senders.emplace_back(loop->MakeSender<TestMessage>("/test"));
+  }
+  EXPECT_DEATH({ loop->MakeSender<TestMessage>("/test"); },
+               "Failed to create sender on \\{ \"name\": \"/test\", \"type\": "
+               "\"aos.TestMessage\" \\}, too many senders.");
+}
+
 // Verify that we can't create a sender inside OnRun.
 TEST_P(AbstractEventLoopDeathTest, SenderInOnRun) {
   auto loop1 = MakePrimary();
diff --git a/aos/events/event_loop_tmpl.h b/aos/events/event_loop_tmpl.h
index 5534c83..877a946 100644
--- a/aos/events/event_loop_tmpl.h
+++ b/aos/events/event_loop_tmpl.h
@@ -1,7 +1,11 @@
 #ifndef AOS_EVENTS_EVENT_LOOP_TMPL_H_
 #define AOS_EVENTS_EVENT_LOOP_TMPL_H_
 
+#include <inttypes.h>
+#include <stdint.h>
+
 #include <type_traits>
+
 #include "aos/events/event_loop.h"
 #include "glog/logging.h"
 
@@ -72,6 +76,13 @@
   if (result.first) {
     timing_.fetcher->mutate_count(timing_.fetcher->count() + 1);
     const monotonic_clock::time_point monotonic_time = result.second;
+    ftrace_.FormatMessage(
+        "%.*s: fetch next: now=%" PRId64 " event=%" PRId64 " queue=%" PRIu32,
+        static_cast<int>(ftrace_prefix_.size()), ftrace_prefix_.data(),
+        static_cast<int64_t>(monotonic_time.time_since_epoch().count()),
+        static_cast<int64_t>(
+            context_.monotonic_event_time.time_since_epoch().count()),
+        context_.queue_index);
     const float latency =
         std::chrono::duration_cast<std::chrono::duration<float>>(
             monotonic_time - context_.monotonic_event_time)
@@ -79,6 +90,12 @@
     timing_.latency.Add(latency);
     return true;
   }
+  ftrace_.FormatMessage(
+      "%.*s: fetch next: still event=%" PRId64 " queue=%" PRIu32,
+      static_cast<int>(ftrace_prefix_.size()), ftrace_prefix_.data(),
+      static_cast<int64_t>(
+          context_.monotonic_event_time.time_since_epoch().count()),
+      context_.queue_index);
   return false;
 }
 
@@ -87,6 +104,13 @@
   if (result.first) {
     timing_.fetcher->mutate_count(timing_.fetcher->count() + 1);
     const monotonic_clock::time_point monotonic_time = result.second;
+    ftrace_.FormatMessage(
+        "%.*s: fetch latest: now=%" PRId64 " event=%" PRId64 " queue=%" PRIu32,
+        static_cast<int>(ftrace_prefix_.size()), ftrace_prefix_.data(),
+        static_cast<int64_t>(monotonic_time.time_since_epoch().count()),
+        static_cast<int64_t>(
+            context_.monotonic_event_time.time_since_epoch().count()),
+        context_.queue_index);
     const float latency =
         std::chrono::duration_cast<std::chrono::duration<float>>(
             monotonic_time - context_.monotonic_event_time)
@@ -94,6 +118,12 @@
     timing_.latency.Add(latency);
     return true;
   }
+  ftrace_.FormatMessage(
+      "%.*s: fetch latest: still event=%" PRId64 " queue=%" PRIu32,
+      static_cast<int>(ftrace_prefix_.size()), ftrace_prefix_.data(),
+      static_cast<int64_t>(
+          context_.monotonic_event_time.time_since_epoch().count()),
+      context_.queue_index);
   return false;
 }
 
@@ -105,6 +135,11 @@
              remote_queue_index)) {
     timing_.size.Add(size);
     timing_.sender->mutate_count(timing_.sender->count() + 1);
+    ftrace_.FormatMessage(
+        "%.*s: sent internal: event=%" PRId64 " queue=%" PRIu32,
+        static_cast<int>(ftrace_prefix_.size()), ftrace_prefix_.data(),
+        static_cast<int64_t>(monotonic_sent_time().time_since_epoch().count()),
+        sent_queue_index());
     return true;
   }
   return false;
@@ -119,6 +154,11 @@
              remote_queue_index)) {
     timing_.size.Add(size);
     timing_.sender->mutate_count(timing_.sender->count() + 1);
+    ftrace_.FormatMessage(
+        "%.*s: sent external: event=%" PRId64 " queue=%" PRIu32,
+        static_cast<int>(ftrace_prefix_.size()), ftrace_prefix_.data(),
+        static_cast<int64_t>(monotonic_sent_time().time_since_epoch().count()),
+        sent_queue_index());
     return true;
   }
   return false;
@@ -138,6 +178,11 @@
   event_loop_->context_.size = 0;
   event_loop_->context_.data = nullptr;
 
+  ftrace_.FormatMessage(
+      "timer: %.*s: start now=%" PRId64 " event=%" PRId64,
+      static_cast<int>(name_.size()), name_.data(),
+      static_cast<int64_t>(monotonic_start_time.time_since_epoch().count()),
+      static_cast<int64_t>(event_time.time_since_epoch().count()));
   {
     const float start_latency =
         std::chrono::duration_cast<std::chrono::duration<float>>(
@@ -149,6 +194,10 @@
   fn_();
 
   const monotonic_clock::time_point monotonic_end_time = get_time();
+  ftrace_.FormatMessage(
+      "timer: %.*s: end now=%" PRId64, static_cast<int>(name_.size()),
+      name_.data(),
+      static_cast<int64_t>(monotonic_end_time.time_since_epoch().count()));
 
   const float handler_latency =
       std::chrono::duration_cast<std::chrono::duration<float>>(
@@ -176,6 +225,13 @@
   // Compute how many cycles elapsed and schedule the next wakeup.
   Reschedule(schedule, monotonic_start_time);
 
+  ftrace_.FormatMessage(
+      "phased: %.*s: start now=%" PRId64 " event=%" PRId64 " cycles=%d",
+      static_cast<int>(name_.size()), name_.data(),
+      static_cast<int64_t>(monotonic_start_time.time_since_epoch().count()),
+      static_cast<int64_t>(
+          phased_loop_.sleep_time().time_since_epoch().count()),
+      cycles_elapsed_);
   {
     const float start_latency =
         std::chrono::duration_cast<std::chrono::duration<float>>(
@@ -190,6 +246,10 @@
   cycles_elapsed_ = 0;
 
   const monotonic_clock::time_point monotonic_end_time = get_time();
+  ftrace_.FormatMessage(
+      "phased: %.*s: end now=%" PRId64, static_cast<int>(name_.size()),
+      name_.data(),
+      static_cast<int64_t>(monotonic_end_time.time_since_epoch().count()));
 
   const float handler_latency =
       std::chrono::duration_cast<std::chrono::duration<float>>(
@@ -210,7 +270,9 @@
   WatcherState(
       EventLoop *event_loop, const Channel *channel,
       std::function<void(const Context &context, const void *message)> fn)
-      : channel_index_(event_loop->ChannelIndex(channel)), fn_(std::move(fn)) {}
+      : channel_index_(event_loop->ChannelIndex(channel)),
+        ftrace_prefix_(configuration::StrippedChannelToString(channel)),
+        fn_(std::move(fn)) {}
 
   virtual ~WatcherState() {}
 
@@ -222,6 +284,13 @@
       CheckChannelDataAlignment(context.data, context.size);
     }
     const monotonic_clock::time_point monotonic_start_time = get_time();
+    ftrace_.FormatMessage(
+        "%.*s: watcher start: now=%" PRId64 " event=%" PRId64 " queue=%" PRIu32,
+        static_cast<int>(ftrace_prefix_.size()), ftrace_prefix_.data(),
+        static_cast<int64_t>(monotonic_start_time.time_since_epoch().count()),
+        static_cast<int64_t>(
+            context.monotonic_event_time.time_since_epoch().count()),
+        context.queue_index);
     {
       const float start_latency =
           std::chrono::duration_cast<std::chrono::duration<float>>(
@@ -233,6 +302,10 @@
     fn_(context, context.data);
 
     const monotonic_clock::time_point monotonic_end_time = get_time();
+    ftrace_.FormatMessage(
+        "%.*s: watcher end: now=%" PRId64,
+        static_cast<int>(ftrace_prefix_.size()), ftrace_prefix_.data(),
+        static_cast<int64_t>(monotonic_end_time.time_since_epoch().count()));
 
     const float handler_latency =
         std::chrono::duration_cast<std::chrono::duration<float>>(
@@ -250,12 +323,15 @@
 
  protected:
   const int channel_index_;
+  const std::string ftrace_prefix_;
 
-  std::function<void(const Context &context, const void *message)> fn_;
+  const std::function<void(const Context &context, const void *message)> fn_;
 
   internal::TimingStatistic wakeup_latency_;
   internal::TimingStatistic handler_time_;
   timing::Watcher *watcher_ = nullptr;
+
+  Ftrace ftrace_;
 };
 
 template <typename T>
diff --git a/aos/events/logging/BUILD b/aos/events/logging/BUILD
index 2e63f45..52ea5a8 100644
--- a/aos/events/logging/BUILD
+++ b/aos/events/logging/BUILD
@@ -12,28 +12,47 @@
 )
 
 cc_library(
-    name = "logger",
+    name = "logfile_utils",
     srcs = [
         "logfile_utils.cc",
-        "logger.cc",
-        "logger_math.cc",
     ],
     hdrs = [
         "logfile_utils.h",
-        "logger.h",
     ],
     visibility = ["//visibility:public"],
     deps = [
         ":logger_fbs",
+        "//aos:configuration",
         "//aos:flatbuffer_merge",
         "//aos/events:event_loop",
+        "//aos/util:file",
+        "@com_github_gflags_gflags//:gflags",
+        "@com_github_google_flatbuffers//:flatbuffers",
+        "@com_github_google_glog//:glog",
+        "@com_google_absl//absl/types:span",
+    ],
+)
+
+cc_library(
+    name = "logger",
+    srcs = [
+        "logger.cc",
+        "logger_math.cc",
+    ],
+    hdrs = [
+        "logger.h",
+    ],
+    visibility = ["//visibility:public"],
+    deps = [
+        ":logfile_utils",
+        ":logger_fbs",
+        "//aos/events:event_loop",
         "//aos/events:simulated_event_loop",
         "//aos/network:team_number",
         "//aos/network:timestamp_filter",
         "//aos/time",
         "@com_github_google_flatbuffers//:flatbuffers",
-        "@com_google_absl//absl/container:inlined_vector",
-        "@com_google_absl//absl/strings",
+        "@com_google_absl//absl/types:span",
         "@org_tuxfamily_eigen//:eigen",
     ],
 )
diff --git a/aos/events/logging/log_cat.cc b/aos/events/logging/log_cat.cc
index a938367..d1481d5 100644
--- a/aos/events/logging/log_cat.cc
+++ b/aos/events/logging/log_cat.cc
@@ -13,11 +13,48 @@
 DEFINE_string(type, "",
               "Channel type to match for printing out channels. Empty means no "
               "type filter.");
+DEFINE_bool(fetch, false,
+            "If true, also print out the messages from before the start of the "
+            "log file");
 DEFINE_bool(raw, false,
             "If true, just print the data out unsorted and unparsed");
+DEFINE_bool(format_raw, true,
+            "If true and --raw is specified, print out raw data, but use the "
+            "schema to format the data.");
 DEFINE_int32(max_vector_size, 100,
              "If positive, vectors longer than this will not be printed");
 
+void LogContext(const aos::Channel *channel, std::string node_name,
+                const aos::Context &context) {
+  // Print the flatbuffer out to stdout, both to remove the
+  // unnecessary cruft from glog and to allow the user to readily
+  // redirect just the logged output independent of any debugging
+  // information on stderr.
+  if (context.monotonic_remote_time != context.monotonic_event_time) {
+    std::cout << node_name << context.realtime_event_time << " ("
+              << context.monotonic_event_time << ") sent "
+              << context.realtime_remote_time << " ("
+              << context.monotonic_remote_time << ") "
+              << channel->name()->c_str() << ' ' << channel->type()->c_str()
+              << ": "
+              << aos::FlatbufferToJson(
+                     channel->schema(),
+                     static_cast<const uint8_t *>(context.data),
+                     FLAGS_max_vector_size)
+              << std::endl;
+  } else {
+    std::cout << node_name << context.realtime_event_time << " ("
+              << context.monotonic_event_time << ") "
+              << channel->name()->c_str() << ' ' << channel->type()->c_str()
+              << ": "
+              << aos::FlatbufferToJson(
+                     channel->schema(),
+                     static_cast<const uint8_t *>(context.data),
+                     FLAGS_max_vector_size)
+              << std::endl;
+  }
+}
+
 int main(int argc, char **argv) {
   gflags::SetUsageMessage(
       "Usage:\n"
@@ -45,9 +82,24 @@
       if (!message) {
         break;
       }
+      const aos::Channel *channel =
+          reader.log_file_header()->configuration()->channels()->Get(
+              message.value().message().channel_index());
 
-      std::cout << aos::FlatbufferToJson(message.value(), FLAGS_max_vector_size)
-                << std::endl;
+      if (FLAGS_format_raw && message.value().message().data() != nullptr) {
+        std::cout << aos::configuration::StrippedChannelToString(channel) << " "
+                  << aos::FlatbufferToJson(message.value(), false, 4) << ": "
+                  << aos::FlatbufferToJson(
+                         channel->schema(),
+                         message.value().message().data()->data(),
+                         FLAGS_max_vector_size)
+                  << std::endl;
+      } else {
+        std::cout << aos::configuration::StrippedChannelToString(channel) << " "
+                  << aos::FlatbufferToJson(message.value(), false,
+                                           FLAGS_max_vector_size)
+                  << std::endl;
+      }
     }
     return 0;
   }
@@ -63,16 +115,22 @@
   }
 
   aos::logger::LogReader reader(logfiles);
-  reader.Register();
+
+  aos::SimulatedEventLoopFactory event_loop_factory(reader.configuration());
+  reader.Register(&event_loop_factory);
 
   std::vector<std::unique_ptr<aos::EventLoop>> printer_event_loops;
 
   for (const aos::Node *node : reader.Nodes()) {
     std::unique_ptr<aos::EventLoop> printer_event_loop =
-        reader.event_loop_factory()->MakeEventLoop("printer", node);
+        event_loop_factory.MakeEventLoop("printer", node);
     printer_event_loop->SkipTimingReport();
     printer_event_loop->SkipAosLog();
 
+    std::vector<std::tuple<aos::monotonic_clock::time_point, std::string,
+                           std::unique_ptr<aos::RawFetcher>>>
+        messages;
+
     bool found_channel = false;
     const flatbuffers::Vector<flatbuffers::Offset<aos::Channel>> *channels =
         reader.configuration()->channels();
@@ -93,37 +151,43 @@
                             : std::string(node->name()->string_view()) + " ";
 
         CHECK_NOTNULL(channel->schema());
+
+        if (FLAGS_fetch) {
+          // Grab the last message on each channel.
+          std::unique_ptr<aos::RawFetcher> fetcher =
+              printer_event_loop->MakeRawFetcher(channel);
+          if (fetcher->Fetch()) {
+            auto message =
+                std::make_tuple(fetcher->context().monotonic_event_time,
+                                node_name, std::move(fetcher));
+
+            // Insert it sorted into the vector so we can print in time order
+            // instead of channel order at the start.
+            auto it = std::lower_bound(
+                messages.begin(), messages.end(), message,
+                [](const std::tuple<aos::monotonic_clock::time_point,
+                                    std::string,
+                                    std::unique_ptr<aos::RawFetcher>> &a,
+                   const std::tuple<aos::monotonic_clock::time_point,
+                                    std::string,
+                                    std::unique_ptr<aos::RawFetcher>> &b) {
+                  if (std::get<0>(a) < std::get<0>(b)) {
+                    return true;
+                  }
+                  if (std::get<0>(a) > std::get<0>(b)) {
+                    return false;
+                  }
+
+                  return std::get<2>(a)->channel() < std::get<2>(b)->channel();
+                });
+            messages.insert(it, std::move(message));
+          }
+        }
+
         printer_event_loop->MakeRawWatcher(
             channel, [channel, node_name](const aos::Context &context,
-                                          const void *message) {
-              // Print the flatbuffer out to stdout, both to remove the
-              // unnecessary cruft from glog and to allow the user to readily
-              // redirect just the logged output independent of any debugging
-              // information on stderr.
-              if (context.monotonic_remote_time !=
-                  context.monotonic_event_time) {
-                std::cout << node_name << context.realtime_event_time << " ("
-                          << context.monotonic_event_time << ") sent "
-                          << context.realtime_remote_time << " ("
-                          << context.monotonic_remote_time << ") "
-                          << channel->name()->c_str() << ' '
-                          << channel->type()->c_str() << ": "
-                          << aos::FlatbufferToJson(
-                                 channel->schema(),
-                                 static_cast<const uint8_t *>(message),
-                                 FLAGS_max_vector_size)
-                          << std::endl;
-              } else {
-                std::cout << node_name << context.realtime_event_time << " ("
-                          << context.monotonic_event_time << ") "
-                          << channel->name()->c_str() << ' '
-                          << channel->type()->c_str() << ": "
-                          << aos::FlatbufferToJson(
-                                 channel->schema(),
-                                 static_cast<const uint8_t *>(message),
-                                 FLAGS_max_vector_size)
-                          << std::endl;
-              }
+                                          const void * /*message*/) {
+              LogContext(channel, node_name, context);
             });
         found_channel = true;
       }
@@ -132,10 +196,22 @@
     if (!found_channel) {
       LOG(FATAL) << "Could not find any channels";
     }
+    // TODO(austin): Sort between nodes too when it becomes annoying enough.
+    for (const std::tuple<aos::monotonic_clock::time_point, std::string,
+                          std::unique_ptr<aos::RawFetcher>> &message :
+         messages) {
+      LogContext(std::get<2>(message)->channel(), std::get<1>(message),
+                 std::get<2>(message)->context());
+    }
     printer_event_loops.emplace_back(std::move(printer_event_loop));
   }
 
-  reader.event_loop_factory()->Run();
+  if (FLAGS_fetch) {
+    // New line to separate fetched messages from non-fetched messages.
+    std::cout << std::endl;
+  }
+
+  event_loop_factory.Run();
 
   aos::Cleanup();
   return 0;
diff --git a/aos/events/logging/logfile_utils.cc b/aos/events/logging/logfile_utils.cc
index 5388df7..2cc2f61 100644
--- a/aos/events/logging/logfile_utils.cc
+++ b/aos/events/logging/logfile_utils.cc
@@ -62,6 +62,7 @@
 
   PCHECK(written == static_cast<ssize_t>(n.iov_len))
       << ": Wrote " << written << " expected " << n.iov_len;
+  written_size_ += written;
 }
 
 void DetachedBufferWriter::QueueSizedFlatbuffer(
@@ -96,6 +97,7 @@
 
   PCHECK(written == static_cast<ssize_t>(queued_size_))
       << ": Wrote " << written << " expected " << queued_size_;
+  written_size_ += written;
 
   queued_size_ = 0;
   queue_.clear();
@@ -417,8 +419,7 @@
   bool was_emplaced = false;
   while (true) {
     // Stop if we have enough.
-    if (newest_timestamp() >
-            time_to_queue_ + max_out_of_order_duration() &&
+    if (newest_timestamp() > time_to_queue_ + max_out_of_order_duration() &&
         was_emplaced) {
       VLOG(1) << "Done queueing on " << this << ", queued to "
               << newest_timestamp() << " with requeue time " << time_to_queue_;
@@ -1007,7 +1008,21 @@
 
     for (const std::unique_ptr<SplitMessageReader> &reader :
          split_message_readers_) {
-      if (CompareFlatBuffer(reader->node(), target_node)) {
+      // In order to identify which logfile(s) map to the target node, do a
+      // logical comparison of the nodes, by confirming that we are either in a
+      // single-node setup (where the nodes will both be nullptr) or that the
+      // node names match (but the other node fields--e.g., hostname lists--may
+      // not).
+      const bool both_null =
+          reader->node() == nullptr && target_node == nullptr;
+      const bool both_have_name =
+          (reader->node() != nullptr) && (target_node != nullptr) &&
+          (reader->node()->has_name() && target_node->has_name());
+      const bool node_names_identical =
+          both_have_name &&
+          (reader->node()->name()->string_view() ==
+           target_node->name()->string_view());
+      if (both_null || node_names_identical) {
         if (!found_node) {
           found_node = true;
           log_file_header_ = CopyFlatBuffer(reader->log_file_header());
diff --git a/aos/events/logging/logfile_utils.h b/aos/events/logging/logfile_utils.h
index 4ab4dca..7acbbd3 100644
--- a/aos/events/logging/logfile_utils.h
+++ b/aos/events/logging/logfile_utils.h
@@ -7,6 +7,7 @@
 #include <optional>
 #include <string>
 #include <string_view>
+#include <tuple>
 #include <vector>
 
 #include "absl/types/span.h"
@@ -38,6 +39,9 @@
   DetachedBufferWriter(std::string_view filename);
   ~DetachedBufferWriter();
 
+  DetachedBufferWriter(const DetachedBufferWriter &) = delete;
+  DetachedBufferWriter &operator=(const DetachedBufferWriter &) = delete;
+
   std::string_view filename() const { return filename_; }
 
   // TODO(austin): Snappy compress the log file if it ends with .snappy!
@@ -53,6 +57,12 @@
   // Triggers data to be provided to the kernel and written.
   void Flush();
 
+  // Returns the number of bytes written.
+  size_t written_size() const { return written_size_; }
+
+  // Returns the number of bytes written or currently queued.
+  size_t total_size() const { return written_size_ + queued_size_; }
+
  private:
   const std::string filename_;
 
@@ -60,6 +70,7 @@
 
   // Size of all the data in the queue.
   size_t queued_size_ = 0;
+  size_t written_size_ = 0;
 
   // List of buffers to flush.
   std::vector<flatbuffers::DetachedBuffer> queue_;
diff --git a/aos/events/logging/logger.cc b/aos/events/logging/logger.cc
index de9d344..b3472d7 100644
--- a/aos/events/logging/logger.cc
+++ b/aos/events/logging/logger.cc
@@ -312,9 +312,16 @@
     state->channel_merger = std::make_unique<ChannelMerger>(filenames);
   } else {
     if (replay_configuration) {
-      CHECK_EQ(configuration()->nodes()->size(),
+      CHECK_EQ(logged_configuration()->nodes()->size(),
                replay_configuration->nodes()->size())
           << ": Log file and replay config need to have matching nodes lists.";
+      for (const Node *node : *logged_configuration()->nodes()) {
+        if (configuration::GetNode(replay_configuration, node) == nullptr) {
+          LOG(FATAL)
+              << "Found node " << FlatbufferToJson(node)
+              << " in logged config that is not present in the replay config.";
+        }
+      }
     }
     states_.resize(configuration()->nodes()->size());
   }
@@ -387,6 +394,11 @@
 
     Register(state->event_loop_unique_ptr.get());
   }
+  if (live_nodes_ == 0) {
+    LOG(FATAL)
+        << "Don't have logs from any of the nodes in the replay config--are "
+           "you sure that the replay config matches the original config?";
+  }
 
   // We need to now seed our per-node time offsets and get everything set up to
   // run.
diff --git a/aos/events/logging/logger.h b/aos/events/logging/logger.h
index 34dbd24..c08d6d4 100644
--- a/aos/events/logging/logger.h
+++ b/aos/events/logging/logger.h
@@ -340,18 +340,25 @@
     RemapLoggedChannel(name, T::GetFullyQualifiedName(), add_prefix);
   }
 
+  template <typename T>
+  bool HasChannel(std::string_view name) {
+    return configuration::GetChannel(log_file_header()->configuration(), name,
+                                     T::GetFullyQualifiedName(), "",
+                                     nullptr) != nullptr;
+  }
+
   SimulatedEventLoopFactory *event_loop_factory() {
     return event_loop_factory_;
   }
 
+  const LogFileHeader *log_file_header() const {
+    return &log_file_header_.message();
+  }
+
  private:
   const Channel *RemapChannel(const EventLoop *event_loop,
                               const Channel *channel);
 
-  const LogFileHeader *log_file_header() const {
-    return &log_file_header_.message();
-  }
-
   // Queues at least max_out_of_order_duration_ messages into channels_.
   void QueueMessages();
   // Handle constructing a configuration with all the additional remapped
@@ -424,6 +431,8 @@
   // Returns the offset from the monotonic clock for a node to the distributed
   // clock.  distributed = monotonic + offset;
   std::chrono::nanoseconds offset(int node_index) const {
+    CHECK_LT(node_index, offset_matrix_.rows())
+        << ": Got too high of a node index.";
     return -std::chrono::duration_cast<std::chrono::nanoseconds>(
                std::chrono::duration<double>(offset_matrix_(node_index))) -
            base_offset_matrix_(node_index);
diff --git a/aos/events/logging/logger_test.cc b/aos/events/logging/logger_test.cc
index 9e69ae4..b894bf7 100644
--- a/aos/events/logging/logger_test.cc
+++ b/aos/events/logging/logger_test.cc
@@ -505,6 +505,77 @@
   reader.Deregister();
 }
 
+typedef MultinodeLoggerTest MultinodeLoggerDeathTest;
+
+// Test that if we feed the replay with a mismatched node list that we die on
+// the LogReader constructor.
+TEST_F(MultinodeLoggerDeathTest, MultiNodeBadReplayConfig) {
+  const ::std::string tmpdir(getenv("TEST_TMPDIR"));
+  const ::std::string logfile_base = tmpdir + "/multi_logfile";
+  const ::std::string logfile1 = logfile_base + "_pi1_data.bfbs";
+  const ::std::string logfile2 =
+      logfile_base + "_pi2_data/test/aos.examples.Pong.bfbs";
+  const ::std::string logfile3 = logfile_base + "_pi2_data.bfbs";
+
+  // Remove them.
+  unlink(logfile1.c_str());
+  unlink(logfile2.c_str());
+  unlink(logfile3.c_str());
+
+  LOG(INFO) << "Logging data to " << logfile1 << ", " << logfile2 << " and "
+            << logfile3;
+
+  {
+    std::unique_ptr<EventLoop> ping_event_loop =
+        event_loop_factory_.MakeEventLoop("ping", pi1_);
+    Ping ping(ping_event_loop.get());
+    std::unique_ptr<EventLoop> pong_event_loop =
+        event_loop_factory_.MakeEventLoop("pong", pi2_);
+    Pong pong(pong_event_loop.get());
+
+    std::unique_ptr<EventLoop> pi1_logger_event_loop =
+        event_loop_factory_.MakeEventLoop("logger", pi1_);
+    std::unique_ptr<LogNamer> pi1_log_namer =
+        std::make_unique<MultiNodeLogNamer>(
+            logfile_base, pi1_logger_event_loop->configuration(),
+            pi1_logger_event_loop->node());
+
+    std::unique_ptr<EventLoop> pi2_logger_event_loop =
+        event_loop_factory_.MakeEventLoop("logger", pi2_);
+    std::unique_ptr<LogNamer> pi2_log_namer =
+        std::make_unique<MultiNodeLogNamer>(
+            logfile_base, pi2_logger_event_loop->configuration(),
+            pi2_logger_event_loop->node());
+
+    event_loop_factory_.RunFor(chrono::milliseconds(95));
+
+    Logger pi1_logger(std::move(pi1_log_namer), pi1_logger_event_loop.get(),
+                      chrono::milliseconds(100));
+
+    Logger pi2_logger(std::move(pi2_log_namer), pi2_logger_event_loop.get(),
+                      chrono::milliseconds(100));
+    event_loop_factory_.RunFor(chrono::milliseconds(20000));
+  }
+
+  // Test that, if we add an additional node to the replay config that the
+  // logger complains about the mismatch in number of nodes.
+  FlatbufferDetachedBuffer<Configuration> extra_nodes_config =
+      configuration::MergeWithConfig(&config_.message(), R"({
+          "nodes": [
+            {
+              "name": "extra-node"
+            }
+          ]
+        }
+      )");
+
+  EXPECT_DEATH(LogReader({std::vector<std::string>{logfile1},
+                          std::vector<std::string>{logfile3}},
+                         &extra_nodes_config.message()),
+               "Log file and replay config need to have matching nodes lists.");
+  ;
+}
+
 // Tests that we can read log files where they don't start at the same monotonic
 // time.
 TEST_F(MultinodeLoggerTest, StaggeredStart) {
diff --git a/aos/events/shm_event_loop.cc b/aos/events/shm_event_loop.cc
index 97c9963..dcb7303 100644
--- a/aos/events/shm_event_loop.cc
+++ b/aos/events/shm_event_loop.cc
@@ -47,6 +47,8 @@
 
 namespace aos {
 
+using namespace shm_event_loop_internal;
+
 void SetShmBase(const std::string_view base) {
   FLAGS_shm_base = std::string(base) + "/dev/shm/aos";
 }
@@ -107,7 +109,7 @@
     // that fails, the file has already been created and we can open it
     // normally..  Once the file has been created it wil never be deleted.
     int fd = open(path.c_str(), O_RDWR | O_CREAT | O_EXCL,
-               O_CLOEXEC | FLAGS_permissions);
+                  O_CLOEXEC | FLAGS_permissions);
     if (fd == -1 && errno == EEXIST) {
       VLOG(1) << path << " already created.";
       // File already exists.
@@ -142,9 +144,7 @@
     ipc_lib::InitializeLocklessQueueMemory(memory(), config_);
   }
 
-  ~MMapedQueue() {
-    PCHECK(munmap(data_, size_) == 0);
-  }
+  ~MMapedQueue() { PCHECK(munmap(data_, size_) == 0); }
 
   ipc_lib::LocklessQueueMemory *memory() const {
     return reinterpret_cast<ipc_lib::LocklessQueueMemory *>(data_);
@@ -186,7 +186,7 @@
   }
 }
 
-namespace internal {
+namespace shm_event_loop_internal {
 
 class SimpleShmFetcher {
  public:
@@ -342,6 +342,13 @@
     return lockless_queue_memory_.GetSharedMemory();
   }
 
+  absl::Span<char> GetPrivateMemory() const {
+    CHECK(copy_data());
+    return absl::Span<char>(
+        const_cast<SimpleShmFetcher *>(this)->data_storage_start(),
+        lockless_queue_.message_data_size());
+  }
+
  private:
   char *data_storage_start() {
     if (!copy_data()) return nullptr;
@@ -386,6 +393,10 @@
     return std::make_pair(false, monotonic_clock::min_time);
   }
 
+  absl::Span<char> GetPrivateMemory() const {
+    return simple_shm_fetcher_.GetPrivateMemory();
+  }
+
  private:
   SimpleShmFetcher simple_shm_fetcher_;
 };
@@ -400,10 +411,22 @@
                 event_loop->configuration()->channel_storage_duration()))),
         lockless_queue_(lockless_queue_memory_.memory(),
                         lockless_queue_memory_.config()),
-        lockless_queue_sender_(lockless_queue_.MakeSender()) {}
+        lockless_queue_sender_(
+            VerifySender(lockless_queue_.MakeSender(), channel)) {}
 
   ~ShmSender() override {}
 
+  static ipc_lib::LocklessQueue::Sender VerifySender(
+      std::optional<ipc_lib::LocklessQueue::Sender> &&sender,
+      const Channel *channel) {
+    if (sender) {
+      return std::move(sender.value());
+    }
+    LOG(FATAL) << "Failed to create sender on "
+               << configuration::CleanedChannelToString(channel)
+               << ", too many senders.";
+  }
+
   void *data() override { return lockless_queue_sender_.Data(); }
   size_t size() override { return lockless_queue_sender_.size(); }
   bool DoSend(size_t length,
@@ -447,18 +470,18 @@
 };
 
 // Class to manage the state for a Watcher.
-class WatcherState : public aos::WatcherState {
+class ShmWatcherState : public WatcherState {
  public:
-  WatcherState(
+  ShmWatcherState(
       ShmEventLoop *event_loop, const Channel *channel,
       std::function<void(const Context &context, const void *message)> fn,
       bool copy_data)
-      : aos::WatcherState(event_loop, channel, std::move(fn)),
+      : WatcherState(event_loop, channel, std::move(fn)),
         event_loop_(event_loop),
         event_(this),
         simple_shm_fetcher_(event_loop, channel, copy_data) {}
 
-  ~WatcherState() override { event_loop_->RemoveEvent(&event_); }
+  ~ShmWatcherState() override { event_loop_->RemoveEvent(&event_); }
 
   void Startup(EventLoop *event_loop) override {
     simple_shm_fetcher_.PointAtNextQueueIndex();
@@ -503,14 +526,14 @@
   bool has_new_data_ = false;
 
   ShmEventLoop *event_loop_;
-  EventHandler<WatcherState> event_;
+  EventHandler<ShmWatcherState> event_;
   SimpleShmFetcher simple_shm_fetcher_;
 };
 
 // Adapter class to adapt a timerfd to a TimerHandler.
-class TimerHandlerState final : public TimerHandler {
+class ShmTimerHandler final : public TimerHandler {
  public:
-  TimerHandlerState(ShmEventLoop *shm_event_loop, ::std::function<void()> fn)
+  ShmTimerHandler(ShmEventLoop *shm_event_loop, ::std::function<void()> fn)
       : TimerHandler(shm_event_loop, std::move(fn)),
         shm_event_loop_(shm_event_loop),
         event_(this) {
@@ -524,7 +547,7 @@
     });
   }
 
-  ~TimerHandlerState() {
+  ~ShmTimerHandler() {
     Disable();
     shm_event_loop_->epoll_.DeleteFd(timerfd_.fd());
   }
@@ -576,21 +599,22 @@
 
  private:
   ShmEventLoop *shm_event_loop_;
-  EventHandler<TimerHandlerState> event_;
+  EventHandler<ShmTimerHandler> event_;
 
-  TimerFd timerfd_;
+  internal::TimerFd timerfd_;
 
   monotonic_clock::time_point base_;
   monotonic_clock::duration repeat_offset_;
 };
 
 // Adapter class to the timerfd and PhasedLoop.
-class PhasedLoopHandler final : public ::aos::PhasedLoopHandler {
+class ShmPhasedLoopHandler final : public PhasedLoopHandler {
  public:
-  PhasedLoopHandler(ShmEventLoop *shm_event_loop, ::std::function<void(int)> fn,
-                    const monotonic_clock::duration interval,
-                    const monotonic_clock::duration offset)
-      : aos::PhasedLoopHandler(shm_event_loop, std::move(fn), interval, offset),
+  ShmPhasedLoopHandler(ShmEventLoop *shm_event_loop,
+                       ::std::function<void(int)> fn,
+                       const monotonic_clock::duration interval,
+                       const monotonic_clock::duration offset)
+      : PhasedLoopHandler(shm_event_loop, std::move(fn), interval, offset),
         shm_event_loop_(shm_event_loop),
         event_(this) {
     shm_event_loop_->epoll_.OnReadable(
@@ -612,7 +636,7 @@
     });
   }
 
-  ~PhasedLoopHandler() override {
+  ~ShmPhasedLoopHandler() override {
     shm_event_loop_->epoll_.DeleteFd(timerfd_.fd());
     shm_event_loop_->RemoveEvent(&event_);
   }
@@ -630,11 +654,12 @@
   }
 
   ShmEventLoop *shm_event_loop_;
-  EventHandler<PhasedLoopHandler> event_;
+  EventHandler<ShmPhasedLoopHandler> event_;
 
-  TimerFd timerfd_;
+  internal::TimerFd timerfd_;
 };
-}  // namespace internal
+
+}  // namespace shm_event_loop_internal
 
 ::std::unique_ptr<RawFetcher> ShmEventLoop::MakeRawFetcher(
     const Channel *channel) {
@@ -645,14 +670,14 @@
                   "configuration.";
   }
 
-  return ::std::unique_ptr<RawFetcher>(new internal::ShmFetcher(this, channel));
+  return ::std::unique_ptr<RawFetcher>(new ShmFetcher(this, channel));
 }
 
 ::std::unique_ptr<RawSender> ShmEventLoop::MakeRawSender(
     const Channel *channel) {
   TakeSender(channel);
 
-  return ::std::unique_ptr<RawSender>(new internal::ShmSender(this, channel));
+  return ::std::unique_ptr<RawSender>(new ShmSender(this, channel));
 }
 
 void ShmEventLoop::MakeRawWatcher(
@@ -661,7 +686,7 @@
   TakeWatcher(channel);
 
   NewWatcher(::std::unique_ptr<WatcherState>(
-      new internal::WatcherState(this, channel, std::move(watcher), true)));
+      new ShmWatcherState(this, channel, std::move(watcher), true)));
 }
 
 void ShmEventLoop::MakeRawNoArgWatcher(
@@ -669,7 +694,7 @@
     std::function<void(const Context &context)> watcher) {
   TakeWatcher(channel);
 
-  NewWatcher(::std::unique_ptr<WatcherState>(new internal::WatcherState(
+  NewWatcher(::std::unique_ptr<WatcherState>(new ShmWatcherState(
       this, channel,
       [watcher](const Context &context, const void *) { watcher(context); },
       false)));
@@ -677,16 +702,15 @@
 
 TimerHandler *ShmEventLoop::AddTimer(::std::function<void()> callback) {
   return NewTimer(::std::unique_ptr<TimerHandler>(
-      new internal::TimerHandlerState(this, ::std::move(callback))));
+      new ShmTimerHandler(this, ::std::move(callback))));
 }
 
 PhasedLoopHandler *ShmEventLoop::AddPhasedLoop(
     ::std::function<void(int)> callback,
     const monotonic_clock::duration interval,
     const monotonic_clock::duration offset) {
-  return NewPhasedLoop(
-      ::std::unique_ptr<PhasedLoopHandler>(new internal::PhasedLoopHandler(
-          this, ::std::move(callback), interval, offset)));
+  return NewPhasedLoop(::std::unique_ptr<PhasedLoopHandler>(
+      new ShmPhasedLoopHandler(this, ::std::move(callback), interval, offset)));
 }
 
 void ShmEventLoop::OnRun(::std::function<void()> on_run) {
@@ -696,8 +720,8 @@
 void ShmEventLoop::HandleEvent() {
   // Update all the times for handlers.
   for (::std::unique_ptr<WatcherState> &base_watcher : watchers_) {
-    internal::WatcherState *watcher =
-        reinterpret_cast<internal::WatcherState *>(base_watcher.get());
+    ShmWatcherState *watcher =
+        reinterpret_cast<ShmWatcherState *>(base_watcher.get());
 
     watcher->CheckForNewData();
   }
@@ -837,6 +861,10 @@
     }
 
     aos::SetCurrentThreadName(name_.substr(0, 16));
+    const cpu_set_t default_affinity = DefaultAffinity();
+    if (!CPU_EQUAL(&affinity_, &default_affinity)) {
+      ::aos::SetCurrentThreadAffinity(affinity_);
+    }
     // Now, all the callbacks are setup.  Lock everything into memory and go RT.
     if (priority_ != 0) {
       ::aos::InitRT();
@@ -870,8 +898,8 @@
   }
 
   for (::std::unique_ptr<WatcherState> &base_watcher : watchers_) {
-    internal::WatcherState *watcher =
-        reinterpret_cast<internal::WatcherState *>(base_watcher.get());
+    ShmWatcherState *watcher =
+        reinterpret_cast<ShmWatcherState *>(base_watcher.get());
     watcher->UnregisterWakeup();
   }
 
@@ -906,20 +934,32 @@
   priority_ = priority;
 }
 
+void ShmEventLoop::SetRuntimeAffinity(const cpu_set_t &cpuset) {
+  if (is_running()) {
+    LOG(FATAL) << "Cannot set affinity while running.";
+  }
+  affinity_ = cpuset;
+}
+
 void ShmEventLoop::set_name(const std::string_view name) {
   name_ = std::string(name);
   UpdateTimingReport();
 }
 
 absl::Span<char> ShmEventLoop::GetWatcherSharedMemory(const Channel *channel) {
-  internal::WatcherState *const watcher_state =
-      static_cast<internal::WatcherState *>(GetWatcherState(channel));
+  ShmWatcherState *const watcher_state =
+      static_cast<ShmWatcherState *>(GetWatcherState(channel));
   return watcher_state->GetSharedMemory();
 }
 
 absl::Span<char> ShmEventLoop::GetShmSenderSharedMemory(
     const aos::RawSender *sender) const {
-  return static_cast<const internal::ShmSender *>(sender)->GetSharedMemory();
+  return static_cast<const ShmSender *>(sender)->GetSharedMemory();
+}
+
+absl::Span<char> ShmEventLoop::GetShmFetcherPrivateMemory(
+    const aos::RawFetcher *fetcher) const {
+  return static_cast<const ShmFetcher *>(fetcher)->GetPrivateMemory();
 }
 
 pid_t ShmEventLoop::GetTid() { return syscall(SYS_gettid); }
diff --git a/aos/events/shm_event_loop.h b/aos/events/shm_event_loop.h
index d3f1295..55fc85f 100644
--- a/aos/events/shm_event_loop.h
+++ b/aos/events/shm_event_loop.h
@@ -10,15 +10,15 @@
 #include "aos/events/event_loop_generated.h"
 
 namespace aos {
-namespace internal {
+namespace shm_event_loop_internal {
 
-class WatcherState;
-class TimerHandlerState;
-class PhasedLoopHandler;
+class ShmWatcherState;
+class ShmTimerHandler;
+class ShmPhasedLoopHandler;
 class ShmSender;
 class ShmFetcher;
 
-}  // namespace internal
+}  // namespace shm_event_loop_internal
 
 // Specialization of EventLoop that is built from queues running out of shared
 // memory.
@@ -57,15 +57,15 @@
       std::function<void(const Context &context)> watcher) override;
 
   TimerHandler *AddTimer(std::function<void()> callback) override;
-  aos::PhasedLoopHandler *AddPhasedLoop(
-      std::function<void(int)> callback,
-      const monotonic_clock::duration interval,
-      const monotonic_clock::duration offset =
-          std::chrono::seconds(0)) override;
+  PhasedLoopHandler *AddPhasedLoop(std::function<void(int)> callback,
+                                   const monotonic_clock::duration interval,
+                                   const monotonic_clock::duration offset =
+                                       std::chrono::seconds(0)) override;
 
   void OnRun(std::function<void()> on_run) override;
 
   void SetRuntimeRealtimePriority(int priority) override;
+  void SetRuntimeAffinity(const cpu_set_t &cpuset) override;
 
   void set_name(const std::string_view name) override;
   const std::string_view name() const override { return name_; }
@@ -81,29 +81,49 @@
   // this.
   absl::Span<char> GetWatcherSharedMemory(const Channel *channel);
 
-  // Returns the local mapping of the shared memory used by the provided Sender
+  // Returns the local mapping of the shared memory used by the provided Sender.
   template <typename T>
   absl::Span<char> GetSenderSharedMemory(aos::Sender<T> *sender) const {
     return GetShmSenderSharedMemory(GetRawSender(sender));
   }
 
+  // Returns the local mapping of the private memory used by the provided
+  // Fetcher to hold messages.
+  template <typename T>
+  absl::Span<char> GetFetcherPrivateMemory(aos::Fetcher<T> *fetcher) const {
+    return GetShmFetcherPrivateMemory(GetRawFetcher(fetcher));
+  }
+
  private:
-  friend class internal::WatcherState;
-  friend class internal::TimerHandlerState;
-  friend class internal::PhasedLoopHandler;
-  friend class internal::ShmSender;
-  friend class internal::ShmFetcher;
+  friend class shm_event_loop_internal::ShmWatcherState;
+  friend class shm_event_loop_internal::ShmTimerHandler;
+  friend class shm_event_loop_internal::ShmPhasedLoopHandler;
+  friend class shm_event_loop_internal::ShmSender;
+  friend class shm_event_loop_internal::ShmFetcher;
+
+  static cpu_set_t DefaultAffinity() {
+    cpu_set_t result;
+    for (int i = 0; i < CPU_SETSIZE; ++i) {
+      CPU_SET(i, &result);
+    }
+    return result;
+  }
 
   void HandleEvent();
 
   // Returns the TID of the event loop.
   pid_t GetTid() override;
 
-  // Private method to access the shared memory mapping of a ShmSender
+  // Private method to access the shared memory mapping of a ShmSender.
   absl::Span<char> GetShmSenderSharedMemory(const aos::RawSender *sender) const;
 
+  // Private method to access the private memory mapping of a ShmFetcher.
+  absl::Span<char> GetShmFetcherPrivateMemory(
+      const aos::RawFetcher *fetcher) const;
+
   std::vector<std::function<void()>> on_run_;
   int priority_ = 0;
+  cpu_set_t affinity_ = DefaultAffinity();
   std::string name_;
   const Node *const node_;
 
diff --git a/aos/events/shm_event_loop_test.cc b/aos/events/shm_event_loop_test.cc
index 0c39704..3244c39 100644
--- a/aos/events/shm_event_loop_test.cc
+++ b/aos/events/shm_event_loop_test.cc
@@ -207,11 +207,21 @@
   auto generic_loop1 = factory.MakePrimary("primary");
   ShmEventLoop *const loop1 = static_cast<ShmEventLoop *>(generic_loop1.get());
 
-  // check that GetSenderSharedMemory returns non-null/non-empty memory span
+  // check that GetSenderSharedMemory returns non-null/non-empty memory span.
   auto sender = loop1->MakeSender<TestMessage>("/test");
   EXPECT_FALSE(loop1->GetSenderSharedMemory(&sender).empty());
 }
 
+TEST(ShmEventLoopTest, GetFetcherPrivateMemory) {
+  ShmEventLoopTestFactory factory;
+  auto generic_loop1 = factory.MakePrimary("primary");
+  ShmEventLoop *const loop1 = static_cast<ShmEventLoop *>(generic_loop1.get());
+
+  // check that GetFetcherPrivateMemory returns non-null/non-empty memory span.
+  auto fetcher = loop1->MakeFetcher<TestMessage>("/test");
+  EXPECT_FALSE(loop1->GetFetcherPrivateMemory(&fetcher).empty());
+}
+
 // TODO(austin): Test that missing a deadline with a timer recovers as expected.
 
 }  // namespace testing
diff --git a/aos/events/simulated_event_loop.cc b/aos/events/simulated_event_loop.cc
index cf58b46..ee4e6d6 100644
--- a/aos/events/simulated_event_loop.cc
+++ b/aos/events/simulated_event_loop.cc
@@ -102,6 +102,19 @@
 
   const Channel *channel() const { return channel_; }
 
+  void CountSenderCreated() {
+    if (sender_count_ >= channel()->num_senders()) {
+      LOG(FATAL) << "Failed to create sender on "
+                 << configuration::CleanedChannelToString(channel())
+                 << ", too many senders.";
+    }
+    ++sender_count_;
+  }
+  void CountSenderDestroyed() {
+    --sender_count_;
+    CHECK_GE(sender_count_, 0);
+  }
+
  private:
   const Channel *channel_;
 
@@ -114,6 +127,8 @@
   EventScheduler *scheduler_;
 
   ipc_lib::QueueIndex next_queue_index_;
+
+  int sender_count_ = 0;
 };
 
 namespace {
@@ -134,8 +149,10 @@
   SimulatedSender(SimulatedChannel *simulated_channel, EventLoop *event_loop)
       : RawSender(event_loop, simulated_channel->channel()),
         simulated_channel_(simulated_channel),
-        event_loop_(event_loop) {}
-  ~SimulatedSender() {}
+        event_loop_(event_loop) {
+    simulated_channel_->CountSenderCreated();
+  }
+  ~SimulatedSender() { simulated_channel_->CountSenderDestroyed(); }
 
   void *data() override {
     if (!message_) {
@@ -414,6 +431,10 @@
 
   int priority() const override { return priority_; }
 
+  void SetRuntimeAffinity(const cpu_set_t & /*cpuset*/) override {
+    CHECK(!is_running()) << ": Cannot set affinity while running.";
+  }
+
   void Setup() {
     MaybeScheduleTimingReports();
     if (!skip_logger_) {
diff --git a/aos/events/simulated_event_loop.h b/aos/events/simulated_event_loop.h
index a7f7920..c8029de 100644
--- a/aos/events/simulated_event_loop.h
+++ b/aos/events/simulated_event_loop.h
@@ -2,6 +2,7 @@
 #define AOS_EVENTS_SIMULATED_EVENT_LOOP_H_
 
 #include <algorithm>
+#include <functional>
 #include <map>
 #include <memory>
 #include <string_view>
@@ -87,7 +88,9 @@
   std::chrono::nanoseconds send_delay() const { return send_delay_; }
 
   // Sets the simulated network delay for messages forwarded between nodes.
-  void set_network_delay(std::chrono::nanoseconds network_delay);
+  void set_network_delay(std::chrono::nanoseconds network_delay) {
+    network_delay_ = network_delay;
+  }
   std::chrono::nanoseconds network_delay() const { return network_delay_; }
 
   // Returns the clock used to synchronize the nodes.
diff --git a/aos/flatbuffer_introspection.cc b/aos/flatbuffer_introspection.cc
index 036a830..ae402fe 100644
--- a/aos/flatbuffer_introspection.cc
+++ b/aos/flatbuffer_introspection.cc
@@ -70,7 +70,8 @@
     const reflection::Object *obj,
     const flatbuffers::Vector<flatbuffers::Offset<reflection::Object>> *objects,
     const flatbuffers::Vector<flatbuffers::Offset<reflection::Enum>> *enums,
-    const ObjT *object, size_t max_vector_size, std::stringstream *out);
+    const ObjT *object, size_t max_vector_size, std::stringstream *out,
+    bool multi_line = false, int tree_depth = 0);
 
 // Get enum value name
 const char *EnumToString(
@@ -114,13 +115,34 @@
   }
 }
 
+// Adds a newline and indents
+// Every increment in tree depth is two spaces
+void AddWrapping(std::stringstream *out, int tree_depth) {
+  *out << "\n";
+  for (int i = 0; i < tree_depth; i++) {
+    *out << "  ";
+  }
+}
+
+// Detects if a field should trigger wrapping of the parent object.
+bool ShouldCauseWrapping(reflection::BaseType type) {
+  switch (type) {
+    case BaseType::Vector:
+    case BaseType::Obj:
+      return true;
+    default:
+      return false;
+  }
+}
+
 // Print field in flatbuffer table. Field must be populated.
 template <typename ObjT>
 void FieldToString(
     const ObjT *table, const reflection::Field *field,
     const flatbuffers::Vector<flatbuffers::Offset<reflection::Object>> *objects,
     const flatbuffers::Vector<flatbuffers::Offset<reflection::Enum>> *enums,
-    size_t max_vector_size, std::stringstream *out) {
+    size_t max_vector_size, std::stringstream *out, bool multi_line,
+    int tree_depth) {
   const reflection::Type *type = field->type();
 
   switch (type->base_type()) {
@@ -187,10 +209,25 @@
           *out << "[ ... " << vector->size() << " elements ... ]";
           break;
         }
+
+        bool wrap = false;
+        const int child_tree_depth = tree_depth + 1;
+
+        if (multi_line) {
+          wrap = ShouldCauseWrapping(elem_type);
+        }
+
         *out << '[';
         for (flatbuffers::uoffset_t i = 0; i < vector->size(); ++i) {
           if (i != 0) {
-            *out << ", ";
+            if (wrap) {
+              *out << ",";
+            } else {
+              *out << ", ";
+            }
+          }
+          if (wrap) {
+            AddWrapping(out, child_tree_depth);
           }
           if (flatbuffers::IsInteger(elem_type)) {
             IntOrEnumToString(
@@ -211,18 +248,20 @@
                     flatbuffers::GetAnyVectorElemAddressOf<
                         const flatbuffers::Struct>(
                         vector, i, objects->Get(type->index())->bytesize()),
-                        max_vector_size,
-                    out);
+                    max_vector_size, out, multi_line, child_tree_depth);
               } else {
                 ObjectToString(objects->Get(type->index()), objects, enums,
                                flatbuffers::GetAnyVectorElemPointer<
                                    const flatbuffers::Table>(vector, i),
-                                   max_vector_size,
-                               out);
+                               max_vector_size, out, multi_line,
+                               child_tree_depth);
               }
             }
           }
         }
+        if (wrap) {
+          AddWrapping(out, tree_depth);
+        }
         *out << ']';
       } else {
         *out << "null";
@@ -232,10 +271,12 @@
       if (type->index() > -1 && type->index() < (int32_t)objects->size()) {
         if (objects->Get(type->index())->is_struct()) {
           ObjectToString(objects->Get(type->index()), objects, enums,
-                         flatbuffers::GetFieldStruct(*table, *field), max_vector_size, out);
+                         flatbuffers::GetFieldStruct(*table, *field),
+                         max_vector_size, out, multi_line, tree_depth);
         } else if constexpr (std::is_same<flatbuffers::Table, ObjT>()) {
           ObjectToString(objects->Get(type->index()), objects, enums,
-                         flatbuffers::GetFieldT(*table, *field),  max_vector_size,out);
+                         flatbuffers::GetFieldT(*table, *field),
+                         max_vector_size, out, multi_line, tree_depth);
         }
       } else {
         *out << "null";
@@ -253,32 +294,71 @@
     const reflection::Object *obj,
     const flatbuffers::Vector<flatbuffers::Offset<reflection::Object>> *objects,
     const flatbuffers::Vector<flatbuffers::Offset<reflection::Enum>> *enums,
-    const ObjT *object, size_t max_vector_size, std::stringstream *out) {
+    const ObjT *object, size_t max_vector_size, std::stringstream *out,
+    bool multi_line, int tree_depth) {
   static_assert(std::is_same<flatbuffers::Table, ObjT>() ||
                     std::is_same<flatbuffers::Struct, ObjT>(),
                 "Type must be either flatbuffer table or struct");
   bool print_sep = false;
+
+  const int child_tree_depth = tree_depth + 1;
+
+  bool wrap = false;
+  if (multi_line) {
+    // Check whether this object has objects, vectors, or floats inside of it
+    for (const reflection::Field *field : *obj->fields()) {
+      if (ShouldCauseWrapping(field->type()->base_type())) {
+        wrap = true;
+        break;
+      }
+    }
+  }
+
   *out << '{';
   for (const reflection::Field *field : *obj->fields()) {
     // Check whether this object has the field populated (even for structs,
     // which should have all fields populated)
     if (object->GetAddressOf(field->offset())) {
       if (print_sep) {
-        *out << ", ";
+        if (wrap) {
+          *out << ",";
+        } else {
+          *out << ", ";
+        }
       } else {
         print_sep = true;
       }
+
+      if (wrap) {
+        AddWrapping(out, child_tree_depth);
+      }
+
       *out << '"' << field->name()->c_str() << "\": ";
-      FieldToString(object, field, objects, enums, max_vector_size, out);
+      FieldToString(object, field, objects, enums, max_vector_size, out,
+                    multi_line, child_tree_depth);
     }
   }
+
+  if (wrap) {
+    AddWrapping(out, tree_depth);
+  }
+
   *out << '}';
 }
 
 }  // namespace
 
 std::string FlatbufferToJson(const reflection::Schema *schema,
-                             const uint8_t *data, size_t max_vector_size) {
+                             const uint8_t *data, bool multi_line,
+                             size_t max_vector_size) {
+  CHECK(schema != nullptr) << ": Need to provide a schema";
+
+  // It is pretty common to get passed in a nullptr when a test fails.  Rather
+  // than CHECK, return a more user friendly result.
+  if (data == nullptr) {
+    return "null";
+  }
+
   const flatbuffers::Table *table = flatbuffers::GetAnyRoot(data);
 
   const reflection::Object *obj = schema->root_table();
@@ -286,7 +366,7 @@
   std::stringstream out;
 
   ObjectToString(obj, schema->objects(), schema->enums(), table,
-                 max_vector_size, &out);
+                 max_vector_size, &out, multi_line);
 
   return out.str();
 }
diff --git a/aos/flatbuffer_introspection_test.cc b/aos/flatbuffer_introspection_test.cc
index 9da705f..9dacd41 100644
--- a/aos/flatbuffer_introspection_test.cc
+++ b/aos/flatbuffer_introspection_test.cc
@@ -62,6 +62,20 @@
             "{\"foo_double\": 0.555555555555556, \"foo_float\": 0.333333}");
 }
 
+TEST_F(FlatbufferIntrospectionTest, NanFloatTest) {
+  flatbuffers::FlatBufferBuilder builder;
+  ConfigurationBuilder config_builder(builder);
+
+  config_builder.add_foo_float(std::numeric_limits<float>::quiet_NaN());
+  config_builder.add_foo_double(std::numeric_limits<double>::quiet_NaN());
+
+  builder.Finish(config_builder.Finish());
+
+  std::string out = FlatbufferToJson(schema_, builder.GetBufferPointer());
+
+  EXPECT_EQ(out, "{\"foo_double\": null, \"foo_float\": null}");
+}
+
 TEST_F(FlatbufferIntrospectionTest, VectorScalarTest) {
   flatbuffers::FlatBufferBuilder builder;
 
@@ -154,6 +168,21 @@
   EXPECT_EQ(out, "{\"foo_enum\": \"UShort\"}");
 }
 
+TEST_F(FlatbufferIntrospectionTest, EnumWithUnknownValueTest) {
+  flatbuffers::FlatBufferBuilder builder;
+
+  ConfigurationBuilder config_builder(builder);
+  // 123 is not part of the enum.  We expect it to be represented by null in
+  // the json.
+  config_builder.fbb_.AddElement<uint8_t>(Configuration::VT_FOO_ENUM, 123, 0);
+
+  builder.Finish(config_builder.Finish());
+
+  std::string out = FlatbufferToJson(schema_, builder.GetBufferPointer());
+
+  EXPECT_EQ(out, "{\"foo_enum\": 123}");
+}
+
 TEST_F(FlatbufferIntrospectionTest, VectorStringTest) {
   flatbuffers::FlatBufferBuilder builder;
 
@@ -328,9 +357,127 @@
 
   builder.Finish(config_builder.Finish());
 
-  std::string out = FlatbufferToJson(schema_, builder.GetBufferPointer(), 100);
+  std::string out =
+      FlatbufferToJson(schema_, builder.GetBufferPointer(), false, 100);
   EXPECT_EQ(out, "{\"vector_foo_int\": [ ... 101 elements ... ]}");
 }
 
+TEST_F(FlatbufferIntrospectionTest, MultilineTest) {
+  flatbuffers::FlatBufferBuilder builder;
+  ConfigurationBuilder config_builder(builder);
+
+  config_builder.add_foo_bool(true);
+  config_builder.add_foo_int(-20);
+
+  builder.Finish(config_builder.Finish());
+
+  std::string out = FlatbufferToJson(schema_, builder.GetBufferPointer(), true);
+
+  EXPECT_EQ(out,
+            "{\n"
+            "  \"foo_bool\": true,\n"
+            "  \"foo_int\": -20\n"
+            "}");
+}
+
+TEST_F(FlatbufferIntrospectionTest, MultilineStructTest) {
+  flatbuffers::FlatBufferBuilder builder;
+  ConfigurationBuilder config_builder(builder);
+
+  FooStructNested foo_struct2(10);
+  FooStruct foo_struct(5, foo_struct2);
+
+  config_builder.add_foo_struct(&foo_struct);
+
+  builder.Finish(config_builder.Finish());
+
+  std::string out = FlatbufferToJson(schema_, builder.GetBufferPointer(), true);
+
+  EXPECT_EQ(out,
+            "{\n"
+            "  \"foo_struct\": {\n"
+            "    \"foo_byte\": 5,\n"
+            "    \"nested_struct\": {\"foo_byte\": 10}\n"
+            "  }\n"
+            "}");
+}
+
+TEST_F(FlatbufferIntrospectionTest, MultilineVectorStructTest) {
+  flatbuffers::FlatBufferBuilder builder;
+
+  FooStructNested foo_struct2(1);
+
+  auto structs = builder.CreateVectorOfStructs(
+      std::vector<FooStruct>({{5, foo_struct2}, {10, foo_struct2}}));
+
+  ConfigurationBuilder config_builder(builder);
+  config_builder.add_vector_foo_struct(structs);
+
+  builder.Finish(config_builder.Finish());
+
+  std::string out = FlatbufferToJson(schema_, builder.GetBufferPointer(), true);
+
+  EXPECT_EQ(out,
+            "{\n"
+            "  \"vector_foo_struct\": [\n"
+            "    {\n"
+            "      \"foo_byte\": 5,\n"
+            "      \"nested_struct\": {\"foo_byte\": 1}\n"
+            "    },\n"
+            "    {\n"
+            "      \"foo_byte\": 10,\n"
+            "      \"nested_struct\": {\"foo_byte\": 1}\n"
+            "    }\n"
+            "  ]\n"
+            "}");
+}
+
+TEST_F(FlatbufferIntrospectionTest, MultilineVectorScalarTest) {
+  flatbuffers::FlatBufferBuilder builder;
+
+  // Flatbuffers don't like creating vectors simultaneously with table, so do
+  // first.
+  auto foo_ints =
+      builder.CreateVector<int32_t>({-300, -200, -100, 0, 100, 200, 300});
+
+  auto foo_floats =
+      builder.CreateVector<float>({0.0, 1.0 / 9.0, 2.0 / 9.0, 3.0 / 9.0});
+  auto foo_doubles =
+      builder.CreateVector<double>({0, 1.0 / 9.0, 2.0 / 9.0, 3.0 / 9.0});
+
+  ConfigurationBuilder config_builder(builder);
+
+  config_builder.add_vector_foo_int(foo_ints);
+  config_builder.add_vector_foo_float(foo_floats);
+  config_builder.add_vector_foo_double(foo_doubles);
+
+  builder.Finish(config_builder.Finish());
+
+  std::string out = FlatbufferToJson(schema_, builder.GetBufferPointer(), true);
+
+  EXPECT_EQ(out,
+            "{\n"
+            "  \"vector_foo_double\": [0, 0.111111111111111, "
+            "0.222222222222222, 0.333333333333333],\n"
+            "  \"vector_foo_float\": [0, 0.111111, 0.222222, 0.333333],\n"
+            "  \"vector_foo_int\": [-300, -200, -100, 0, 100, 200, 300]\n"
+            "}");
+}
+
+// Tests that a nullptr buffer prints nullptr.
+TEST_F(FlatbufferIntrospectionTest, NullptrData) {
+  EXPECT_EQ("null", FlatbufferToJson(schema_, nullptr));
+}
+
+// Tests that a null schema gets caught.
+TEST(FlatbufferIntrospectionDeathTest, NullSchema) {
+  EXPECT_DEATH(
+      {
+        FlatbufferToJson(static_cast<const reflection::Schema *>(nullptr),
+                         nullptr);
+      },
+      "Need to provide a schema");
+}
+
 }  // namespace testing
 }  // namespace aos
diff --git a/aos/flatbuffers.h b/aos/flatbuffers.h
index 6e86d35..6fca458 100644
--- a/aos/flatbuffers.h
+++ b/aos/flatbuffers.h
@@ -125,7 +125,8 @@
   FlatbufferString(const std::string_view data) : data_(data) {}
   // Builds a Flatbuffer by copying the data from the other flatbuffer.
   FlatbufferString(const Flatbuffer<T> &other) {
-    data_ = std::string(other.data(), other.size());
+    data_ =
+        std::string(reinterpret_cast<const char *>(other.data()), other.size());
   }
 
   // Coppies the data from the other flatbuffer.
diff --git a/aos/ftrace.cc b/aos/ftrace.cc
new file mode 100644
index 0000000..af4964c
--- /dev/null
+++ b/aos/ftrace.cc
@@ -0,0 +1,45 @@
+#include "aos/ftrace.h"
+
+#include <stdarg.h>
+#include <stdio.h>
+
+namespace aos {
+
+Ftrace::~Ftrace() {
+  if (message_fd_ != -1) {
+    PCHECK(close(message_fd_) == 0);
+  }
+  if (message_fd_ != -1) {
+    PCHECK(close(on_fd_) == 0);
+  }
+}
+
+void Ftrace::FormatMessage(const char *format, ...) {
+  if (message_fd_ == -1) {
+    return;
+  }
+  char buffer[512];
+  va_list ap;
+  va_start(ap, format);
+  const int result = vsnprintf(buffer, sizeof(buffer), format, ap);
+  va_end(ap);
+  CHECK_LE(static_cast<size_t>(result), sizeof(buffer))
+      << ": Format string ended up too long: " << format;
+  WriteMessage(std::string_view(buffer, result));
+}
+
+void Ftrace::WriteMessage(std::string_view content) {
+  if (message_fd_ == -1) {
+    return;
+  }
+  const int result = write(message_fd_, content.data(), content.size());
+  if (result == -1 && errno == EBADF) {
+    // This just means tracing is turned off. Ignore it.
+    return;
+  }
+  PCHECK(result >= 0) << ": Failed to write ftrace message: " << content;
+  CHECK_EQ(static_cast<size_t>(result), content.size())
+      << ": Failed to write complete ftrace message: " << content;
+}
+
+}  // namespace aos
diff --git a/aos/ftrace.h b/aos/ftrace.h
new file mode 100644
index 0000000..42147ad
--- /dev/null
+++ b/aos/ftrace.h
@@ -0,0 +1,51 @@
+#ifndef AOS_FTRACE_H_
+#define AOS_FTRACE_H_
+
+#include <fcntl.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include <string_view>
+
+#include "glog/logging.h"
+
+namespace aos {
+
+// Manages interacting with ftrace. Silently hides many errors, because they are
+// expected to occur when ftrace is not enabled, and we want calling code to
+// continue working in that case.
+class Ftrace {
+ public:
+  Ftrace()
+      : message_fd_(open("/sys/kernel/debug/tracing/trace_marker", O_WRONLY)),
+        on_fd_(open("/sys/kernel/debug/tracing/tracing_on", O_WRONLY)) {}
+  ~Ftrace();
+
+  // Writes a message with a printf-style format.
+  //
+  // Silently does nothing if tracing is disabled.
+  void FormatMessage(const char *format, ...)
+      __attribute__((format(__printf__, 2, 3)));
+
+  // Writes a preformatted message.
+  //
+  // Silently does nothing if tracing is disabled.
+  void WriteMessage(std::string_view content);
+
+  // Turns tracing off, or CHECK-fails if tracing is inaccessible. Does nothing
+  // if tracing is currently available but off.
+  void TurnOffOrDie() {
+    CHECK(on_fd_ != -1)
+        << ": Failed to open tracing_on earlier, cannot turn off tracing";
+    char zero = '0';
+    CHECK_EQ(write(on_fd_, &zero, 1), 1) << ": Failed to turn tracing off";
+  }
+
+ private:
+  int message_fd_, on_fd_;
+};
+
+}  // namespace aos
+
+#endif  // AOS_FTRACE_H_
diff --git a/aos/ipc_lib/lockless_queue.cc b/aos/ipc_lib/lockless_queue.cc
index f31d80d..02aebcb 100644
--- a/aos/ipc_lib/lockless_queue.cc
+++ b/aos/ipc_lib/lockless_queue.cc
@@ -276,6 +276,50 @@
   // Everything should be zero initialized already.  So we just need to fill
   // everything out properly.
 
+  // This is the UID we will use for checking signal-sending permission
+  // compatibility.
+  //
+  // The manpage says:
+  //   For a process to have permission to send a signal, it must either be
+  //   privileged [...], or the real or effective user ID of the sending process
+  //   must equal the real or saved set-user-ID of the target process.
+  //
+  // Processes typically initialize a queue in random order as they start up.
+  // This means we need an algorithm for verifying all processes have
+  // permissions to send each other signals which gives the same answer no
+  // matter what order they attach in. We would also like to avoid maintaining a
+  // shared list of the UIDs of all processes.
+  //
+  // To do this while still giving sufficient flexibility for all current use
+  // cases, we track a single UID for the queue. All processes with a matching
+  // euid+suid must have this UID. Any processes with distinct euid/suid must
+  // instead have a matching ruid.  This guarantees signals can be sent between
+  // all processes attached to the queue.
+  //
+  // In particular, this allows a process to change only its euid (to interact
+  // with a queue) while still maintaining privileges via its ruid. However, it
+  // can only use privileges in ways that do not require changing the euid back,
+  // because while the euid is different it will not be able to receive signals.
+  // We can't actually verify that, but we can sanity check that things are
+  // valid when the queue is initialized.
+
+  uid_t uid;
+  {
+    uid_t ruid, euid, suid;
+    PCHECK(getresuid(&ruid, &euid, &suid) == 0);
+    // If these are equal, then use them, even if that's different from the real
+    // UID. This allows processes to keep a real UID of 0 (to have permissions
+    // to perform system-level changes) while still being able to communicate
+    // with processes running unprivileged as a distinct user.
+    if (euid == suid) {
+      uid = euid;
+      VLOG(1) << "Using euid==suid " << uid;
+    } else {
+      uid = ruid;
+      VLOG(1) << "Using ruid " << ruid;
+    }
+  }
+
   // Grab the mutex.  We don't care if the previous reader died.  We are going
   // to check everything anyways.
   GrabQueueSetupLockOrDie grab_queue_setup_lock(memory);
@@ -306,7 +350,7 @@
     }
 
     memory->next_queue_index.Invalidate();
-    memory->uid = getuid();
+    memory->uid = uid;
 
     for (size_t i = 0; i < memory->num_senders(); ++i) {
       ::aos::ipc_lib::Sender *s = memory->GetSender(i);
@@ -321,7 +365,7 @@
     // redo initialization.
     memory->initialized = true;
   } else {
-    CHECK_EQ(getuid(), memory->uid) << ": UIDs must match for all processes";
+    CHECK_EQ(uid, memory->uid) << ": UIDs must match for all processes";
   }
 
   return memory;
@@ -518,7 +562,8 @@
   }
 
   if (sender_index_ == -1) {
-    LOG(FATAL) << "Too many senders";
+    VLOG(1) << "Too many senders, starting to bail.";
+    return;
   }
 
   ::aos::ipc_lib::Sender *s = memory_->GetSender(sender_index_);
@@ -529,13 +574,18 @@
 }
 
 LocklessQueue::Sender::~Sender() {
-  if (memory_ != nullptr) {
+  if (valid()) {
     death_notification_release(&(memory_->GetSender(sender_index_)->tid));
   }
 }
 
-LocklessQueue::Sender LocklessQueue::MakeSender() {
-  return LocklessQueue::Sender(memory_);
+std::optional<LocklessQueue::Sender> LocklessQueue::MakeSender() {
+  LocklessQueue::Sender result = LocklessQueue::Sender(memory_);
+  if (result.valid()) {
+    return std::move(result);
+  } else {
+    return std::nullopt;
+  }
 }
 
 QueueIndex ZeroOrValid(QueueIndex index) {
diff --git a/aos/ipc_lib/lockless_queue.h b/aos/ipc_lib/lockless_queue.h
index 550485f..de80f3d 100644
--- a/aos/ipc_lib/lockless_queue.h
+++ b/aos/ipc_lib/lockless_queue.h
@@ -5,6 +5,7 @@
 #include <sys/signalfd.h>
 #include <sys/types.h>
 #include <vector>
+#include <optional>
 
 #include "aos/ipc_lib/aos_sync.h"
 #include "aos/ipc_lib/data_alignment.h"
@@ -245,6 +246,11 @@
 
     Sender(LocklessQueueMemory *memory);
 
+    // Returns true if this sender is valid.  If it isn't valid, any of the
+    // other methods won't work.  This is here to allow the lockless queue to
+    // only build a sender if there was one available.
+    bool valid() const { return sender_index_ != -1 && memory_ != nullptr; }
+
     // Pointer to the backing memory.
     LocklessQueueMemory *memory_ = nullptr;
 
@@ -252,8 +258,9 @@
     int sender_index_ = -1;
   };
 
-  // Creates a sender.
-  Sender MakeSender();
+  // Creates a sender.  If we couldn't allocate a sender, returns nullopt.
+  // TODO(austin): Change the API if we find ourselves with more errors.
+  std::optional<Sender> MakeSender();
 
  private:
   LocklessQueueMemory *memory_ = nullptr;
diff --git a/aos/ipc_lib/lockless_queue_death_test.cc b/aos/ipc_lib/lockless_queue_death_test.cc
index 213c9e4..b7cdfee 100644
--- a/aos/ipc_lib/lockless_queue_death_test.cc
+++ b/aos/ipc_lib/lockless_queue_death_test.cc
@@ -521,7 +521,7 @@
         LocklessQueue queue(
             reinterpret_cast<aos::ipc_lib::LocklessQueueMemory *>(memory),
             config);
-        LocklessQueue::Sender sender = queue.MakeSender();
+        LocklessQueue::Sender sender = queue.MakeSender().value();
         for (int i = 0; i < 2; ++i) {
           char data[100];
           size_t s = snprintf(data, sizeof(data), "foobar%d", i + 1);
@@ -555,7 +555,7 @@
 
         LocklessQueue queue(memory, config);
         // Building and destroying a sender will clean up the queue.
-        { LocklessQueue::Sender sender = queue.MakeSender(); }
+        { LocklessQueue::Sender sender = queue.MakeSender().value(); }
 
         if (print) {
           printf("Cleaned up version:\n");
@@ -563,12 +563,12 @@
         }
 
         {
-          LocklessQueue::Sender sender = queue.MakeSender();
+          LocklessQueue::Sender sender = queue.MakeSender().value();
           {
             // Make a second sender to confirm that the slot was freed.
             // If the sender doesn't get cleaned up, this will fail.
             LocklessQueue queue2(memory, config);
-            queue2.MakeSender();
+            queue2.MakeSender().value();
           }
 
           // Send a message to make sure that the queue still works.
diff --git a/aos/ipc_lib/lockless_queue_test.cc b/aos/ipc_lib/lockless_queue_test.cc
index 109c2ea..65b2a15 100644
--- a/aos/ipc_lib/lockless_queue_test.cc
+++ b/aos/ipc_lib/lockless_queue_test.cc
@@ -194,17 +194,15 @@
 }
 
 // Tests that too many watchers dies like expected.
-TEST_F(LocklessQueueDeathTest, TooManySenders) {
-  EXPECT_DEATH(
-      {
-        ::std::vector<::std::unique_ptr<LocklessQueue>> queues;
-        ::std::vector<LocklessQueue::Sender> senders;
-        for (size_t i = 0; i < config_.num_senders + 1; ++i) {
-          queues.emplace_back(new LocklessQueue(get_memory(), config_));
-          senders.emplace_back(queues.back()->MakeSender());
-        }
-      },
-      "Too many senders");
+TEST_F(LocklessQueueTest, TooManySenders) {
+  ::std::vector<::std::unique_ptr<LocklessQueue>> queues;
+  ::std::vector<LocklessQueue::Sender> senders;
+  for (size_t i = 0; i < config_.num_senders; ++i) {
+    queues.emplace_back(new LocklessQueue(get_memory(), config_));
+    senders.emplace_back(queues.back()->MakeSender().value());
+  }
+  queues.emplace_back(new LocklessQueue(get_memory(), config_));
+  EXPECT_FALSE(queues.back()->MakeSender());
 }
 
 // Now, start 2 threads and have them receive the signals.
@@ -240,7 +238,7 @@
 TEST_F(LocklessQueueTest, Send) {
   LocklessQueue queue(get_memory(), config_);
 
-  LocklessQueue::Sender sender = queue.MakeSender();
+  LocklessQueue::Sender sender = queue.MakeSender().value();
 
   // Send enough messages to wrap.
   for (int i = 0; i < 20000; ++i) {
diff --git a/aos/ipc_lib/queue_racer.cc b/aos/ipc_lib/queue_racer.cc
index 9bb0a70..f5b3d7e 100644
--- a/aos/ipc_lib/queue_racer.cc
+++ b/aos/ipc_lib/queue_racer.cc
@@ -143,7 +143,7 @@
         ::std::thread([this, &t, thread_index, &run, write_wrap_count]() {
           // Build up a sender.
           LocklessQueue queue(memory_, config_);
-          LocklessQueue::Sender sender = queue.MakeSender();
+          LocklessQueue::Sender sender = queue.MakeSender().value();
 
           // Signal that we are ready to start sending.
           t.ready.Set();
diff --git a/aos/json_to_flatbuffer.h b/aos/json_to_flatbuffer.h
index cadc0de..e852ead 100644
--- a/aos/json_to_flatbuffer.h
+++ b/aos/json_to_flatbuffer.h
@@ -44,7 +44,8 @@
 
 ::std::string TableFlatbufferToJson(const flatbuffers::Table *t,
                                     const ::flatbuffers::TypeTable *typetable,
-                                    bool multi_line, size_t max_vector_size = SIZE_MAX);
+                                    bool multi_line,
+                                    size_t max_vector_size = SIZE_MAX);
 
 // Converts a DetachedBuffer holding a flatbuffer to JSON.
 inline ::std::string FlatbufferToJson(const flatbuffers::DetachedBuffer &buffer,
@@ -78,7 +79,7 @@
 
 std::string FlatbufferToJson(const reflection::Schema *const schema,
                              const uint8_t *const data,
-                             size_t max_vector_size = SIZE_MAX);
+                             bool multi_line = false, size_t max_vector_size = SIZE_MAX);
 
 }  // namespace aos
 
diff --git a/aos/json_to_flatbuffer_test.cc b/aos/json_to_flatbuffer_test.cc
index a9112f4..6dbb7ea 100644
--- a/aos/json_to_flatbuffer_test.cc
+++ b/aos/json_to_flatbuffer_test.cc
@@ -231,5 +231,11 @@
   EXPECT_EQ("{ \"vector_foo_int\": [ ... 101 elements ... ] }", back_json_long);
 }
 
+// Tests that a nullptr buffer prints nullptr.
+TEST_F(JsonToFlatbufferTest, NullptrData) {
+  EXPECT_EQ("null", TableFlatbufferToJson((const flatbuffers::Table *)(nullptr),
+                                          ConfigurationTypeTable(), false));
+}
+
 }  // namespace testing
 }  // namespace aos
diff --git a/aos/json_tokenizer.cc b/aos/json_tokenizer.cc
index c54e0ed..a3d804e 100644
--- a/aos/json_tokenizer.cc
+++ b/aos/json_tokenizer.cc
@@ -146,6 +146,12 @@
     return true;
   }
 
+  // People tend to use null instead of nan.  Accept that too.
+  if (Consume("null")) {
+    *s = ::std::string("nan");
+    return true;
+  }
+
   // Then, we either get a 0, or we get a nonzero.  Only nonzero can be followed
   // by a second number.
   if (!Consume("0")) {
diff --git a/aos/realtime.cc b/aos/realtime.cc
index 6668855..558417f 100644
--- a/aos/realtime.cc
+++ b/aos/realtime.cc
@@ -50,8 +50,7 @@
   if (set_for_root == SetLimitForRoot::kYes || !am_root) {
     struct rlimit64 rlim;
     PCHECK(getrlimit64(resource, &rlim) == 0)
-        << ": " << program_invocation_short_name << "-init: getrlimit64("
-        << resource << ") failed";
+        << ": getting limit for " << resource;
 
     if (allow_decrease == AllowSoftLimitDecrease::kYes) {
       rlim.rlim_cur = soft;
@@ -61,9 +60,8 @@
     rlim.rlim_max = ::std::max(rlim.rlim_max, soft);
 
     PCHECK(setrlimit64(resource, &rlim) == 0)
-        << ": " << program_invocation_short_name << "-init: setrlimit64("
-        << resource << ", {cur=" << (uintmax_t)rlim.rlim_cur
-        << ",max=" << (uintmax_t)rlim.rlim_max << "}) failed";
+        << ": changing limit for " << resource << " to " << rlim.rlim_cur
+        << " with max of " << rlim.rlim_max;
   }
 }
 
@@ -74,8 +72,7 @@
   SetSoftRLimit(RLIMIT_MEMLOCK, RLIM_INFINITY, SetLimitForRoot::kNo);
 
   WriteCoreDumps();
-  PCHECK(mlockall(MCL_CURRENT | MCL_FUTURE) == 0)
-      << ": " << program_invocation_short_name << "-init: mlockall failed";
+  PCHECK(mlockall(MCL_CURRENT | MCL_FUTURE) == 0);
 
   // Don't give freed memory back to the OS.
   CHECK_EQ(1, mallopt(M_TRIM_THRESHOLD, -1));
@@ -114,15 +111,19 @@
 void UnsetCurrentThreadRealtimePriority() {
   struct sched_param param;
   param.sched_priority = 0;
-  PCHECK(sched_setscheduler(0, SCHED_OTHER, &param) == 0)
-      << ": sched_setscheduler(0, SCHED_OTHER, 0) failed";
+  PCHECK(sched_setscheduler(0, SCHED_OTHER, &param) == 0);
+}
+
+void SetCurrentThreadAffinity(const cpu_set_t &cpuset) {
+  PCHECK(sched_setaffinity(0, sizeof(cpuset), &cpuset) == 0);
 }
 
 void SetCurrentThreadName(const std::string_view name) {
   CHECK_LE(name.size(), 16u) << ": thread name '" << name << "' too long";
   VLOG(1) << "This thread is changing to '" << name << "'";
   std::string string_name(name);
-  PCHECK(prctl(PR_SET_NAME, string_name.c_str()) == 0);
+  PCHECK(prctl(PR_SET_NAME, string_name.c_str()) == 0)
+      << ": changing name to " << string_name;
   if (&logging::internal::ReloadThreadName != nullptr) {
     logging::internal::ReloadThreadName();
   }
@@ -135,7 +136,7 @@
   struct sched_param param;
   param.sched_priority = priority;
   PCHECK(sched_setscheduler(0, SCHED_FIFO, &param) == 0)
-      << ": sched_setscheduler(0, SCHED_FIFO, " << priority << ") failed";
+      << ": changing to SCHED_FIFO with " << priority;
 }
 
 void WriteCoreDumps() {
diff --git a/aos/realtime.h b/aos/realtime.h
index 20df979..6e0a472 100644
--- a/aos/realtime.h
+++ b/aos/realtime.h
@@ -1,6 +1,7 @@
 #ifndef AOS_REALTIME_H_
 #define AOS_REALTIME_H_
 
+#include <sched.h>
 #include <string_view>
 
 namespace aos {
@@ -21,6 +22,9 @@
 // Sets the current thread's realtime priority.
 void SetCurrentThreadRealtimePriority(int priority);
 
+// Sets the current thread's scheduling affinity.
+void SetCurrentThreadAffinity(const cpu_set_t &cpuset);
+
 // Sets up this process to write core dump files.
 // This is called by Init*, but it's here for other files that want this
 // behavior without calling Init*.
diff --git a/aos/util/file.cc b/aos/util/file.cc
index b334ded..089efbc 100644
--- a/aos/util/file.cc
+++ b/aos/util/file.cc
@@ -66,5 +66,10 @@
   PCHECK(result == 0) << ": Error creating " << folder;
 }
 
+bool PathExists(std::string_view path) {
+  struct stat buffer;
+  return stat(path.data(), &buffer) == 0;
+}
+
 }  // namespace util
 }  // namespace aos
diff --git a/aos/util/file.h b/aos/util/file.h
index d6724af..9ee2fb4 100644
--- a/aos/util/file.h
+++ b/aos/util/file.h
@@ -17,6 +17,8 @@
 
 void MkdirP(std::string_view path, mode_t mode);
 
+bool PathExists(std::string_view path);
+
 }  // namespace util
 }  // namespace aos
 
diff --git a/aos/util/file_test.cc b/aos/util/file_test.cc
index fa259e8..6851302 100644
--- a/aos/util/file_test.cc
+++ b/aos/util/file_test.cc
@@ -26,6 +26,19 @@
   EXPECT_EQ(my_pid, stat.substr(0, my_pid.size()));
 }
 
+// Tests that the PathExists function works under normal conditions.
+TEST(FileTest, PathExistsTest) {
+  const std::string tmpdir(getenv("TEST_TMPDIR"));
+  const std::string test_file = tmpdir + "/test_file";
+  // Make sure the test_file doesn't exist.
+  unlink(test_file.c_str());
+  EXPECT_FALSE(PathExists(test_file));
+
+  WriteStringToFileOrDie(test_file, "abc");
+
+  EXPECT_TRUE(PathExists(test_file));
+}
+
 }  // namespace testing
 }  // namespace util
 }  // namespace aos
diff --git a/build_tests/BUILD b/build_tests/BUILD
index 28dc677..e837390 100644
--- a/build_tests/BUILD
+++ b/build_tests/BUILD
@@ -124,3 +124,10 @@
     srcs_version = "PY2AND3",
     deps = ["@opencv_contrib_nonfree_amd64//:python_opencv"],
 )
+
+py_test(
+    name = "python_jinja2",
+    srcs = ["python_jinja2.py"],
+    srcs_version = "PY2AND3",
+    deps = ["@python_jinja2"],
+)
diff --git a/build_tests/python_jinja2.py b/build_tests/python_jinja2.py
new file mode 100644
index 0000000..a926e31
--- /dev/null
+++ b/build_tests/python_jinja2.py
@@ -0,0 +1,4 @@
+#!/usr/bin/python3
+
+# Confirm that we can import jinja2.
+import jinja2
diff --git a/debian/python_jinja2.BUILD b/debian/python_jinja2.BUILD
new file mode 100644
index 0000000..1adab6a
--- /dev/null
+++ b/debian/python_jinja2.BUILD
@@ -0,0 +1,7 @@
+py_library(
+    name = "python_jinja2",
+    srcs = glob(["src/jinja2/*.py"]),
+    imports = ["src/"],
+    visibility = ["//visibility:public"],
+    deps = ["@python_markupsafe"],
+)
diff --git a/debian/python_markupsafe.BUILD b/debian/python_markupsafe.BUILD
new file mode 100644
index 0000000..87f10ea
--- /dev/null
+++ b/debian/python_markupsafe.BUILD
@@ -0,0 +1,6 @@
+py_library(
+    name = "python_markupsafe",
+    srcs = glob(["src/markupsafe/*.py"]),
+    imports = ["src/"],
+    visibility = ["//visibility:public"],
+)
diff --git a/documentation/tutorials/BUILD b/documentation/tutorials/BUILD
index 13516c1..5816444 100644
--- a/documentation/tutorials/BUILD
+++ b/documentation/tutorials/BUILD
@@ -6,10 +6,8 @@
     "create-a-simple-program-for-running-a-motor",
     "download-code-to-the-robot",
     "make-a-drivebase-move",
-    "send-and-receive-messages-on-queues",
     "submitting-code-for-a-review",
     "tune-an-autonomous",
-    "generated-queue-code",
     "using-bazel",
 ]
 
diff --git a/documentation/tutorials/generated-queue-code.md b/documentation/tutorials/generated-queue-code.md
deleted file mode 100644
index 8f00a31..0000000
--- a/documentation/tutorials/generated-queue-code.md
+++ /dev/null
@@ -1,246 +0,0 @@
-# Generated queue code
-
-This tutorial covers what the code that is generated from a .q file looks like.
-
-### The basics
-Each `.q` file generates a `.cc` and `.h` pair that can be #include'd in your c++
-code.
-
-We're going to use `//frc971/queues:gyro.q` as our example.
-
-## Package declaration
-`.q` files usually start with a `package` declaration. The `package` declaration
-directly converts to a set of namespace declarations. So
-
-```cpp
-package frc971.sensors;
-```
-
-generates the namespace definitions
-
-```cpp
-namespace frc971 {
-namespace sensors {
-
-// All of the code for the queue
-
-}  // namespace sensors
-}  // namespace frc971
-```
-
-## Message declarations
-Each message declared in the queue generates a class that can be instantiated in
-your code easily.
-
-For a simple example, lets use the `Uid` message in `//frc971/queues:gyro.q`:
-
-```cpp
-message Uid {
-  uint32_t uid;
-};
-```
-
-Let's take a look at the actual class definition that is created. If you ever
-want to inspect the code that is generated from a .q file, you can take a look
-in `bazel-genfiles` after you build your code. So after running `bazel build
-//frc971/queues:gyro` there will be a `bazel-genfiles/frc971/queues/gyro.q.h`
-and `bazel-genfiles/frc971/queues/gyro.q.cc`.
-
-Here's the definition of the `Uid` class from
-`bazel-genfiles/frc971/queues/gyro.q.h` (comments mine):
-
-```cpp
-struct Uid : public ::aos::Message {
-  // Used to identify queues uniquely even if they have the same name.
-  enum { kQueueLength = 100, kHash = 0x0837c541 };
-
-  // The actual data that we requested be part of the message.
-  uint32_t uid;
-
-  // Writes the message to a byte buffer.
-  size_t Serialize(char *buffer) const;
-  // Reads the message from a byte buffer.
-  size_t Deserialize(const char *buffer);
-  // Zeroes all of the fields in the message.
-  void Zero();
-  // Returns the size of message. Not used
-  static size_t Size() { return 4 + ::aos::Message::Size(); }
-  // Prints the contents of the message to a string that is human readable.
-  size_t Print(char *buffer, size_t length) const;
-
-  // Gets information about this message's type.
-  static const ::aos::MessageType *GetType();
-  static const ::aos::MessageType *DoGetType();
-
-  // Default constructor, zeroes the struct.
-  Uid();
-  // Value constructor, sets the data at construction.
-  Uid(uint32_t uid_in);
-  bool EqualsNoTime(const Uid &other) const;
-};
-```
-
-## Queue declarations
-Every `queue` declaration in the `.q` file creates an `aos::Queue` variable that
-uses the message type given. So the declaration
-
-```cpp
-queue Uid gyro_part_id;
-```
-
-will generate a variable in `gyro.q.h`
-
-```cpp
-static UNUSED_VARIABLE::aos::Queue<Uid> &gyro_part_id;
-```
-
-which can then be used in any file that includes the header file.
-
-
-## QueueGroup declarations
-
-`queue_group` declares a set of queues and some associated helpers for them.
-There are two kinds of `queue_groups`, declarations and aliases. Here's an
-example of a `queue_group` definition:
-
-```cpp
-queue_group SuperstructureQueue {
-  implements aos.control_loops.ControlLoop;
-
-  message Goal {
-    IntakeGoal intake;
-
-    // Used to identify a position in the planned set of positions on the arm.
-    uint32_t arm_goal_position;
-    // If true, start the grab box sequence.
-    bool grab_box;
-
-    bool open_claw;
-    bool close_claw;
-
-    bool deploy_fork;
-
-    bool hook_release;
-
-    double voltage_winch;
-
-    double open_threshold;
-
-    bool disable_box_correct;
-
-    bool trajectory_override;
-  };
-
-  message Status {
-    // Are all the subsystems zeroed?
-    bool zeroed;
-
-    // If true, any of the subsystems have aborted.
-    bool estopped;
-
-    // Status of both intake sides.
-    IntakeSideStatus left_intake;
-    IntakeSideStatus right_intake;
-
-    ArmStatus arm;
-
-    double filtered_box_velocity;
-    uint32_t rotation_state;
-  };
-
-  message Position {
-    // Values of the series elastic encoders on the left side of the robot from
-    // the rear perspective in radians.
-    IntakeElasticSensors left_intake;
-
-    // Values of the series elastic encoders on the right side of the robot from
-    // the rear perspective in radians.
-    IntakeElasticSensors right_intake;
-
-    ArmPosition arm;
-
-    // Value of the beam breaker sensor. This value is true if the beam is
-    // broken, false if the beam isn't broken.
-    bool claw_beambreak_triggered;
-    // Value of the beambreak sensor detecting when the box has hit the frame
-    // cutout.
-    bool box_back_beambreak_triggered;
-
-    // Distance to the box in meters.
-    double box_distance;
-  };
-
-  message Output {
-    // Voltage sent to the parts on the left side of the intake.
-    IntakeVoltage left_intake;
-
-    // Voltage sent to the parts on the right side of the intake.
-    IntakeVoltage right_intake;
-
-    // Voltage sent to the motors on the proximal joint of the arm.
-    double voltage_proximal;
-
-    // Voltage sent to the motors on the distal joint of the arm.
-    double voltage_distal;
-
-    // Voltage sent to the hanger.  Positive pulls the robot up.
-    double voltage_winch;
-
-    // Clamped (when true) or unclamped (when false) status sent to the
-    // pneumatic claw on the arm.
-    bool claw_grabbed;
-
-    // If true, release the arm brakes.
-    bool release_arm_brake;
-    // If true, release the hook
-    bool hook_release;
-    // If true, release the forks
-    bool forks_release;
-  };
-
-  queue Goal goal;
-  queue Output output;
-  queue Status status;
-  queue Position position;
-};
-```
-
-and aliases look like:
-
-```cpp
-queue_group SuperstructureQueue superstructure_queue;
-```
-
-The declaration type creates the definition of the queue group and what messages
-and queues it contains. The alias type creates a variable with the given name.
-
-This queue group results in the following code in
-`bazel-genfiles/y2018/control_loops/superstructure/superstructure.q.h`
-
-```cpp
-
-struct SuperstructureQueue_Goal : public ::aos::Message { /* ... */ }
-struct SuperstructureQueue_Output : public ::aos::Message { /* ... */ }
-struct SuperstructureQueue_Status : public ::aos::Message { /* ... */ }
-struct SuperstructureQueue_Position : public ::aos::Message { /* ... */ }
-class SuperstructureQueue : public ::aos::QueueGroup {
- public:
-  typedef SuperstructureQueue_Goal Goal;
-  ::aos::Queue<SuperstructureQueue_Goal> goal;
-  typedef SuperstructureQueue_Output Output;
-  ::aos::Queue<SuperstructureQueue_Output> output;
-  typedef SuperstructureQueue_Status Status;
-  ::aos::Queue<SuperstructureQueue_Status> status;
-  typedef SuperstructureQueue_Position Position;
-  ::aos::Queue<SuperstructureQueue_Position> position;
-  SuperstructureQueue(const char *name, uint32_t hash, const char *goal_name,
-                      const char *output_name, const char *status_name,
-                      const char *position_name);
-};
-```
-
-and a definition of a variable:
-
-```cpp
-static UNUSED_VARIABLE SuperstructureQueue &superstructure_queue;
-```
diff --git a/documentation/tutorials/send-and-receive-messages-on-queues.md b/documentation/tutorials/send-and-receive-messages-on-queues.md
deleted file mode 100644
index bcdaac1..0000000
--- a/documentation/tutorials/send-and-receive-messages-on-queues.md
+++ /dev/null
@@ -1,74 +0,0 @@
-# How to send and receive messages on a Queue
-
-Let's say we have a `//y2018:basic_queue.q` file that has the following
-contents:
-
-```cpp
-package y2018;
-
-message BasicMessage {
-  float value;
-};
-
-queue BasicMessage basic_queue;
-```
-
-and the corresponding entry in `//y2018/BUILD`
-
-```python
-queue_library(
-    name = "basic_queue",
-    srcs = [
-        "basic_queue.q",
-    ],
-    visibility = ["//visibility:public"],
-)
-```
-
-The following examples assume that you've declared a dependency on
-`//y2018:basic_queue` in the relevant `BUILD` file.
-
-## Sending a message
-First, include the queue definition that is created from the `.q` file. This
-will make the `basic_queue` variable available.
-
-```cpp
-#include "y2018/basic_queue.q.h"
-```
-
-```cpp
-
-// Allocate a new message pointer to use. basic_queue is from the include
-// statement.
-auto new_basic_message = basic_queue.MakeMessage();
-new_goal.value = 0.5;
-
-// Send the message on the queue
-if (!new_basic_message.Send()) {
-  LOG(ERROR, "Failed to send.\n");
-}
-```
-
-## Receiving a message
-
-Once again, we must include the queue header file to get access to the queue.
-
-
-```cpp
-#include "y2018/basic_queue.q.h"
-```
-
-Then we can receive a message.
-
-
-```cpp
-basic_queue.FetchLatest();
-if (basic_queue.get()) {
-  // We have a valid message ready to be used
-  LOG(INFO, "Got a BasicMessage from basic_queue with value %f\n",
-      basic_queue->value);
-} else {
-  // No new message was available
-  LOG(ERROR, "No message was received\n");
-}
-```
diff --git a/frc971/control_loops/drivetrain/hybrid_ekf.h b/frc971/control_loops/drivetrain/hybrid_ekf.h
index e03d581..bd358ca 100644
--- a/frc971/control_loops/drivetrain/hybrid_ekf.h
+++ b/frc971/control_loops/drivetrain/hybrid_ekf.h
@@ -138,6 +138,18 @@
   static constexpr int kNInputs = 4;
   // Number of previous samples to save.
   static constexpr int kSaveSamples = 50;
+  // Whether we should completely rerun the entire stored history of
+  // kSaveSamples on every correction. Enabling this will increase overall CPU
+  // usage substantially; however, leaving it disabled makes it so that we are
+  // less likely to notice if processing camera frames is causing delays in the
+  // drivetrain.
+  // If we are having CPU issues, we have three easy avenues to improve things:
+  // (1) Reduce kSaveSamples (e.g., if all camera frames arive within
+  //     100 ms, then we can reduce kSaveSamples to be 25 (125 ms of samples)).
+  // (2) Don't actually rely on the ability to insert corrections into the
+  //     timeline.
+  // (3) Set this to false.
+  static constexpr bool kFullRewindOnEverySample = false;
   // Assume that all correction steps will have kNOutputs
   // dimensions.
   // TODO(james): Relax this assumption; relaxing it requires
@@ -182,19 +194,24 @@
     X_hat_ = state;
     have_zeroed_encoders_ = true;
     P_ = P;
-    observations_.PushFromBottom(
-        {t,
-         t,
-         X_hat_,
-         P_,
-         Input::Zero(),
-         Output::Zero(),
-         {},
-         [](const State &, const Input &) { return Output::Zero(); },
-         [](const State &) {
-           return Eigen::Matrix<Scalar, kNOutputs, kNStates>::Zero();
-         },
-         Eigen::Matrix<Scalar, kNOutputs, kNOutputs>::Identity()});
+    observations_.PushFromBottom({
+        t,
+        t,
+        X_hat_,
+        P_,
+        Input::Zero(),
+        Output::Zero(),
+        {},
+        [](const State &, const Input &) { return Output::Zero(); },
+        [](const State &) {
+          return Eigen::Matrix<Scalar, kNOutputs, kNStates>::Zero();
+        },
+        Eigen::Matrix<Scalar, kNOutputs, kNOutputs>::Identity(),
+        StateSquare::Identity(),
+        StateSquare::Zero(),
+        std::chrono::seconds(0),
+        State::Zero(),
+    });
   }
 
   // Correct with:
@@ -292,6 +309,29 @@
            dt_config_.robot_radius;
   }
 
+  // Returns the last state before the specified time.
+  // Returns nullopt if time is older than the oldest measurement.
+  std::optional<State> LastStateBeforeTime(
+      aos::monotonic_clock::time_point time) {
+    if (observations_.empty() || observations_.begin()->t > time) {
+      return std::nullopt;
+    }
+    for (const auto &observation : observations_) {
+      if (observation.t > time) {
+        // Note that observation.X_hat actually references the _previous_ X_hat.
+        return observation.X_hat;
+      }
+    }
+    return X_hat();
+  }
+
+  // Returns the most recent input vector.
+  Input MostRecentInput() {
+    CHECK(!observations_.empty());
+    Input U = observations_.top().U;
+    return U;
+  }
+
  private:
   struct Observation {
     // Time when the observation was taken.
@@ -331,6 +371,16 @@
     // The measurement noise matrix.
     Eigen::Matrix<Scalar, kNOutputs, kNOutputs> R;
 
+    // Discretized A and Q to use on this update step. These will only be
+    // recalculated if the timestep changes.
+    StateSquare A_d;
+    StateSquare Q_d;
+    aos::monotonic_clock::duration discretization_time;
+
+    // A cached value indicating how much we change X_hat in the prediction step
+    // of this Observation.
+    State predict_update;
+
     // In order to sort the observations in the PriorityQueue object, we
     // need a comparison function.
     friend bool operator<(const Observation &l, const Observation &r) {
@@ -453,45 +503,56 @@
     return Xdot;
   }
 
-  void PredictImpl(const Input &U, std::chrono::nanoseconds dt, State *state,
+  void PredictImpl(Observation *obs, std::chrono::nanoseconds dt, State *state,
                    StateSquare *P) {
-    StateSquare A_c = AForState(*state);
-    StateSquare A_d;
-    StateSquare Q_d;
-    // TODO(james): By far the biggest CPU sink in the localization appears to
-    // be this discretization--it's possible the spline code spikes higher,
-    // but it doesn't create anywhere near the same sustained load. There
-    // are a few potential options for optimizing this code, but none of
-    // them are entirely trivial, e.g. we could:
-    // -Reduce the number of states (this function grows at O(kNStates^3))
-    // -Adjust the discretization function itself (there're a few things we
-    //  can tune there).
-    // -Try to come up with some sort of lookup table or other way of
-    //  pre-calculating A_d and Q_d.
-    // I also have to figure out how much we care about the precision of
-    // some of these values--I don't think we care much, but we probably
-    // do want to maintain some of the structure of the matrices.
-    controls::DiscretizeQAFast(Q_continuous_, A_c, dt, &Q_d, &A_d);
+    // Only recalculate the discretization if the timestep has changed.
+    // Technically, this isn't quite correct, since the discretization will
+    // change depending on the current state. However, the slight loss of
+    // precision seems acceptable for the sake of significantly reducing CPU
+    // usage.
+    if (obs->discretization_time != dt) {
+      // TODO(james): By far the biggest CPU sink in the localization appears to
+      // be this discretization--it's possible the spline code spikes higher,
+      // but it doesn't create anywhere near the same sustained load. There
+      // are a few potential options for optimizing this code, but none of
+      // them are entirely trivial, e.g. we could:
+      // -Reduce the number of states (this function grows at O(kNStates^3))
+      // -Adjust the discretization function itself (there're a few things we
+      //  can tune there).
+      // -Try to come up with some sort of lookup table or other way of
+      //  pre-calculating A_d and Q_d.
+      // I also have to figure out how much we care about the precision of
+      // some of these values--I don't think we care much, but we probably
+      // do want to maintain some of the structure of the matrices.
+      const StateSquare A_c = AForState(*state);
+      controls::DiscretizeQAFast(Q_continuous_, A_c, dt, &obs->Q_d, &obs->A_d);
+      obs->discretization_time = dt;
 
-    *state = RungeKuttaU(
-        [this](const State &X, const Input &U) { return DiffEq(X, U); }, *state,
-        U, aos::time::DurationInSeconds(dt));
+      obs->predict_update =
+          RungeKuttaU(
+              [this](const State &X, const Input &U) { return DiffEq(X, U); },
+              *state, obs->U, aos::time::DurationInSeconds(dt)) -
+          *state;
+    }
 
-    StateSquare Ptemp = A_d * *P * A_d.transpose() + Q_d;
+    *state += obs->predict_update;
+
+    StateSquare Ptemp = obs->A_d * *P * obs->A_d.transpose() + obs->Q_d;
     *P = Ptemp;
   }
 
-  void CorrectImpl(const Eigen::Matrix<Scalar, kNOutputs, kNOutputs> &R,
-                   const Output &Z, const Output &expected_Z,
-                   const Eigen::Matrix<Scalar, kNOutputs, kNStates> &H,
-                   State *state, StateSquare *P) {
-    Output err = Z - expected_Z;
-    Eigen::Matrix<Scalar, kNStates, kNOutputs> PH = *P * H.transpose();
-    Eigen::Matrix<Scalar, kNOutputs, kNOutputs> S = H * PH + R;
-    Eigen::Matrix<Scalar, kNStates, kNOutputs> K = PH * S.inverse();
-    *state += K * err;
-    StateSquare Ptemp = (StateSquare::Identity() - K * H) * *P;
+  void CorrectImpl(Observation *obs, State *state, StateSquare *P) {
+    const Eigen::Matrix<Scalar, kNOutputs, kNStates> H = obs->dhdx(*state);
+    // Note: Technically, this does calculate P * H.transpose() twice. However,
+    // when I was mucking around with some things, I found that in practice
+    // putting everything into one expression and letting Eigen optimize it
+    // directly actually improved performance relative to precalculating P *
+    // H.transpose().
+    const Eigen::Matrix<Scalar, kNStates, kNOutputs> K =
+        *P * H.transpose() * (H * *P * H.transpose() + obs->R).inverse();
+    const StateSquare Ptemp = (StateSquare::Identity() - K * H) * *P;
     *P = Ptemp;
+    *state += K * (obs->z - obs->h(*state, obs->U));
   }
 
   void ProcessObservation(Observation *obs, const std::chrono::nanoseconds dt,
@@ -499,14 +560,13 @@
     *state = obs->X_hat;
     *P = obs->P;
     if (dt.count() != 0) {
-      PredictImpl(obs->U, dt, state, P);
+      PredictImpl(obs, dt, state, P);
     }
     if (!(obs->h && obs->dhdx)) {
       CHECK(obs->make_h);
       obs->make_h(*state, *P, &obs->h, &obs->dhdx);
     }
-    CorrectImpl(obs->R, obs->z, obs->h(*state, obs->U), obs->dhdx(*state),
-                state, P);
+    CorrectImpl(obs, state, P);
   }
 
   DrivetrainConfig<double> dt_config_;
@@ -547,9 +607,10 @@
                   "initialized.\n";
     return;
   }
-  auto cur_it =
-      observations_.PushFromBottom({t, t, State::Zero(), StateSquare::Zero(),
-                                    Input::Zero(), z, make_h, h, dhdx, R});
+  auto cur_it = observations_.PushFromBottom(
+      {t, t, State::Zero(), StateSquare::Zero(), Input::Zero(), z, make_h, h,
+       dhdx, R, StateSquare::Identity(), StateSquare::Zero(),
+       std::chrono::seconds(0), State::Zero()});
   if (cur_it == observations_.end()) {
     VLOG(1) << "Camera dropped off of end with time of "
             << aos::time::DurationInSeconds(t.time_since_epoch())
@@ -560,7 +621,6 @@
             << "s.\n";
     return;
   }
-
   // Now we populate any state information that depends on where the
   // observation was inserted into the queue. X_hat and P must be populated
   // from the values present in the observation *following* this one in
@@ -594,6 +654,12 @@
     next_it->prev_t = cur_it->t;
     cur_it->U = (U == nullptr) ? next_it->U : *U;
   }
+
+  if (kFullRewindOnEverySample) {
+    next_it = observations_.begin();
+    cur_it = next_it++;
+  }
+
   // Now we need to rerun the predict step from the previous to the new
   // observation as well as every following correct/predict up to the current
   // time.
diff --git a/frc971/control_loops/python/spline_graph.py b/frc971/control_loops/python/spline_graph.py
index 7885ec1..f6e3a0c 100755
--- a/frc971/control_loops/python/spline_graph.py
+++ b/frc971/control_loops/python/spline_graph.py
@@ -5,6 +5,7 @@
 gi.require_version('Gtk', '3.0')
 from gi.repository import Gdk, Gtk, GLib
 import cairo
+import basic_window
 
 class GridWindow(Gtk.Window):
     def method_connect(self, event, cb):
@@ -79,6 +80,7 @@
         self.drawing_area = GTK_Widget()
         self.eventBox.add(self.drawing_area)
 
+        self.method_connect("delete-event", basic_window.quit_main_loop)
         self.method_connect("key-release-event", self.key_press)
         self.method_connect("button-release-event", self.button_press)
         self.method_connect("configure-event", self.configure)
diff --git a/y2019/control_loops/drivetrain/localized_drivetrain_test.cc b/y2019/control_loops/drivetrain/localized_drivetrain_test.cc
index c5a2d18..46b9b8b 100644
--- a/y2019/control_loops/drivetrain/localized_drivetrain_test.cc
+++ b/y2019/control_loops/drivetrain/localized_drivetrain_test.cc
@@ -383,7 +383,7 @@
   // the target exactly. Instead, just run slightly past the target:
   EXPECT_LT(
       ::std::abs(::aos::math::DiffAngle(M_PI, drivetrain_plant_.state()(2, 0))),
-      0.5);
+      1.0);
   EXPECT_GT(HPSlotLeft().abs_pos().x(), drivetrain_plant_.state().x());
   EXPECT_NEAR(HPSlotLeft().abs_pos().y(), drivetrain_plant_.state().y(), 0.2);
 }
diff --git a/y2020/constants.cc b/y2020/constants.cc
index eefc158..131b0de 100644
--- a/y2020/constants.cc
+++ b/y2020/constants.cc
@@ -26,6 +26,7 @@
 
 const uint16_t kCompTeamNumber = 971;
 const uint16_t kPracticeTeamNumber = 9971;
+const uint16_t kSpareRoborioTeamNumber = 6971;
 
 const Values *DoGetValuesForTeam(uint16_t team) {
   Values *const r = new Values();
@@ -99,6 +100,7 @@
   switch (team) {
     // A set of constants for tests.
     case 1:
+    case kSpareRoborioTeamNumber:
       break;
 
     case kCompTeamNumber:
diff --git a/y2020/control_loops/drivetrain/localizer.cc b/y2020/control_loops/drivetrain/localizer.cc
index f84be3e..7d7c83f 100644
--- a/y2020/control_loops/drivetrain/localizer.cc
+++ b/y2020/control_loops/drivetrain/localizer.cc
@@ -122,7 +122,7 @@
   return best_data_match;
 }
 
-void Localizer::Update(const ::Eigen::Matrix<double, 2, 1> &U,
+void Localizer::Update(const Eigen::Matrix<double, 2, 1> &U,
                        aos::monotonic_clock::time_point now,
                        double left_encoder, double right_encoder,
                        double gyro_rate, const Eigen::Vector3d &accel) {
@@ -251,24 +251,36 @@
       AOS_LOG(WARNING, "Dropped image match due to heading mismatch.\n");
       continue;
     }
+    // In order to do the EKF correction, we determine the expected state based
+    // on the state at the time the image was captured; however, we insert the
+    // correction update itself at the current time. This is technically not
+    // quite correct, but saves substantial CPU usage by making it so that we
+    // don't have to constantly rewind the entire EKF history.
+    const std::optional<State> state_at_capture =
+        ekf_.LastStateBeforeTime(capture_time);
+    if (!state_at_capture.has_value()) {
+      AOS_LOG(WARNING, "Dropped image match due to age of image.\n");
+      continue;
+    }
+    const Input U = ekf_.MostRecentInput();
     // For the correction step, instead of passing in the measurement directly,
     // we pass in (0, 0, 0) as the measurement and then for the expected
     // measurement (Zhat) we calculate the error between the implied and actual
     // poses. This doesn't affect any of the math, it just makes the code a bit
     // more convenient to write given the Correct() interface we already have.
     ekf_.Correct(
-        Eigen::Vector3f::Zero(), nullptr, {},
-        [H, H_field_target, pose_robot_target](
-            const State &X, const Input &) -> Eigen::Vector3f {
-          const Eigen::Vector3f Z =
-              CalculateImpliedPose(X, H_field_target, pose_robot_target);
+        Eigen::Vector3f::Zero(), &U, {},
+        [H, H_field_target, pose_robot_target, state_at_capture](
+            const State &, const Input &) -> Eigen::Vector3f {
+          const Eigen::Vector3f Z = CalculateImpliedPose(
+              state_at_capture.value(), H_field_target, pose_robot_target);
           // Just in case we ever do encounter any, drop measurements if they
           // have non-finite numbers.
           if (!Z.allFinite()) {
             AOS_LOG(WARNING, "Got measurement with infinites or NaNs.\n");
             return Eigen::Vector3f::Zero();
           }
-          Eigen::Vector3f Zhat = H * X - Z;
+          Eigen::Vector3f Zhat = H * state_at_capture.value() - Z;
           // Rewrap angle difference to put it back in range. Note that this
           // component of the error is currently ignored (see definition of H
           // above).
@@ -283,7 +295,7 @@
           }
           return Zhat;
         },
-        [H](const State &) { return H; }, R, capture_time);
+        [H](const State &) { return H; }, R, now);
   }
 }
 
diff --git a/y2020/y2020_roborio.json b/y2020/y2020_roborio.json
index 7de7f51..b63982d 100644
--- a/y2020/y2020_roborio.json
+++ b/y2020/y2020_roborio.json
@@ -263,6 +263,7 @@
       "hostname": "roborio",
       "hostnames": [
         "roboRIO-971-FRC",
+        "roboRIO-6971-FRC",
         "roboRIO-7971-FRC",
         "roboRIO-8971-FRC",
         "roboRIO-9971-FRC"