Merge "Adding scale option to viewer, to help with bandwidth issues"
diff --git a/WORKSPACE b/WORKSPACE
index 8db3b72..c603581 100644
--- a/WORKSPACE
+++ b/WORKSPACE
@@ -587,16 +587,16 @@
http_archive(
name = "rsync",
build_file = "@//debian:rsync.BUILD",
- sha256 = "53be65a9214aaa6d1b9176f135184fb4a78ccefd58f95ce0da37e6a392dfeb60",
- url = "https://software.frc971.org/Build-Dependencies/rsync.tar.gz",
+ sha256 = "75ea8ce442c94fd12c0d00eb24860ac1de5ea6af56154bb1b195a96018c9e8a2",
+ url = "https://software.frc971.org/Build-Dependencies/rsync-2023.09.06.tar.gz",
)
# //debian:ssh
http_archive(
name = "ssh",
build_file = "@//debian:ssh.BUILD",
- sha256 = "470fdc1252a2133a9d3c3da778e892a5b88f04f402cb04d8eb1cff7853242034",
- url = "https://software.frc971.org/Build-Dependencies/ssh_v3.tar.gz",
+ sha256 = "9c4a9eefa605283486fb15a44ef9977d4a95b55c3a41c4e71cfbacd1cf20a4b5",
+ url = "https://software.frc971.org/Build-Dependencies/ssh-2023.09.06.tar.gz",
)
http_archive(
diff --git a/aos/BUILD b/aos/BUILD
index 34af003..acef762 100644
--- a/aos/BUILD
+++ b/aos/BUILD
@@ -198,6 +198,25 @@
visibility = ["//visibility:public"],
)
+autocxx_library(
+ name = "test_init_rs",
+ testonly = True,
+ srcs = ["test_init.rs"],
+ crate_name = "aos_test_init",
+ libs = [
+ "//aos/testing:tmpdir",
+ ],
+ override_cc_toolchain = "@llvm_toolchain//:cc-clang-x86_64-linux",
+ target_compatible_with = select({
+ "//conditions:default": ["//tools/platforms/rust:has_support"],
+ "//tools:has_msan": ["@platforms//:incompatible"],
+ }),
+ visibility = ["//visibility:public"],
+ deps = [
+ ":init_rs",
+ ],
+)
+
cc_library(
name = "realtime",
srcs = [
@@ -790,7 +809,18 @@
target_compatible_with = ["@platforms//os:linux"],
visibility = ["//visibility:public"],
deps = [
+ "//aos/util:file",
"@boringssl//:crypto",
"@com_google_absl//absl/types:span",
],
)
+
+cc_test(
+ name = "sha256_test",
+ srcs = ["sha256_test.cc"],
+ deps = [
+ ":sha256",
+ "//aos/testing:googletest",
+ "//aos/testing:tmpdir",
+ ],
+)
diff --git a/aos/config.bzl b/aos/config.bzl
index d281027..faaf45a 100644
--- a/aos/config.bzl
+++ b/aos/config.bzl
@@ -41,22 +41,32 @@
all_files = flatbuffers_depset.to_list() + src_depset.to_list()
ctx.actions.run(
- outputs = [config, stripped_config, binary_config],
+ outputs = [stripped_config, binary_config],
inputs = all_files,
arguments = ctx.attr.flags + [
- config.path,
- stripped_config.path,
- binary_config.path,
+ "--stripped_output=" + stripped_config.path,
+ "--binary_output=" + binary_config.path,
(ctx.label.workspace_root or ".") + "/" + ctx.files.src[0].short_path,
ctx.bin_dir.path,
] + [f.path for f in flatbuffers_depset.to_list()],
progress_message = "Flattening config",
executable = ctx.executable._config_flattener,
)
- runfiles = ctx.runfiles(files = [config, stripped_config, binary_config])
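+    # Generate the full config in its own, second action, presumably so the
+    # large full output is only built when something actually requests it.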
+ ctx.actions.run(
+ outputs = [config],
+ inputs = all_files,
+ arguments = ctx.attr.flags + [
+ "--full_output=" + config.path,
+ (ctx.label.workspace_root or ".") + "/" + ctx.files.src[0].short_path,
+ ctx.bin_dir.path,
+ ] + [f.path for f in flatbuffers_depset.to_list()],
+ progress_message = "Flattening config",
+ executable = ctx.executable._config_flattener,
+ )
+ runfiles = ctx.runfiles(files = [stripped_config, binary_config])
return [
DefaultInfo(
- files = depset([config, stripped_config, binary_config]),
+ files = depset([stripped_config, binary_config]),
runfiles = runfiles,
),
AosConfigInfo(
diff --git a/aos/config_flattener.cc b/aos/config_flattener.cc
index b089d24..8170a89 100644
--- a/aos/config_flattener.cc
+++ b/aos/config_flattener.cc
@@ -9,19 +9,25 @@
#include "aos/json_to_flatbuffer.h"
#include "aos/util/file.h"
+DEFINE_string(full_output, "",
+ "If provided, the path to write the full json configuration to.");
+DEFINE_string(
+ stripped_output, "",
+ "If provided, the path to write the stripped json configuration to.");
+DEFINE_string(
+ binary_output, "",
+ "If provided, the path to write the binary flatbuffer configuration to.");
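+
+// Example invocation (illustrative; file names are hypothetical):
+//   config_flattener --stripped_output=stripped.json --binary_output=config.bfbs \
+//       config.json bazel-out ping.bfbs pong.bfbs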
+
namespace aos {
int Main(int argc, char **argv) {
- CHECK_GE(argc, 6) << ": Too few arguments";
+ CHECK_GE(argc, 3) << ": Too few arguments";
- const char *full_output = argv[1];
- const char *stripped_output = argv[2];
- const char *binary_output = argv[3];
- const char *config_path = argv[4];
+ const char *config_path = argv[1];
// In order to support not only importing things by absolute path, but also
// importing the outputs of genrules (rather than just manually written
// files), we need to tell ReadConfig where the generated files directory is.
- const char *bazel_outs_directory = argv[5];
+ const char *bazel_outs_directory = argv[2];
VLOG(1) << "Reading " << config_path;
FlatbufferDetachedBuffer<Configuration> config =
@@ -38,7 +44,7 @@
std::vector<aos::FlatbufferVector<reflection::Schema>> schemas;
- for (int i = 6; i < argc; ++i) {
+ for (int i = 3; i < argc; ++i) {
schemas.emplace_back(FileToFlatbuffer<reflection::Schema>(argv[i]));
}
@@ -55,11 +61,17 @@
// TODO(austin): Figure out how to squash the schemas onto 1 line so it is
// easier to read?
VLOG(1) << "Flattened config is " << merged_config_json;
- util::WriteStringToFileOrDie(full_output, merged_config_json);
- util::WriteStringToFileOrDie(
- stripped_output,
- FlatbufferToJson(&config.message(), {.multi_line = true}));
- aos::WriteFlatbufferToFile(binary_output, merged_config);
+ if (!FLAGS_full_output.empty()) {
+ util::WriteStringToFileOrDie(FLAGS_full_output, merged_config_json);
+ }
+ if (!FLAGS_stripped_output.empty()) {
+ util::WriteStringToFileOrDie(
+ FLAGS_stripped_output,
+ FlatbufferToJson(&config.message(), {.multi_line = true}));
+ }
+ if (!FLAGS_binary_output.empty()) {
+ aos::WriteFlatbufferToFile(FLAGS_binary_output, merged_config);
+ }
return 0;
}
diff --git a/aos/configuration.cc b/aos/configuration.cc
index 70e7b48..78f9eb2 100644
--- a/aos/configuration.cc
+++ b/aos/configuration.cc
@@ -1144,6 +1144,13 @@
return nullptr;
}
+std::string_view NodeName(const Configuration *config, size_t node_index) {
+ if (!configuration::MultiNode(config)) {
+ return "(singlenode)";
+ }
+ return config->nodes()->Get(node_index)->name()->string_view();
+}
+
const Node *GetMyNode(const Configuration *config) {
const std::string hostname = (FLAGS_override_hostname.size() > 0)
? FLAGS_override_hostname
@@ -1641,9 +1648,10 @@
chrono::nanoseconds channel_storage_duration) {
// Use integer arithmetic and round up at all cost.
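+  // Worked example: 100 Hz with a 2 s channel_storage_duration yields
+  // (999'999'999 + 100 * 2'000'000'000) / 1'000'000'000 = 200 messages.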
return static_cast<int>(
- (999999999 + static_cast<int64_t>(frequency) *
- static_cast<int64_t>(channel_storage_duration.count())) /
- static_cast<int64_t>(1000000000));
+ (999'999'999 +
+ static_cast<int64_t>(frequency) *
+ static_cast<int64_t>(channel_storage_duration.count())) /
+ static_cast<int64_t>(1'000'000'000));
}
int QueueScratchBufferSize(const Channel *channel) {
diff --git a/aos/configuration.h b/aos/configuration.h
index e35c0bf..01b9e7c 100644
--- a/aos/configuration.h
+++ b/aos/configuration.h
@@ -128,6 +128,10 @@
const Node *GetNodeFromHostname(const Configuration *config,
std::string_view name);
+// Returns a printable name for the node: "(singlenode)" on a single-node
+// system, and the node's name otherwise.
+std::string_view NodeName(const Configuration *config, size_t node_index);
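+// For example (sketch): LOG(INFO) << "node: " << NodeName(config, node_index);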
+
// Returns a vector of the nodes in the config. (nullptr is considered the node
// in a single node world.)
std::vector<const Node *> GetNodes(const Configuration *config);
diff --git a/aos/events/BUILD b/aos/events/BUILD
index 174df38..c398e7b 100644
--- a/aos/events/BUILD
+++ b/aos/events/BUILD
@@ -460,6 +460,7 @@
"//aos:init",
"//aos:realtime",
"//aos/ipc_lib:lockless_queue",
+ "//aos/ipc_lib:shm_base",
"//aos/ipc_lib:signalfd",
"//aos/stl_mutex",
"//aos/util:phased_loop",
@@ -471,7 +472,7 @@
name = "shm_event_loop_test",
srcs = ["shm_event_loop_test.cc"],
flaky = True,
- shard_count = 24,
+ shard_count = 50,
target_compatible_with = ["@platforms//os:linux"],
deps = [
":event_loop_param_test",
@@ -622,7 +623,7 @@
rustc_flags = ["-Crelocation-model=static"],
deps = [
":ping_rust_fbs",
- "//aos:init_rs",
+ "//aos:test_init_rs",
"@crate_index//:futures",
"@rules_rust//tools/runfiles",
],
@@ -672,7 +673,7 @@
}),
deps = [
":ping_rust_fbs",
- "//aos:init_rs",
+ "//aos:test_init_rs",
"@crate_index//:futures",
"@rules_rust//tools/runfiles",
],
diff --git a/aos/events/event_loop.cc b/aos/events/event_loop.cc
index 072630f..ba71b97 100644
--- a/aos/events/event_loop.cc
+++ b/aos/events/event_loop.cc
@@ -35,6 +35,8 @@
}
} // namespace
+std::optional<std::string> EventLoop::default_version_string_;
+
std::pair<SharedSpan, absl::Span<uint8_t>> MakeSharedSpan(size_t size) {
AlignedOwningSpan *const span = reinterpret_cast<AlignedOwningSpan *>(
malloc(sizeof(AlignedOwningSpan) + size + kChannelDataAlignment - 1));
@@ -60,8 +62,12 @@
}
void RawSender::CheckOk(const RawSender::Error err) {
- CHECK_EQ(err, Error::kOk) << "Messages were sent too fast on channel: "
- << configuration::CleanedChannelToString(channel_);
+ if (err != Error::kOk) {
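+    // Get one final timing report out before crashing so the most recent
+    // statistics for this channel are captured.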
+ event_loop_->SendTimingReport();
+ CHECK_EQ(err, Error::kOk)
+ << "Messages were sent too fast on channel: "
+ << configuration::CleanedChannelToString(channel_);
+ }
}
RawSender::RawSender(EventLoop *event_loop, const Channel *channel)
@@ -146,7 +152,8 @@
PhasedLoopHandler::~PhasedLoopHandler() {}
EventLoop::EventLoop(const Configuration *configuration)
- : timing_report_(flatbuffers::DetachedBuffer()),
+ : version_string_(default_version_string_),
+ timing_report_(flatbuffers::DetachedBuffer()),
configuration_(configuration) {}
EventLoop::~EventLoop() {
@@ -472,8 +479,13 @@
flatbuffers::Offset<flatbuffers::String> name_offset =
fbb.CreateString(name());
+ const flatbuffers::Offset<flatbuffers::String> version_offset =
+ version_string_.has_value() ? fbb.CreateString(version_string_.value())
+ : flatbuffers::Offset<flatbuffers::String>();
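+  // A null offset is skipped by add_version() below, so the version field is
+  // simply omitted from the report when no version string is set.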
+
timing::Report::Builder report_builder(fbb);
report_builder.add_name(name_offset);
+ report_builder.add_version(version_offset);
report_builder.add_pid(GetTid());
if (timer_offsets.size() > 0) {
report_builder.add_timers(timers_offset);
@@ -642,6 +654,18 @@
cpu_set_t EventLoop::DefaultAffinity() { return aos::DefaultAffinity(); }
+void EventLoop::SetDefaultVersionString(std::string_view version) {
+ default_version_string_ = version;
+}
+
+void EventLoop::SetVersionString(std::string_view version) {
+ CHECK(!is_running())
+ << ": Can't do things that might alter the timing report while running.";
+ version_string_ = version;
+
+ UpdateTimingReport();
+}
+
void WatcherState::set_timing_report(timing::Watcher *watcher) {
watcher_ = watcher;
if (!watcher) {
diff --git a/aos/events/event_loop.fbs b/aos/events/event_loop.fbs
index c0aaf19..15c6149 100644
--- a/aos/events/event_loop.fbs
+++ b/aos/events/event_loop.fbs
@@ -95,6 +95,11 @@
// Identifier for the event loop. This should change every time a process
// gets restarted.
pid:int (id: 1);
+ // Version string associated with this application. This can be empty or
+ // anything (an actual version, a git sha, etc.). This provides a convenient
+ // way for applications to self-report their version in a way that gets
+ // logged.
+ version:string (id: 8);
// List of statistics for each watcher, sender, fetcher, timer, and
// phased loop.
diff --git a/aos/events/event_loop.h b/aos/events/event_loop.h
index 17e3e00..1ed4337 100644
--- a/aos/events/event_loop.h
+++ b/aos/events/event_loop.h
@@ -48,9 +48,16 @@
// Fetches the next message in the queue without blocking. Returns true if
// there was a new message and we got it.
bool FetchNext();
+ // Fetches the next message if there is one, and the provided function returns
+ // true. The data and buffer_index are the only pieces of the Context which
+ // are zeroed out. The function must be valid.
+ bool FetchNextIf(std::function<bool(const Context &context)> fn);
// Fetches the latest message without blocking.
bool Fetch();
+ // Fetches the latest message conditionally without blocking. fn must be
+ // valid.
+ bool FetchIf(std::function<bool(const Context &context)> fn);
// Returns the channel this fetcher uses.
const Channel *channel() const { return channel_; }
@@ -67,7 +74,11 @@
friend class EventLoop;
// Implementation
virtual std::pair<bool, monotonic_clock::time_point> DoFetchNext() = 0;
+ virtual std::pair<bool, monotonic_clock::time_point> DoFetchNextIf(
+ std::function<bool(const Context &)> fn) = 0;
virtual std::pair<bool, monotonic_clock::time_point> DoFetch() = 0;
+ virtual std::pair<bool, monotonic_clock::time_point> DoFetchIf(
+ std::function<bool(const Context &)> fn) = 0;
EventLoop *const event_loop_;
const Channel *const channel_;
@@ -241,6 +252,18 @@
return result;
}
+ // Fetches the next message if there is one, and the provided function returns
+ // true. The data and buffer_index are the only pieces of the Context which
+ // are zeroed out. The function must be valid.
+ bool FetchNextIf(std::function<bool(const Context &)> fn) {
+ const bool result = CHECK_NOTNULL(fetcher_)->FetchNextIf(std::move(fn));
+ if (result) {
+ CheckChannelDataAlignment(fetcher_->context().data,
+ fetcher_->context().size);
+ }
+ return result;
+ }
+
// Fetches the most recent message. Returns true if it fetched a new message.
// This will return the latest message regardless of if it was sent before or
// after the fetcher was created.
@@ -253,6 +276,18 @@
return result;
}
+ // Fetches the most recent message conditionally. Returns true if it fetched a
+ // new message. This will return the latest message regardless of if it was
+ // sent before or after the fetcher was created. The function must be valid.
+ bool FetchIf(std::function<bool(const Context &)> fn) {
+ const bool result = CHECK_NOTNULL(fetcher_)->FetchIf(std::move(fn));
+ if (result) {
+ CheckChannelDataAlignment(fetcher_->context().data,
+ fetcher_->context().size);
+ }
+ return result;
+ }
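+
+  // Usage sketch (illustrative; `cutoff` is a hypothetical time point):
+  //   fetcher.FetchIf([cutoff](const Context &context) {
+  //     return context.monotonic_event_time <= cutoff;
+  //   });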
+
// Returns a pointer to the contained flatbuffer, or nullptr if there is no
// available message.
const T *get() const {
@@ -773,6 +808,16 @@
// Returns the boot UUID.
virtual const UUID &boot_uuid() const = 0;
+  // Sets the version string that will be used in any newly constructed
+  // EventLoop objects. This can be overridden for individual EventLoops by
+  // calling EventLoop::SetVersionString(). The version string is populated
+  // into the timing report message. Makes a copy of the provided string_view.
+ static void SetDefaultVersionString(std::string_view version);
+
+ // Overrides the version string for this event loop. Makes a copy of the
+ // provided string_view.
+ void SetVersionString(std::string_view version);
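+
+  // For example (sketch; the version values are arbitrary):
+  //   EventLoop::SetDefaultVersionString("git:abc123");
+  //   event_loop->SetVersionString("git:abc123-dirty");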
+
protected:
// Sets the name of the event loop. This is the application name.
virtual void set_name(const std::string_view name) = 0;
@@ -863,6 +908,13 @@
private:
virtual pid_t GetTid() = 0;
+ // Default version string to be used in the timing report for any newly
+ // created EventLoop objects.
+ static std::optional<std::string> default_version_string_;
+
+ // Timing report version string for this event loop.
+ std::optional<std::string> version_string_;
+
FlatbufferDetachedBuffer<timing::Report> timing_report_;
::std::atomic<bool> is_running_{false};
diff --git a/aos/events/event_loop_param_test.cc b/aos/events/event_loop_param_test.cc
index 0559aae..c861284 100644
--- a/aos/events/event_loop_param_test.cc
+++ b/aos/events/event_loop_param_test.cc
@@ -319,6 +319,93 @@
}
}
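+// Returns a callback for use with Fetch{Next,}If which returns should_fetch
+// and, if called_count is non-null, counts how many times it is invoked.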
+std::function<bool(const Context &)> MakeShouldFetch(
+ bool should_fetch, size_t *called_count = nullptr) {
+ return [should_fetch, called_count](const Context &) {
+ if (called_count != nullptr) {
+ (*called_count)++;
+ }
+ return should_fetch;
+ };
+}
+
+// Tests that a fetcher using FetchIf can fetch from a sender.
+TEST_P(AbstractEventLoopTest, FetchIfWithoutRun) {
+ auto loop1 = Make();
+ auto loop2 = Make();
+ auto loop3 = MakePrimary();
+
+ auto sender = loop1->MakeSender<TestMessage>("/test");
+
+ auto fetcher = loop2->MakeFetcher<TestMessage>("/test");
+
+ for (const bool should_fetch : {true, false}) {
+ EXPECT_FALSE(fetcher.FetchIf(MakeShouldFetch(should_fetch)));
+ EXPECT_EQ(fetcher.get(), nullptr);
+
+ EXPECT_EQ(fetcher.context().monotonic_event_time,
+ monotonic_clock::min_time);
+ EXPECT_EQ(fetcher.context().monotonic_remote_time,
+ monotonic_clock::min_time);
+ EXPECT_EQ(fetcher.context().realtime_event_time, realtime_clock::min_time);
+ EXPECT_EQ(fetcher.context().realtime_remote_time, realtime_clock::min_time);
+ EXPECT_EQ(fetcher.context().source_boot_uuid, UUID::Zero());
+ EXPECT_EQ(fetcher.context().queue_index, 0xffffffffu);
+ EXPECT_EQ(fetcher.context().size, 0u);
+ EXPECT_EQ(fetcher.context().data, nullptr);
+ EXPECT_EQ(fetcher.context().buffer_index, -1);
+ }
+
+ aos::Sender<TestMessage>::Builder msg = sender.MakeBuilder();
+ TestMessage::Builder builder = msg.MakeBuilder<TestMessage>();
+ builder.add_value(200);
+ msg.CheckOk(msg.Send(builder.Finish()));
+
+ // Make sure failing to fetch won't affect anything.
+ EXPECT_FALSE(fetcher.FetchIf(MakeShouldFetch(false)));
+ EXPECT_EQ(fetcher.get(), nullptr);
+
+ EXPECT_EQ(fetcher.context().monotonic_event_time, monotonic_clock::min_time);
+ EXPECT_EQ(fetcher.context().monotonic_remote_time, monotonic_clock::min_time);
+ EXPECT_EQ(fetcher.context().realtime_event_time, realtime_clock::min_time);
+ EXPECT_EQ(fetcher.context().realtime_remote_time, realtime_clock::min_time);
+ EXPECT_EQ(fetcher.context().source_boot_uuid, UUID::Zero());
+ EXPECT_EQ(fetcher.context().queue_index, 0xffffffffu);
+ EXPECT_EQ(fetcher.context().size, 0u);
+ EXPECT_EQ(fetcher.context().data, nullptr);
+ EXPECT_EQ(fetcher.context().buffer_index, -1);
+
+ // And now confirm we succeed and everything gets set right.
+ EXPECT_TRUE(fetcher.FetchIf(MakeShouldFetch(true)));
+ ASSERT_FALSE(fetcher.get() == nullptr);
+ EXPECT_EQ(fetcher.get()->value(), 200);
+
+ const chrono::milliseconds kEpsilon(100);
+
+ const aos::monotonic_clock::time_point monotonic_now = loop2->monotonic_now();
+ const aos::realtime_clock::time_point realtime_now = loop2->realtime_now();
+ EXPECT_EQ(fetcher.context().monotonic_event_time,
+ fetcher.context().monotonic_remote_time);
+ EXPECT_EQ(fetcher.context().realtime_event_time,
+ fetcher.context().realtime_remote_time);
+
+ EXPECT_GE(fetcher.context().monotonic_event_time, monotonic_now - kEpsilon);
+ EXPECT_LE(fetcher.context().monotonic_event_time, monotonic_now + kEpsilon);
+ EXPECT_GE(fetcher.context().realtime_event_time, realtime_now - kEpsilon);
+ EXPECT_LE(fetcher.context().realtime_event_time, realtime_now + kEpsilon);
+ EXPECT_EQ(fetcher.context().source_boot_uuid, loop2->boot_uuid());
+ EXPECT_EQ(fetcher.context().queue_index, 0x0u);
+ EXPECT_EQ(fetcher.context().size, 20u);
+ EXPECT_NE(fetcher.context().data, nullptr);
+ if (read_method() == ReadMethod::PIN) {
+ EXPECT_GE(fetcher.context().buffer_index, 0);
+ EXPECT_LT(fetcher.context().buffer_index,
+ loop2->NumberBuffers(fetcher.channel()));
+ } else {
+ EXPECT_EQ(fetcher.context().buffer_index, -1);
+ }
+}
+
// Tests that watcher will receive all messages sent if they are sent after
// initialization and before running.
TEST_P(AbstractEventLoopTest, DoubleSendAtStartup) {
@@ -345,16 +432,10 @@
}
loop2->OnRun([&]() {
- {
+ for (int i = 200; i < 202; ++i) {
aos::Sender<TestMessage>::Builder msg = sender.MakeBuilder();
TestMessage::Builder builder = msg.MakeBuilder<TestMessage>();
- builder.add_value(200);
- msg.CheckOk(msg.Send(builder.Finish()));
- }
- {
- aos::Sender<TestMessage>::Builder msg = sender.MakeBuilder();
- TestMessage::Builder builder = msg.MakeBuilder<TestMessage>();
- builder.add_value(201);
+ builder.add_value(i);
msg.CheckOk(msg.Send(builder.Finish()));
}
});
@@ -374,16 +455,10 @@
::std::vector<int> values;
- {
+ for (int i = 200; i < 202; ++i) {
aos::Sender<TestMessage>::Builder msg = sender.MakeBuilder();
TestMessage::Builder builder = msg.MakeBuilder<TestMessage>();
- builder.add_value(200);
- msg.CheckOk(msg.Send(builder.Finish()));
- }
- {
- aos::Sender<TestMessage>::Builder msg = sender.MakeBuilder();
- TestMessage::Builder builder = msg.MakeBuilder<TestMessage>();
- builder.add_value(201);
+ builder.add_value(i);
msg.CheckOk(msg.Send(builder.Finish()));
}
@@ -412,16 +487,10 @@
::std::vector<int> values;
- {
+ for (int i = 200; i < 202; ++i) {
aos::Sender<TestMessage>::Builder msg = sender.MakeBuilder();
TestMessage::Builder builder = msg.MakeBuilder<TestMessage>();
- builder.add_value(200);
- msg.CheckOk(msg.Send(builder.Finish()));
- }
- {
- aos::Sender<TestMessage>::Builder msg = sender.MakeBuilder();
- TestMessage::Builder builder = msg.MakeBuilder<TestMessage>();
- builder.add_value(201);
+ builder.add_value(i);
msg.CheckOk(msg.Send(builder.Finish()));
}
@@ -451,16 +520,10 @@
::std::vector<int> values;
- {
+ for (int i = 200; i < 202; ++i) {
aos::Sender<TestMessage>::Builder msg = sender.MakeBuilder();
TestMessage::Builder builder = msg.MakeBuilder<TestMessage>();
- builder.add_value(200);
- msg.CheckOk(msg.Send(builder.Finish()));
- }
- {
- aos::Sender<TestMessage>::Builder msg = sender.MakeBuilder();
- TestMessage::Builder builder = msg.MakeBuilder<TestMessage>();
- builder.add_value(201);
+ builder.add_value(i);
msg.CheckOk(msg.Send(builder.Finish()));
}
@@ -483,6 +546,41 @@
EXPECT_THAT(0, values.size());
}
+// Tests that FetchNextIf sees no messages sent before the fetcher was
+// constructed.
+ auto loop1 = Make();
+ auto loop2 = MakePrimary();
+
+ auto sender = loop1->MakeSender<TestMessage>("/test");
+
+ ::std::vector<int> values;
+
+ for (int i = 200; i < 202; ++i) {
+ aos::Sender<TestMessage>::Builder msg = sender.MakeBuilder();
+ TestMessage::Builder builder = msg.MakeBuilder<TestMessage>();
+ builder.add_value(i);
+ msg.CheckOk(msg.Send(builder.Finish()));
+ }
+
+ auto fetcher = loop2->MakeFetcher<TestMessage>("/test");
+
+ // Add a timer to actually quit.
+ auto test_timer = loop2->AddTimer([&fetcher, &values, this]() {
+ while (fetcher.FetchNextIf(MakeShouldFetch(true))) {
+ values.push_back(fetcher.get()->value());
+ }
+ this->Exit();
+ });
+
+ loop2->OnRun([&test_timer, &loop2]() {
+ test_timer->Schedule(loop2->monotonic_now(),
+ ::std::chrono::milliseconds(100));
+ });
+
+ Run();
+ EXPECT_EQ(0, values.size());
+}
+
// Tests that Fetch returns the last message created before the loop was
// started.
TEST_P(AbstractEventLoopTest, FetchDataFromBeforeCreation) {
@@ -493,16 +591,10 @@
::std::vector<int> values;
- {
+ for (int i = 200; i < 202; ++i) {
aos::Sender<TestMessage>::Builder msg = sender.MakeBuilder();
TestMessage::Builder builder = msg.MakeBuilder<TestMessage>();
- builder.add_value(200);
- msg.CheckOk(msg.Send(builder.Finish()));
- }
- {
- aos::Sender<TestMessage>::Builder msg = sender.MakeBuilder();
- TestMessage::Builder builder = msg.MakeBuilder<TestMessage>();
- builder.add_value(201);
+ builder.add_value(i);
msg.CheckOk(msg.Send(builder.Finish()));
}
@@ -529,6 +621,50 @@
EXPECT_THAT(values, ::testing::ElementsAreArray({201}));
}
+// Tests that FetchIf returns the last message created before the loop was
+// started.
+TEST_P(AbstractEventLoopTest, FetchIfDataFromBeforeCreation) {
+ auto loop1 = Make();
+ auto loop2 = MakePrimary();
+
+ auto sender = loop1->MakeSender<TestMessage>("/test");
+
+ ::std::vector<int> values;
+
+ for (int i = 200; i < 202; ++i) {
+ aos::Sender<TestMessage>::Builder msg = sender.MakeBuilder();
+ TestMessage::Builder builder = msg.MakeBuilder<TestMessage>();
+ builder.add_value(i);
+ msg.CheckOk(msg.Send(builder.Finish()));
+ }
+
+ auto fetcher = loop2->MakeFetcher<TestMessage>("/test");
+
+ // Add a timer to actually quit.
+ auto test_timer = loop2->AddTimer([&fetcher, &values, this]() {
+ if (fetcher.FetchIf(MakeShouldFetch(true))) {
+ values.push_back(fetcher.get()->value());
+ }
+
+ if (fetcher.FetchIf(MakeShouldFetch(false))) {
+ values.push_back(fetcher.get()->value());
+ }
+ // Do it again to make sure we don't double fetch.
+ if (fetcher.FetchIf(MakeShouldFetch(true))) {
+ values.push_back(fetcher.get()->value());
+ }
+ this->Exit();
+ });
+
+ loop2->OnRun([&test_timer, &loop2]() {
+ test_timer->Schedule(loop2->monotonic_now(),
+ ::std::chrono::milliseconds(100));
+ });
+
+ Run();
+ EXPECT_THAT(values, ::testing::ElementsAreArray({201}));
+}
+
// Tests that timer handler is enabled after setup (even if it is in the past)
// and is disabled after running
TEST_P(AbstractEventLoopTest, CheckTimerDisabled) {
@@ -636,16 +772,10 @@
::std::vector<int> values;
- {
+ for (int i = 200; i < 202; ++i) {
aos::Sender<TestMessage>::Builder msg = sender.MakeBuilder();
TestMessage::Builder builder = msg.MakeBuilder<TestMessage>();
- builder.add_value(200);
- msg.CheckOk(msg.Send(builder.Finish()));
- }
- {
- aos::Sender<TestMessage>::Builder msg = sender.MakeBuilder();
- TestMessage::Builder builder = msg.MakeBuilder<TestMessage>();
- builder.add_value(201);
+ builder.add_value(i);
msg.CheckOk(msg.Send(builder.Finish()));
}
@@ -657,22 +787,10 @@
values.push_back(fetcher.get()->value());
}
- {
+ for (int i = 202; i < 205; ++i) {
aos::Sender<TestMessage>::Builder msg = sender.MakeBuilder();
TestMessage::Builder builder = msg.MakeBuilder<TestMessage>();
- builder.add_value(202);
- msg.CheckOk(msg.Send(builder.Finish()));
- }
- {
- aos::Sender<TestMessage>::Builder msg = sender.MakeBuilder();
- TestMessage::Builder builder = msg.MakeBuilder<TestMessage>();
- builder.add_value(203);
- msg.CheckOk(msg.Send(builder.Finish()));
- }
- {
- aos::Sender<TestMessage>::Builder msg = sender.MakeBuilder();
- TestMessage::Builder builder = msg.MakeBuilder<TestMessage>();
- builder.add_value(204);
+ builder.add_value(i);
msg.CheckOk(msg.Send(builder.Finish()));
}
@@ -696,6 +814,62 @@
EXPECT_THAT(values, ::testing::ElementsAreArray({201, 202, 204}));
}
+// Tests that Fetch{If,} and FetchNext{If,} interleave as expected.
+TEST_P(AbstractEventLoopTest, FetchAndFetchNextIfTogether) {
+ auto loop1 = Make();
+ auto loop2 = MakePrimary();
+
+ auto sender = loop1->MakeSender<TestMessage>("/test");
+
+ ::std::vector<int> values;
+
+ for (int i = 200; i < 202; ++i) {
+ aos::Sender<TestMessage>::Builder msg = sender.MakeBuilder();
+ TestMessage::Builder builder = msg.MakeBuilder<TestMessage>();
+ builder.add_value(i);
+ msg.CheckOk(msg.Send(builder.Finish()));
+ }
+
+ auto fetcher = loop2->MakeFetcher<TestMessage>("/test");
+
+ // Add a timer to actually quit.
+ auto test_timer = loop2->AddTimer([&fetcher, &values, &sender, this]() {
+ if (fetcher.Fetch()) {
+ values.push_back(fetcher.get()->value());
+ }
+
+ for (int i = 202; i < 205; ++i) {
+ aos::Sender<TestMessage>::Builder msg = sender.MakeBuilder();
+ TestMessage::Builder builder = msg.MakeBuilder<TestMessage>();
+ builder.add_value(i);
+ msg.CheckOk(msg.Send(builder.Finish()));
+ }
+
+ EXPECT_FALSE(fetcher.FetchNextIf(MakeShouldFetch(false)));
+
+ if (fetcher.FetchNext()) {
+ values.push_back(fetcher.get()->value());
+ }
+
+ EXPECT_FALSE(fetcher.FetchNextIf(MakeShouldFetch(false)));
+ EXPECT_FALSE(fetcher.FetchIf(MakeShouldFetch(false)));
+
+ if (fetcher.FetchIf(MakeShouldFetch(true))) {
+ values.push_back(fetcher.get()->value());
+ }
+
+ this->Exit();
+ });
+
+ loop2->OnRun([&test_timer, &loop2]() {
+ test_timer->Schedule(loop2->monotonic_now(),
+ ::std::chrono::milliseconds(100));
+ });
+
+ Run();
+ EXPECT_THAT(values, ::testing::ElementsAreArray({201, 202, 204}));
+}
+
// Tests that FetchNext behaves correctly when we get two messages in the queue
// but don't consume the first until after the second has been sent.
TEST_P(AbstractEventLoopTest, FetchNextTest) {
@@ -732,6 +906,49 @@
EXPECT_EQ(200, fetcher.get()->value());
}
+// Tests that FetchNextIf behaves correctly when we get two messages in the
+// queue but don't consume the first until after the second has been sent.
+TEST_P(AbstractEventLoopTest, FetchNextIfTest) {
+ auto send_loop = Make();
+ auto fetch_loop = Make();
+ auto sender = send_loop->MakeSender<TestMessage>("/test");
+ Fetcher<TestMessage> fetcher = fetch_loop->MakeFetcher<TestMessage>("/test");
+
+ {
+ aos::Sender<TestMessage>::Builder msg = sender.MakeBuilder();
+ TestMessage::Builder builder = msg.MakeBuilder<TestMessage>();
+ builder.add_value(100);
+ msg.CheckOk(msg.Send(builder.Finish()));
+ }
+
+ {
+ aos::Sender<TestMessage>::Builder msg = sender.MakeBuilder();
+ TestMessage::Builder builder = msg.MakeBuilder<TestMessage>();
+ builder.add_value(200);
+ msg.CheckOk(msg.Send(builder.Finish()));
+ }
+
+ size_t called_count = 0;
+ ASSERT_TRUE(fetcher.FetchNextIf(MakeShouldFetch(true, &called_count)));
+ ASSERT_NE(nullptr, fetcher.get());
+ EXPECT_EQ(100, fetcher.get()->value());
+ EXPECT_EQ(called_count, 1u);
+
+ ASSERT_FALSE(fetcher.FetchNextIf(MakeShouldFetch(false, &called_count)));
+ EXPECT_EQ(called_count, 2u);
+
+ ASSERT_TRUE(fetcher.FetchNextIf(MakeShouldFetch(true, &called_count)));
+ ASSERT_NE(nullptr, fetcher.get());
+ EXPECT_EQ(200, fetcher.get()->value());
+ EXPECT_EQ(called_count, 3u);
+
+ // When we run off the end of the queue, expect to still have the old message:
+ ASSERT_FALSE(fetcher.FetchNextIf(MakeShouldFetch(false, &called_count)));
+ EXPECT_EQ(called_count, 3u);
+ ASSERT_NE(nullptr, fetcher.get());
+ EXPECT_EQ(200, fetcher.get()->value());
+}
+
// Verify that a fetcher still holds its data, even after falling behind.
TEST_P(AbstractEventLoopTest, FetcherBehindData) {
auto send_loop = Make();
@@ -1271,6 +1488,84 @@
}
}
+// Tests that setting a default version string results in it getting populated
+// correctly.
+TEST_P(AbstractEventLoopTest, DefaultVersionStringInTimingReport) {
+ gflags::FlagSaver flag_saver;
+ FLAGS_timing_report_ms = 1000;
+
+ EventLoop::SetDefaultVersionString("default_version_string");
+
+ auto loop = MakePrimary();
+
+ Fetcher<timing::Report> report_fetcher =
+ loop->MakeFetcher<timing::Report>("/aos");
+
+ TimerHandler *exit_timer = loop->AddTimer([this]() { Exit(); });
+ loop->OnRun([exit_timer, &loop, &report_fetcher]() {
+ report_fetcher.Fetch();
+ exit_timer->Schedule(loop->monotonic_now() + std::chrono::seconds(2));
+ });
+
+ Run();
+
+ bool found_primary_report = false;
+ while (report_fetcher.FetchNext()) {
+ if (report_fetcher->name()->string_view() == "primary") {
+ found_primary_report = true;
+ EXPECT_EQ("default_version_string",
+ report_fetcher->version()->string_view());
+ } else {
+ FAIL() << report_fetcher->name()->string_view();
+ }
+ }
+
+ if (do_timing_reports() == DoTimingReports::kYes) {
+ EXPECT_TRUE(found_primary_report);
+ } else {
+ EXPECT_FALSE(found_primary_report);
+ }
+}
+
+// Tests that overriding the default version string results in it getting
+// populated correctly.
+TEST_P(AbstractEventLoopTest, OverrideVersionStringInTimingReport) {
+ gflags::FlagSaver flag_saver;
+ FLAGS_timing_report_ms = 1000;
+
+ EventLoop::SetDefaultVersionString("default_version_string");
+
+ auto loop = MakePrimary();
+ loop->SetVersionString("override_version");
+
+ Fetcher<timing::Report> report_fetcher =
+ loop->MakeFetcher<timing::Report>("/aos");
+
+ TimerHandler *exit_timer = loop->AddTimer([this]() { Exit(); });
+ loop->OnRun([exit_timer, &loop, &report_fetcher]() {
+ report_fetcher.Fetch();
+ exit_timer->Schedule(loop->monotonic_now() + std::chrono::seconds(2));
+ });
+
+ Run();
+
+ bool found_primary_report = false;
+ while (report_fetcher.FetchNext()) {
+ if (report_fetcher->name()->string_view() == "primary") {
+ found_primary_report = true;
+ EXPECT_EQ("override_version", report_fetcher->version()->string_view());
+ } else {
+ FAIL() << report_fetcher->name()->string_view();
+ }
+ }
+
+ if (do_timing_reports() == DoTimingReports::kYes) {
+ EXPECT_TRUE(found_primary_report);
+ } else {
+ EXPECT_FALSE(found_primary_report);
+ }
+}
+
// Verify that we can change a timer's parameters during execution.
TEST_P(AbstractEventLoopTest, TimerChangeParameters) {
auto loop = MakePrimary();
@@ -3192,6 +3487,19 @@
"May only send the buffer detached from this Sender");
}
+// Tests that setting the version string while the event loop is running fails.
+TEST_P(AbstractEventLoopDeathTest, SetVersionWhileRunning) {
+ auto loop1 = MakePrimary();
+
+ loop1->OnRun([&loop1, this]() {
+ EXPECT_DEATH({ loop1->SetVersionString("abcdef"); },
+ "timing report while running");
+ Exit();
+ });
+
+ Run();
+}
+
int TestChannelFrequency(EventLoop *event_loop) {
return event_loop->GetChannel<TestMessage>("/test")->frequency();
}
diff --git a/aos/events/event_loop_tmpl.h b/aos/events/event_loop_tmpl.h
index 6c132b2..9a5fe19 100644
--- a/aos/events/event_loop_tmpl.h
+++ b/aos/events/event_loop_tmpl.h
@@ -102,6 +102,37 @@
return false;
}
+inline bool RawFetcher::FetchNextIf(std::function<bool(const Context &)> fn) {
+ DCHECK(fn);
+ const auto result = DoFetchNextIf(std::move(fn));
+ if (result.first) {
+ if (timing_.fetcher) {
+ timing_.fetcher->mutate_count(timing_.fetcher->count() + 1);
+ }
+ const monotonic_clock::time_point monotonic_time = result.second;
+ ftrace_.FormatMessage(
+ "%.*s: fetch next if: now=%" PRId64 " event=%" PRId64 " queue=%" PRIu32,
+ static_cast<int>(ftrace_prefix_.size()), ftrace_prefix_.data(),
+ static_cast<int64_t>(monotonic_time.time_since_epoch().count()),
+ static_cast<int64_t>(
+ context_.monotonic_event_time.time_since_epoch().count()),
+ context_.queue_index);
+ const float latency =
+ std::chrono::duration_cast<std::chrono::duration<float>>(
+ monotonic_time - context_.monotonic_event_time)
+ .count();
+ timing_.latency.Add(latency);
+ return true;
+ }
+ ftrace_.FormatMessage(
+ "%.*s: fetch next: still event=%" PRId64 " queue=%" PRIu32,
+ static_cast<int>(ftrace_prefix_.size()), ftrace_prefix_.data(),
+ static_cast<int64_t>(
+ context_.monotonic_event_time.time_since_epoch().count()),
+ context_.queue_index);
+ return false;
+}
+
inline bool RawFetcher::Fetch() {
const auto result = DoFetch();
if (result.first) {
@@ -132,6 +163,38 @@
return false;
}
+inline bool RawFetcher::FetchIf(std::function<bool(const Context &)> fn) {
+ DCHECK(fn);
+
+ const auto result = DoFetchIf(std::move(fn));
+ if (result.first) {
+ if (timing_.fetcher) {
+ timing_.fetcher->mutate_count(timing_.fetcher->count() + 1);
+ }
+ const monotonic_clock::time_point monotonic_time = result.second;
+ ftrace_.FormatMessage(
+ "%.*s: fetch latest: now=%" PRId64 " event=%" PRId64 " queue=%" PRIu32,
+ static_cast<int>(ftrace_prefix_.size()), ftrace_prefix_.data(),
+ static_cast<int64_t>(monotonic_time.time_since_epoch().count()),
+ static_cast<int64_t>(
+ context_.monotonic_event_time.time_since_epoch().count()),
+ context_.queue_index);
+ const float latency =
+ std::chrono::duration_cast<std::chrono::duration<float>>(
+ monotonic_time - context_.monotonic_event_time)
+ .count();
+ timing_.latency.Add(latency);
+ return true;
+ }
+ ftrace_.FormatMessage(
+ "%.*s: fetch latest: still event=%" PRId64 " queue=%" PRIu32,
+ static_cast<int>(ftrace_prefix_.size()), ftrace_prefix_.data(),
+ static_cast<int64_t>(
+ context_.monotonic_event_time.time_since_epoch().count()),
+ context_.queue_index);
+ return false;
+}
+
inline RawSender::Error RawSender::Send(size_t size) {
return Send(size, monotonic_clock::min_time, realtime_clock::min_time,
0xffffffffu, event_loop_->boot_uuid());
diff --git a/aos/events/event_scheduler.cc b/aos/events/event_scheduler.cc
index 115b2c7..ae50917 100644
--- a/aos/events/event_scheduler.cc
+++ b/aos/events/event_scheduler.cc
@@ -459,6 +459,10 @@
}
void EventSchedulerScheduler::TemporarilyStopAndRun(std::function<void()> fn) {
+ if (in_on_run_) {
+ LOG(FATAL)
+ << "Can't call AllowApplicationCreationDuring from an OnRun callback.";
+ }
const bool was_running = is_running_;
if (is_running_) {
is_running_ = false;
@@ -475,11 +479,13 @@
for (EventScheduler *scheduler : schedulers_) {
scheduler->MaybeRunOnStartup();
}
+ in_on_run_ = true;
// We must trigger all the OnRun's *after* all the OnStartup callbacks are
// triggered because that is the contract that we have stated.
for (EventScheduler *scheduler : schedulers_) {
scheduler->MaybeRunOnRun();
}
+ in_on_run_ = false;
}
} // namespace aos
diff --git a/aos/events/event_scheduler.h b/aos/events/event_scheduler.h
index 848a1cf..55fa9a4 100644
--- a/aos/events/event_scheduler.h
+++ b/aos/events/event_scheduler.h
@@ -357,6 +357,8 @@
// True if we are running.
bool is_running_ = false;
+
+ bool in_on_run_ = false;
// The current time.
distributed_clock::time_point now_ = distributed_clock::epoch();
// List of schedulers to run in sync.
diff --git a/aos/events/logging/BUILD b/aos/events/logging/BUILD
index 2516e6c..c33606c 100644
--- a/aos/events/logging/BUILD
+++ b/aos/events/logging/BUILD
@@ -28,14 +28,31 @@
)
flatbuffer_cc_library(
+ name = "log_replayer_stats_fbs",
+ srcs = ["log_replayer_stats.fbs"],
+ gen_reflections = True,
+ target_compatible_with = ["@platforms//os:linux"],
+ deps = [
+ ":replay_config_fbs",
+ ],
+)
+
+cc_static_flatbuffer(
+ name = "log_replayer_stats_schema",
+ function = "aos::LogReplayerStatsSchema",
+ target = ":log_replayer_stats_fbs_reflection_out",
+ visibility = ["//visibility:public"],
+)
+
+flatbuffer_cc_library(
name = "replay_config_fbs",
srcs = ["log_replayer_config.fbs"],
gen_reflections = True,
- includes = [
- "//aos:configuration_fbs_includes",
- ],
target_compatible_with = ["@platforms//os:linux"],
visibility = ["//visibility:public"],
+ deps = [
+ "//aos:configuration_fbs",
+ ],
)
cc_library(
@@ -59,6 +76,8 @@
deps = [
":log_reader",
":log_reader_utils",
+ ":log_replayer_stats_fbs",
+ ":log_replayer_stats_schema",
":replay_config_fbs",
":replay_timing_fbs",
":replay_timing_schema",
@@ -173,6 +192,7 @@
"//aos:sha256",
"//aos:uuid",
"//aos:configuration",
+ "//aos/containers:error_list",
"//aos:flatbuffer_merge",
"//aos:flatbuffers",
"//aos/containers:resizeable_buffer",
@@ -389,6 +409,8 @@
":logfile_utils",
":logger_fbs",
"//aos:uuid",
+ "//aos/containers:error_list",
+ "//aos/containers:sized_array",
"@com_github_google_flatbuffers//:flatbuffers",
"@com_github_google_glog//:glog",
],
@@ -420,10 +442,12 @@
],
hdrs = [
"log_reader.h",
+ "replay_channels.h",
],
target_compatible_with = ["@platforms//os:linux"],
visibility = ["//visibility:public"],
deps = [
+ ":config_remapper",
":log_namer",
":log_writer",
":logfile_utils",
@@ -449,6 +473,58 @@
],
)
+cc_library(
+ name = "config_remapper",
+ srcs = [
+ "config_remapper.cc",
+ ],
+ hdrs = [
+ "config_remapper.h",
+ "replay_channels.h",
+ ],
+ target_compatible_with = ["@platforms//os:linux"],
+ visibility = ["//visibility:public"],
+ deps = [
+ ":logfile_utils",
+ ":logger_fbs",
+ "//aos/events:event_loop",
+ "//aos/network:message_bridge_server_fbs",
+ "//aos/network:multinode_timestamp_filter",
+ "//aos/network:remote_message_fbs",
+ "//aos/network:remote_message_schema",
+ "//aos/network:team_number",
+ "//aos/network:timestamp_filter",
+ "@com_github_google_flatbuffers//:flatbuffers",
+ "@com_google_absl//absl/strings",
+ ],
+)
+
+cc_test(
+ name = "config_remapper_test",
+ srcs = ["config_remapper_test.cc"],
+ copts = select({
+ "//tools:cpu_k8": ["-DLZMA=1"],
+ "//tools:cpu_arm64": ["-DLZMA=1"],
+ "//conditions:default": [],
+ }),
+ data = [
+ ":multinode_pingpong_combined_config",
+ ":multinode_pingpong_split3_config",
+ ":multinode_pingpong_split4_config",
+ ":multinode_pingpong_split4_mixed1_config",
+ ":multinode_pingpong_split4_mixed2_config",
+ ":multinode_pingpong_split4_reliable_config",
+ ":multinode_pingpong_split_config",
+ ":multinode_pingpong_triangle_split_config",
+ "//aos/events:pingpong_config",
+ ],
+ target_compatible_with = ["@platforms//os:linux"],
+ deps = [
+ ":config_remapper",
+ ":multinode_logger_test_lib",
+ ],
+)
+
cc_binary(
name = "log_cat",
srcs = [
@@ -469,6 +545,24 @@
],
)
+cc_library(
+ name = "logfile_validator",
+ srcs = [
+ "logfile_validator.cc",
+ ],
+ hdrs = ["logfile_validator.h"],
+ target_compatible_with = ["@platforms//os:linux"],
+ visibility = ["//visibility:public"],
+ deps = [
+ ":logfile_utils",
+ "//aos:configuration",
+ "//aos/events:simulated_event_loop",
+ "//aos/network:multinode_timestamp_filter",
+ "@com_github_gflags_gflags//:gflags",
+ "@com_github_google_glog//:glog",
+ ],
+)
+
cc_binary(
name = "timestamp_extractor",
srcs = [
@@ -478,6 +572,7 @@
visibility = ["//visibility:public"],
deps = [
":logfile_utils",
+ ":logfile_validator",
"//aos:configuration",
"//aos:init",
"//aos/events:simulated_event_loop",
@@ -576,6 +671,7 @@
deps = [
":log_reader",
":log_writer",
+ ":logfile_validator",
":snappy_encoder",
"//aos/events:message_counter",
"//aos/events:ping_lib",
@@ -708,6 +804,49 @@
deps = ["//aos/events:aos_config"],
)
+aos_config(
+ name = "multinode_pingpong_reboot_ooo_config",
+ src = "multinode_pingpong_reboot_ooo.json",
+ flatbuffers = [
+ "//aos/events:ping_fbs",
+ "//aos/events:pong_fbs",
+ "//aos/network:message_bridge_client_fbs",
+ "//aos/network:message_bridge_server_fbs",
+ "//aos/network:remote_message_fbs",
+ "//aos/network:timestamp_fbs",
+ ],
+ target_compatible_with = ["@platforms//os:linux"],
+ deps = ["//aos/events:aos_config"],
+)
+
+aos_config(
+ name = "multinode_pingpong_pi3_pingpong_config",
+ src = "multinode_pingpong_pi3_pingpong.json",
+ flatbuffers = [
+ "//aos/events:ping_fbs",
+ "//aos/events:pong_fbs",
+ "//aos/network:message_bridge_client_fbs",
+ "//aos/network:message_bridge_server_fbs",
+ "//aos/network:remote_message_fbs",
+ "//aos/network:timestamp_fbs",
+ ],
+ target_compatible_with = ["@platforms//os:linux"],
+ deps = ["//aos/events:aos_config"],
+)
+
+aos_config(
+ name = "multinode_single_node_config",
+ src = "multinode_single_node.json",
+ flatbuffers = [
+ "//aos/network:message_bridge_client_fbs",
+ "//aos/network:message_bridge_server_fbs",
+ "//aos/network:remote_message_fbs",
+ "//aos/network:timestamp_fbs",
+ ],
+ target_compatible_with = ["@platforms//os:linux"],
+ deps = ["//aos/events:aos_config"],
+)
+
cc_test(
name = "realtime_replay_test",
srcs = ["realtime_replay_test.cc"],
@@ -752,6 +891,8 @@
}),
data = [
":multinode_pingpong_combined_config",
+ ":multinode_pingpong_pi3_pingpong_config",
+ ":multinode_pingpong_reboot_ooo_config",
":multinode_pingpong_split3_config",
":multinode_pingpong_split4_config",
":multinode_pingpong_split4_mixed1_config",
@@ -759,6 +900,7 @@
":multinode_pingpong_split4_reliable_config",
":multinode_pingpong_split_config",
":multinode_pingpong_triangle_split_config",
+ ":multinode_single_node_config",
"//aos/events:pingpong_config",
],
shard_count = 10,
diff --git a/aos/events/logging/config_remapper.cc b/aos/events/logging/config_remapper.cc
new file mode 100644
index 0000000..21f45d5
--- /dev/null
+++ b/aos/events/logging/config_remapper.cc
@@ -0,0 +1,679 @@
+#include "aos/events/logging/config_remapper.h"
+
+#include <vector>
+
+#include "absl/strings/escaping.h"
+#include "flatbuffers/flatbuffers.h"
+
+#include "aos/events/logging/logger_generated.h"
+#include "aos/flatbuffer_merge.h"
+#include "aos/json_to_flatbuffer.h"
+#include "aos/network/multinode_timestamp_filter.h"
+#include "aos/network/remote_message_generated.h"
+#include "aos/network/remote_message_schema.h"
+#include "aos/network/team_number.h"
+#include "aos/network/timestamp_channel.h"
+
+namespace aos {
+using message_bridge::RemoteMessage;
+
+namespace {
+// Checks if the specified channel name/type exists in the config and, depending
+// on the value of conflict_handling, calls conflict_handler or just dies.
+template <typename F>
+void CheckAndHandleRemapConflict(
+ std::string_view new_name, std::string_view new_type,
+ const Configuration *config,
+ ConfigRemapper::RemapConflict conflict_handling, F conflict_handler) {
+ const Channel *existing_channel =
+ configuration::GetChannel(config, new_name, new_type, "", nullptr, true);
+ if (existing_channel != nullptr) {
+ switch (conflict_handling) {
+ case ConfigRemapper::RemapConflict::kDisallow:
+ LOG(FATAL)
+ << "Channel "
+ << configuration::StrippedChannelToString(existing_channel)
+ << " is already used--you can't remap an original channel to it.";
+ break;
+ case ConfigRemapper::RemapConflict::kCascade:
+ VLOG(1) << "Automatically remapping "
+ << configuration::StrippedChannelToString(existing_channel)
+ << " to avoid conflicts.";
+ conflict_handler();
+ break;
+ }
+ }
+}
+} // namespace
+
+namespace configuration {
+// We don't really want to expose this publicly, but log reader doesn't really
+// want to re-implement it.
+void HandleMaps(const flatbuffers::Vector<flatbuffers::Offset<Map>> *maps,
+ std::string *name, std::string_view type, const Node *node);
+} // namespace configuration
+
+bool CompareChannels(const Channel *c,
+ ::std::pair<std::string_view, std::string_view> p) {
+ int name_compare = c->name()->string_view().compare(p.first);
+ if (name_compare == 0) {
+ return c->type()->string_view() < p.second;
+ } else if (name_compare < 0) {
+ return true;
+ } else {
+ return false;
+ }
+}
+
+bool EqualsChannels(const Channel *c,
+ ::std::pair<std::string_view, std::string_view> p) {
+ return c->name()->string_view() == p.first &&
+ c->type()->string_view() == p.second;
+}
+
+// Copies the channel, removing the schema as we go. If new_name is provided,
+// it is used instead of the name inside the channel. If new_type is provided,
+// it is used instead of the type in the channel.
+flatbuffers::Offset<Channel> CopyChannel(const Channel *c,
+ std::string_view new_name,
+ std::string_view new_type,
+ flatbuffers::FlatBufferBuilder *fbb) {
+ CHECK_EQ(Channel::MiniReflectTypeTable()->num_elems, 14u)
+ << ": Merging logic needs to be updated when the number of channel "
+ "fields changes.";
+
+ flatbuffers::Offset<flatbuffers::String> name_offset =
+ fbb->CreateSharedString(new_name.empty() ? c->name()->string_view()
+ : new_name);
+ flatbuffers::Offset<flatbuffers::String> type_offset =
+ fbb->CreateSharedString(new_type.empty() ? c->type()->str() : new_type);
+ flatbuffers::Offset<flatbuffers::String> source_node_offset =
+ c->has_source_node() ? fbb->CreateSharedString(c->source_node()->str())
+ : 0;
+
+ flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<Connection>>>
+ destination_nodes_offset =
+ RecursiveCopyVectorTable(c->destination_nodes(), fbb);
+
+ flatbuffers::Offset<
+ flatbuffers::Vector<flatbuffers::Offset<flatbuffers::String>>>
+ logger_nodes_offset = CopyVectorSharedString(c->logger_nodes(), fbb);
+
+ Channel::Builder channel_builder(*fbb);
+ channel_builder.add_name(name_offset);
+ channel_builder.add_type(type_offset);
+ if (c->has_frequency()) {
+ channel_builder.add_frequency(c->frequency());
+ }
+ if (c->has_max_size()) {
+ channel_builder.add_max_size(c->max_size());
+ }
+ if (c->has_num_senders()) {
+ channel_builder.add_num_senders(c->num_senders());
+ }
+ if (c->has_num_watchers()) {
+ channel_builder.add_num_watchers(c->num_watchers());
+ }
+ if (!source_node_offset.IsNull()) {
+ channel_builder.add_source_node(source_node_offset);
+ }
+ if (!destination_nodes_offset.IsNull()) {
+ channel_builder.add_destination_nodes(destination_nodes_offset);
+ }
+ if (c->has_logger()) {
+ channel_builder.add_logger(c->logger());
+ }
+ if (!logger_nodes_offset.IsNull()) {
+ channel_builder.add_logger_nodes(logger_nodes_offset);
+ }
+ if (c->has_read_method()) {
+ channel_builder.add_read_method(c->read_method());
+ }
+ if (c->has_num_readers()) {
+ channel_builder.add_num_readers(c->num_readers());
+ }
+ if (c->has_channel_storage_duration()) {
+ channel_builder.add_channel_storage_duration(c->channel_storage_duration());
+ }
+ return channel_builder.Finish();
+}
+
+ConfigRemapper::ConfigRemapper(const Configuration *config,
+ const Configuration *replay_config,
+ const logger::ReplayChannels *replay_channels)
+ : remapped_configuration_(config),
+ original_configuration_(config),
+ replay_configuration_(replay_config),
+ replay_channels_(replay_channels) {
+ MakeRemappedConfig();
+
+ // If any remote timestamp channel was not marked NOT_LOGGED, then remap that
+ // channel to avoid the redundant logged data. Also, this loop handles the
+  // MessageHeader to RemoteMessage name change.
+ // Note: This path is mainly for backwards compatibility reasons, and should
+ // not be necessary for any new logs.
+ for (const Node *node : configuration::GetNodes(original_configuration())) {
+ message_bridge::ChannelTimestampFinder finder(original_configuration(),
+ "log_reader", node);
+
+ absl::btree_set<std::string_view> remote_nodes;
+
+ for (const Channel *channel : *original_configuration()->channels()) {
+ if (!configuration::ChannelIsSendableOnNode(channel, node)) {
+ continue;
+ }
+ if (!channel->has_destination_nodes()) {
+ continue;
+ }
+ for (const Connection *connection : *channel->destination_nodes()) {
+ if (configuration::ConnectionDeliveryTimeIsLoggedOnNode(connection,
+ node)) {
+ // Start by seeing if the split timestamp channels are being used for
+ // this message.
+ const Channel *timestamp_channel = configuration::GetChannel(
+ original_configuration(),
+ finder.SplitChannelName(channel, connection),
+ RemoteMessage::GetFullyQualifiedName(), "", node, true);
+
+ if (timestamp_channel != nullptr) {
+ // If for some reason a timestamp channel is not NOT_LOGGED (which
+ // is unusual), then remap the channel so that the replayed channel
+ // doesn't overlap with the special separate replay we do for
+ // timestamps.
+ if (timestamp_channel->logger() != LoggerConfig::NOT_LOGGED) {
+ RemapOriginalChannel<RemoteMessage>(
+ timestamp_channel->name()->string_view(), node);
+ }
+ continue;
+ }
+
+ // Otherwise collect this one up as a node to look for a combined
+ // channel from. It is more efficient to compare nodes than channels.
+ LOG(WARNING) << "Failed to find channel "
+ << finder.SplitChannelName(channel, connection)
+ << " on node " << FlatbufferToJson(node);
+ remote_nodes.insert(connection->name()->string_view());
+ }
+ }
+ }
+
+ std::vector<const Node *> timestamp_logger_nodes =
+ configuration::TimestampNodes(original_configuration(), node);
+ for (const std::string_view remote_node : remote_nodes) {
+ const std::string channel = finder.CombinedChannelName(remote_node);
+
+ // See if the log file is an old log with logger::MessageHeader channels
+ // in it, or a newer log with RemoteMessage. If we find an older log,
+ // rename the type too along with the name.
+ if (HasChannel<logger::MessageHeader>(channel, node)) {
+ CHECK(!HasChannel<RemoteMessage>(channel, node))
+ << ": Can't have both a logger::MessageHeader and RemoteMessage "
+ "remote "
+ "timestamp channel.";
+ // In theory, we should check NOT_LOGGED like RemoteMessage and be more
+ // careful about updating the config, but there are fewer and fewer logs
+ // with logger::MessageHeader remote messages, so it isn't worth the
+ // effort.
+ RemapOriginalChannel<logger::MessageHeader>(
+ channel, node, "/original", "aos.message_bridge.RemoteMessage");
+ } else {
+ CHECK(HasChannel<RemoteMessage>(channel, node))
+ << ": Failed to find {\"name\": \"" << channel << "\", \"type\": \""
+ << RemoteMessage::GetFullyQualifiedName() << "\"} for node "
+ << node->name()->string_view();
+ // Only bother to remap if there's something on the channel. We can
+ // tell if the channel was marked NOT_LOGGED or not. This makes the
+        // config not change unnecessarily when we replay a log with NOT_LOGGED
+ // messages.
+ if (HasOriginalChannel<RemoteMessage>(channel, node)) {
+ RemapOriginalChannel<RemoteMessage>(channel, node);
+ }
+ }
+ }
+ }
+ if (replay_configuration_) {
+ CHECK_EQ(configuration::MultiNode(remapped_configuration()),
+ configuration::MultiNode(replay_configuration_))
+ << ": Log file and replay config need to both be multi or single "
+ "node.";
+ }
+}
+
+ConfigRemapper::~ConfigRemapper() {
+  // Zero out some buffers. It's easy to cause use-after-frees on these, so
+  // make any such bug more obvious.
+ if (remapped_configuration_buffer_) {
+ remapped_configuration_buffer_->Wipe();
+ }
+}
+
+const Configuration *ConfigRemapper::original_configuration() const {
+ return original_configuration_;
+}
+
+const Configuration *ConfigRemapper::remapped_configuration() const {
+ return remapped_configuration_;
+}
+
+void ConfigRemapper::set_configuration(const Configuration *configuration) {
+ remapped_configuration_ = configuration;
+}
+
+std::vector<const Channel *> ConfigRemapper::RemappedChannels() const {
+ std::vector<const Channel *> result;
+ result.reserve(remapped_channels_.size());
+ for (auto &pair : remapped_channels_) {
+ const Channel *const original_channel =
+ CHECK_NOTNULL(original_configuration()->channels()->Get(pair.first));
+
+ auto channel_iterator = std::lower_bound(
+ remapped_configuration_->channels()->cbegin(),
+ remapped_configuration_->channels()->cend(),
+ std::make_pair(std::string_view(pair.second.remapped_name),
+ original_channel->type()->string_view()),
+ CompareChannels);
+
+ CHECK(channel_iterator != remapped_configuration_->channels()->cend());
+ CHECK(EqualsChannels(
+ *channel_iterator,
+ std::make_pair(std::string_view(pair.second.remapped_name),
+ original_channel->type()->string_view())));
+ result.push_back(*channel_iterator);
+ }
+ return result;
+}
+
+const Channel *ConfigRemapper::RemapChannel(const EventLoop *event_loop,
+ const Node *node,
+ const Channel *channel) {
+ std::string_view channel_name = channel->name()->string_view();
+ std::string_view channel_type = channel->type()->string_view();
+ const int channel_index =
+ configuration::ChannelIndex(original_configuration(), channel);
+ // If the channel is remapped, find the correct channel name to use.
+ if (remapped_channels_.count(channel_index) > 0) {
+ VLOG(3) << "Got remapped channel on "
+ << configuration::CleanedChannelToString(channel);
+ channel_name = remapped_channels_[channel_index].remapped_name;
+ }
+
+ VLOG(2) << "Going to remap channel " << channel_name << " " << channel_type;
+ const Channel *remapped_channel = configuration::GetChannel(
+ remapped_configuration(), channel_name, channel_type,
+ event_loop ? event_loop->name() : "log_reader", node);
+
+ CHECK(remapped_channel != nullptr)
+ << ": Unable to send {\"name\": \"" << channel_name << "\", \"type\": \""
+ << channel_type << "\"} because it is not in the provided configuration.";
+
+ return remapped_channel;
+}
+
+void ConfigRemapper::RemapOriginalChannel(std::string_view name,
+ std::string_view type,
+ std::string_view add_prefix,
+ std::string_view new_type,
+ RemapConflict conflict_handling) {
+ RemapOriginalChannel(name, type, nullptr, add_prefix, new_type,
+ conflict_handling);
+}
+
+void ConfigRemapper::RemapOriginalChannel(std::string_view name,
+ std::string_view type,
+ const Node *node,
+ std::string_view add_prefix,
+ std::string_view new_type,
+ RemapConflict conflict_handling) {
+ if (node != nullptr) {
+ VLOG(1) << "Node is " << FlatbufferToJson(node);
+ }
+ if (replay_channels_ != nullptr) {
+ CHECK(std::find(replay_channels_->begin(), replay_channels_->end(),
+ std::make_pair(std::string{name}, std::string{type})) !=
+ replay_channels_->end())
+ << "Attempted to remap channel " << name << " " << type
+ << " which is not included in the replay channels passed to "
+ "ConfigRemapper.";
+ }
+ const Channel *remapped_channel =
+ configuration::GetChannel(original_configuration(), name, type, "", node);
+ CHECK(remapped_channel != nullptr) << ": Failed to find {\"name\": \"" << name
+ << "\", \"type\": \"" << type << "\"}";
+ VLOG(1) << "Original {\"name\": \"" << name << "\", \"type\": \"" << type
+ << "\"}";
+ VLOG(1) << "Remapped "
+ << configuration::StrippedChannelToString(remapped_channel);
+
+ // We want to make /spray on node 0 go to /0/spray by snooping the maps. And
+ // we want it to degrade if the heuristics fail to just work.
+ //
+ // The easiest way to do this is going to be incredibly specific and verbose.
+  // Look up /spray and map it to /0/spray. Then, prefix the result with
+  // /original to get /original/0/spray. Then, create a map from
+  // /original/spray to /original/0/spray for just the type we were asked for.
+ if (name != remapped_channel->name()->string_view()) {
+ MapT new_map;
+ new_map.match = std::make_unique<ChannelT>();
+ new_map.match->name = absl::StrCat(add_prefix, name);
+ new_map.match->type = type;
+ if (node != nullptr) {
+ new_map.match->source_node = node->name()->str();
+ }
+ new_map.rename = std::make_unique<ChannelT>();
+ new_map.rename->name =
+ absl::StrCat(add_prefix, remapped_channel->name()->string_view());
+ maps_.emplace_back(std::move(new_map));
+ }
+
+ // Then remap the original channel to the prefixed channel.
+ const size_t channel_index =
+ configuration::ChannelIndex(original_configuration(), remapped_channel);
+ CHECK_EQ(0u, remapped_channels_.count(channel_index))
+ << "Already remapped channel "
+ << configuration::CleanedChannelToString(remapped_channel);
+
+ RemappedChannel remapped_channel_struct;
+ remapped_channel_struct.remapped_name =
+ std::string(add_prefix) +
+ std::string(remapped_channel->name()->string_view());
+ remapped_channel_struct.new_type = new_type;
+ const std::string_view remapped_type = new_type.empty() ? type : new_type;
+ CheckAndHandleRemapConflict(
+ remapped_channel_struct.remapped_name, remapped_type,
+ remapped_configuration_, conflict_handling,
+ [this, &remapped_channel_struct, remapped_type, node, add_prefix,
+ conflict_handling]() {
+ RemapOriginalChannel(remapped_channel_struct.remapped_name,
+ remapped_type, node, add_prefix, "",
+ conflict_handling);
+ });
+ remapped_channels_[channel_index] = std::move(remapped_channel_struct);
+ MakeRemappedConfig();
+}
+
+void ConfigRemapper::RenameOriginalChannel(const std::string_view name,
+ const std::string_view type,
+ const std::string_view new_name,
+ const std::vector<MapT> &add_maps) {
+ RenameOriginalChannel(name, type, nullptr, new_name, add_maps);
+}
+
+void ConfigRemapper::RenameOriginalChannel(const std::string_view name,
+ const std::string_view type,
+ const Node *const node,
+ const std::string_view new_name,
+ const std::vector<MapT> &add_maps) {
+ if (node != nullptr) {
+ VLOG(1) << "Node is " << FlatbufferToJson(node);
+ }
+ // First find the channel and rename it.
+ const Channel *remapped_channel =
+ configuration::GetChannel(original_configuration(), name, type, "", node);
+ CHECK(remapped_channel != nullptr) << ": Failed to find {\"name\": \"" << name
+ << "\", \"type\": \"" << type << "\"}";
+ VLOG(1) << "Original {\"name\": \"" << name << "\", \"type\": \"" << type
+ << "\"}";
+ VLOG(1) << "Remapped "
+ << configuration::StrippedChannelToString(remapped_channel);
+
+ const size_t channel_index =
+ configuration::ChannelIndex(original_configuration(), remapped_channel);
+ CHECK_EQ(0u, remapped_channels_.count(channel_index))
+ << "Already remapped channel "
+ << configuration::CleanedChannelToString(remapped_channel);
+
+ RemappedChannel remapped_channel_struct;
+ remapped_channel_struct.remapped_name = new_name;
+ remapped_channel_struct.new_type.clear();
+ remapped_channels_[channel_index] = std::move(remapped_channel_struct);
+
+ // Then add any provided maps.
+ for (const MapT &map : add_maps) {
+ maps_.push_back(map);
+ }
+
+ // Finally rewrite the config.
+ MakeRemappedConfig();
+}
+
+void ConfigRemapper::MakeRemappedConfig() {
+ // If no remapping occurred and we are using the original config, then there
+ // is nothing interesting to do here.
+ if (remapped_channels_.empty() && replay_configuration_ == nullptr) {
+ remapped_configuration_ = original_configuration();
+ return;
+ }
+ // Config to copy Channel definitions from. Use the specified
+ // replay_configuration_ if it has been provided.
+ const Configuration *const base_config = replay_configuration_ == nullptr
+ ? original_configuration()
+ : replay_configuration_;
+
+ // Create a config with all the channels, but un-sorted/merged. Collect up
+ // the schemas while we do this. Call MergeConfiguration to sort everything,
+ // and then merge it all in together.
+
+ // This is the builder that we use for the config containing all the new
+ // channels.
+ flatbuffers::FlatBufferBuilder fbb;
+ fbb.ForceDefaults(true);
+ std::vector<flatbuffers::Offset<Channel>> channel_offsets;
+
+ CHECK_EQ(Channel::MiniReflectTypeTable()->num_elems, 14u)
+ << ": Merging logic needs to be updated when the number of channel "
+ "fields changes.";
+
+ // List of schemas.
+ std::map<std::string_view, FlatbufferVector<reflection::Schema>> schema_map;
+ // Make sure our new RemoteMessage schema is in there for old logs without it.
+ schema_map.insert(std::make_pair(
+ message_bridge::RemoteMessage::GetFullyQualifiedName(),
+ FlatbufferVector<reflection::Schema>(FlatbufferSpan<reflection::Schema>(
+ message_bridge::RemoteMessageSchema()))));
+
+ // Reconstruct the remapped channels.
+ for (auto &pair : remapped_channels_) {
+ const Channel *const c = CHECK_NOTNULL(configuration::GetChannel(
+ base_config, original_configuration()->channels()->Get(pair.first), "",
+ nullptr));
+ channel_offsets.emplace_back(
+ CopyChannel(c, pair.second.remapped_name, "", &fbb));
+
+ if (c->has_destination_nodes()) {
+ for (const Connection *connection : *c->destination_nodes()) {
+ switch (connection->timestamp_logger()) {
+ case LoggerConfig::LOCAL_LOGGER:
+ case LoggerConfig::NOT_LOGGED:
+ // There is no timestamp channel associated with this, so ignore it.
+ break;
+
+ case LoggerConfig::REMOTE_LOGGER:
+ case LoggerConfig::LOCAL_AND_REMOTE_LOGGER:
+ // We want to make a split timestamp channel regardless of what type
+ // of log this used to be. No sense propagating the single
+ // timestamp channel.
+
+ CHECK(connection->has_timestamp_logger_nodes());
+ for (const flatbuffers::String *timestamp_logger_node :
+ *connection->timestamp_logger_nodes()) {
+ const Node *node =
+ configuration::GetNode(original_configuration(),
+ timestamp_logger_node->string_view());
+ message_bridge::ChannelTimestampFinder finder(
+ original_configuration(), "log_reader", node);
+
+            // We are assuming here that all the maps are set up correctly to
+            // handle arbitrary timestamps. Apply the maps for this node to
+            // see what name this ends up with.
+ std::string name = finder.SplitChannelName(
+ pair.second.remapped_name, c->type()->str(), connection);
+ std::string unmapped_name = name;
+ configuration::HandleMaps(original_configuration()->maps(), &name,
+ "aos.message_bridge.RemoteMessage",
+ node);
+ CHECK_NE(name, unmapped_name)
+ << ": Remote timestamp channel was not remapped, this is "
+ "very fishy";
+ flatbuffers::Offset<flatbuffers::String> channel_name_offset =
+ fbb.CreateString(name);
+ flatbuffers::Offset<flatbuffers::String> channel_type_offset =
+ fbb.CreateString("aos.message_bridge.RemoteMessage");
+ flatbuffers::Offset<flatbuffers::String> source_node_offset =
+ fbb.CreateString(timestamp_logger_node->string_view());
+
+ // Now, build a channel. Don't log it, 2 senders, and match the
+ // source frequency.
+ Channel::Builder channel_builder(fbb);
+ channel_builder.add_name(channel_name_offset);
+ channel_builder.add_type(channel_type_offset);
+ channel_builder.add_source_node(source_node_offset);
+ channel_builder.add_logger(LoggerConfig::NOT_LOGGED);
+ channel_builder.add_num_senders(2);
+ if (c->has_frequency()) {
+ channel_builder.add_frequency(c->frequency());
+ }
+ if (c->has_channel_storage_duration()) {
+ channel_builder.add_channel_storage_duration(
+ c->channel_storage_duration());
+ }
+ channel_offsets.emplace_back(channel_builder.Finish());
+ }
+ break;
+ }
+ }
+ }
+ }
+
+  // Now reconstruct the original channels, translating types as needed.
+ for (const Channel *c : *base_config->channels()) {
+ // Search for a mapping channel.
+ std::string_view new_type = "";
+ for (auto &pair : remapped_channels_) {
+ const Channel *const remapped_channel =
+ original_configuration()->channels()->Get(pair.first);
+ if (remapped_channel->name()->string_view() == c->name()->string_view() &&
+ remapped_channel->type()->string_view() == c->type()->string_view()) {
+ new_type = pair.second.new_type;
+ break;
+ }
+ }
+
+ // Copy everything over.
+ channel_offsets.emplace_back(CopyChannel(c, "", new_type, &fbb));
+
+ // Add the schema if it doesn't exist.
+ if (schema_map.find(c->type()->string_view()) == schema_map.end()) {
+ CHECK(c->has_schema());
+ schema_map.insert(std::make_pair(c->type()->string_view(),
+ RecursiveCopyFlatBuffer(c->schema())));
+ }
+ }
+
+ // The MergeConfiguration API takes a vector, not a map. Convert.
+ std::vector<FlatbufferVector<reflection::Schema>> schemas;
+ while (!schema_map.empty()) {
+ schemas.emplace_back(std::move(schema_map.begin()->second));
+ schema_map.erase(schema_map.begin());
+ }
+
+ // Create the Configuration containing the new channels that we want to add.
+ const flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<Channel>>>
+ channels_offset =
+ channel_offsets.empty() ? 0 : fbb.CreateVector(channel_offsets);
+
+ // Copy over the old maps.
+ std::vector<flatbuffers::Offset<Map>> map_offsets;
+ if (base_config->maps()) {
+ for (const Map *map : *base_config->maps()) {
+ map_offsets.emplace_back(RecursiveCopyFlatBuffer(map, &fbb));
+ }
+ }
+
+ // Now create the new maps. These are second so they take effect first.
+ for (const MapT &map : maps_) {
+ CHECK(!map.match->name.empty());
+ const flatbuffers::Offset<flatbuffers::String> match_name_offset =
+ fbb.CreateString(map.match->name);
+ flatbuffers::Offset<flatbuffers::String> match_type_offset;
+ if (!map.match->type.empty()) {
+ match_type_offset = fbb.CreateString(map.match->type);
+ }
+ flatbuffers::Offset<flatbuffers::String> match_source_node_offset;
+ if (!map.match->source_node.empty()) {
+ match_source_node_offset = fbb.CreateString(map.match->source_node);
+ }
+ CHECK(!map.rename->name.empty());
+ const flatbuffers::Offset<flatbuffers::String> rename_name_offset =
+ fbb.CreateString(map.rename->name);
+ Channel::Builder match_builder(fbb);
+ match_builder.add_name(match_name_offset);
+ if (!match_type_offset.IsNull()) {
+ match_builder.add_type(match_type_offset);
+ }
+ if (!match_source_node_offset.IsNull()) {
+ match_builder.add_source_node(match_source_node_offset);
+ }
+ const flatbuffers::Offset<Channel> match_offset = match_builder.Finish();
+
+ Channel::Builder rename_builder(fbb);
+ rename_builder.add_name(rename_name_offset);
+ const flatbuffers::Offset<Channel> rename_offset = rename_builder.Finish();
+
+ Map::Builder map_builder(fbb);
+ map_builder.add_match(match_offset);
+ map_builder.add_rename(rename_offset);
+ map_offsets.emplace_back(map_builder.Finish());
+ }
+
+ flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<Map>>>
+ maps_offsets = map_offsets.empty() ? 0 : fbb.CreateVector(map_offsets);
+
+ // And copy everything else over.
+ flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<Node>>>
+ nodes_offset = RecursiveCopyVectorTable(base_config->nodes(), &fbb);
+
+ flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<Application>>>
+ applications_offset =
+ RecursiveCopyVectorTable(base_config->applications(), &fbb);
+
+ // Now insert everything else in unmodified.
+ ConfigurationBuilder configuration_builder(fbb);
+ if (!channels_offset.IsNull()) {
+ configuration_builder.add_channels(channels_offset);
+ }
+ if (!maps_offsets.IsNull()) {
+ configuration_builder.add_maps(maps_offsets);
+ }
+ if (!nodes_offset.IsNull()) {
+ configuration_builder.add_nodes(nodes_offset);
+ }
+ if (!applications_offset.IsNull()) {
+ configuration_builder.add_applications(applications_offset);
+ }
+
+ if (base_config->has_channel_storage_duration()) {
+ configuration_builder.add_channel_storage_duration(
+ base_config->channel_storage_duration());
+ }
+
+ CHECK_EQ(Configuration::MiniReflectTypeTable()->num_elems, 6u)
+ << ": Merging logic needs to be updated when the number of configuration "
+ "fields changes.";
+
+ fbb.Finish(configuration_builder.Finish());
+
+ // Clean it up and return it! By using MergeConfiguration here, we'll
+ // actually get a deduplicated config for free too.
+ FlatbufferDetachedBuffer<Configuration> new_merged_config =
+ configuration::MergeConfiguration(
+ FlatbufferDetachedBuffer<Configuration>(fbb.Release()));
+
+ remapped_configuration_buffer_ =
+ std::make_unique<FlatbufferDetachedBuffer<Configuration>>(
+ configuration::MergeConfiguration(new_merged_config, schemas));
+
+ remapped_configuration_ = &remapped_configuration_buffer_->message();
+
+ // TODO(austin): Lazily re-build to save CPU?
+}
+
+} // namespace aos
diff --git a/aos/events/logging/config_remapper.h b/aos/events/logging/config_remapper.h
new file mode 100644
index 0000000..d1eb0c6
--- /dev/null
+++ b/aos/events/logging/config_remapper.h
@@ -0,0 +1,196 @@
+#ifndef AOS_EVENTS_LOGGING_CONFIG_REMAPPER_H_
+#define AOS_EVENTS_LOGGING_CONFIG_REMAPPER_H_
+
+#include <map>
+#include <string_view>
+#include <tuple>
+#include <vector>
+
+#include "flatbuffers/flatbuffers.h"
+
+#include "aos/events/event_loop.h"
+#include "aos/events/logging/logger_generated.h"
+#include "aos/events/logging/replay_channels.h"
+
+namespace aos {
+
+// This class remaps and renames channels in the configuration passed to the
+// constructor. Typically, the templated versions of RemapOriginalChannel and
+// RenameOriginalChannel are the main functions to use, for type safety. After
+// remapping and renaming, the remapped configuration can be accessed through
+// remapped_configuration(), and the original configuration, which is never
+// mutated, can be accessed through original_configuration().
+//
+// This class assumes no ownership over any pointers provided to it.
+//
+// Timestamp channels are automatically remapped on construction.
+//
+// Note: Unlike LogReader, this class does not need logfiles to function. This
+// logic originally lived in LogReader and was refactored out into this class.
+// The same API for remapping and renaming still exists in LogReader, which
+// now just passes the arguments along to this class.
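+//
+// A minimal usage sketch (hedged; assumes a valid Configuration *config and a
+// Ping channel on "/test"):
+//
+//   ConfigRemapper remapper(config);
+//   remapper.RemapOriginalChannel<examples::Ping>("/test");
+//   const Configuration *remapped = remapper.remapped_configuration();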
+class ConfigRemapper {
+ public:
+ ConfigRemapper(const Configuration *config,
+ const Configuration *replay_config = nullptr,
+ const logger::ReplayChannels *replay_channels = nullptr);
+ ~ConfigRemapper();
+
+  // Value type for the map of channel indices to new names. The channel index
+  // is an index into original_configuration(), and remapped_name is the name
+  // of the channel to send on instead of the original channel name.
+ struct RemappedChannel {
+ std::string remapped_name;
+ std::string new_type;
+ };
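+  // For example (illustrative values), an entry mapping channel index 5 to
+  // {"/original/test", ""} sends what was channel index 5 on "/original/test"
+  // with its original type.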
+
+  // Enum indicating how RemapOriginalChannel behaves when there is already
+  // a channel with the remapped name (e.g., as may happen when replaying a
+  // logfile that was itself generated from replay).
+ enum class RemapConflict {
+ // LOG(FATAL) on conflicts in remappings.
+ kDisallow,
+ // If we run into a conflict, attempt to remap the channel we would be
+ // overriding (and continue to do so if remapping *that* channel also
+ // generates a conflict).
+ // This will mean that if we repeatedly replay a log, we will end up
+ // stacking more and more /original's on the start of the oldest version
+ // of the channels.
+ kCascade
+ };
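+  // Illustrative kCascade example: if "/original/test" already exists when
+  // remapping "/test", the existing channel is itself remapped to
+  // "/original/original/test", and so on for any further conflicts.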
+
+ // Remaps a channel from the original configuration passed to the constructor
+ // to the given one. This operates on raw channel names, without any node or
+ // application specific mappings.
+ void RemapOriginalChannel(
+ std::string_view name, std::string_view type,
+ std::string_view add_prefix = "/original", std::string_view new_type = "",
+ RemapConflict conflict_handling = RemapConflict::kCascade);
+ template <typename T>
+ void RemapOriginalChannel(
+ std::string_view name, std::string_view add_prefix = "/original",
+ std::string_view new_type = "",
+ RemapConflict conflict_handling = RemapConflict::kCascade) {
+ RemapOriginalChannel(name, T::GetFullyQualifiedName(), add_prefix, new_type,
+ conflict_handling);
+ }
+
+  // Remaps the provided channel, but this overload respects node mappings and
+  // preserves them too. This makes it so that if /aos -> /pi1/aos on one node,
+  // /original/aos -> /original/pi1/aos on the same node after remapping, just
+  // like you would hope. If new_type is not empty, the new channel will use
+  // the provided type instead, which allows the message type to be changed.
+ //
+ // TODO(austin): If you have 2 nodes remapping something to the same channel,
+ // this doesn't handle that. No use cases exist yet for that, so it isn't
+ // being done yet.
+ void RemapOriginalChannel(
+ std::string_view name, std::string_view type, const Node *node,
+ std::string_view add_prefix = "/original", std::string_view new_type = "",
+ RemapConflict conflict_handling = RemapConflict::kCascade);
+
+ template <typename T>
+ void RemapOriginalChannel(
+ std::string_view name, const Node *node,
+ std::string_view add_prefix = "/original", std::string_view new_type = "",
+ RemapConflict conflict_handling = RemapConflict::kCascade) {
+ RemapOriginalChannel(name, T::GetFullyQualifiedName(), node, add_prefix,
+ new_type, conflict_handling);
+ }
+
+ // Similar to RemapOriginalChannel(), but lets you specify a name for the new
+ // channel without constraints. By default, this will not add any maps for the
+ // new channel. Use add_maps to specify any maps you'd like added.
+ void RenameOriginalChannel(std::string_view name, std::string_view type,
+ std::string_view new_name,
+ const std::vector<MapT> &add_maps = {});
+ template <typename T>
+ void RenameOriginalChannel(std::string_view name, std::string_view new_name,
+ const std::vector<MapT> &add_maps = {}) {
+ RenameOriginalChannel(name, T::GetFullyQualifiedName(), new_name, add_maps);
+ }
+ // The following overloads are more suitable for multi-node configurations,
+ // and let you rename a channel on a specific node.
+ void RenameOriginalChannel(std::string_view name, std::string_view type,
+ const Node *node, std::string_view new_name,
+ const std::vector<MapT> &add_maps = {});
+ template <typename T>
+ void RenameOriginalChannel(std::string_view name, const Node *node,
+ std::string_view new_name,
+ const std::vector<MapT> &add_maps = {}) {
+ RenameOriginalChannel(name, T::GetFullyQualifiedName(), node, new_name,
+ add_maps);
+ }
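+  // A hedged sketch (the node pointer and channel names are illustrative):
+  //
+  //   remapper.RenameOriginalChannel<examples::Ping>("/test", pi1,
+  //                                                  "/renamed/test");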
+
+ template <typename T>
+ bool HasChannel(std::string_view name, const Node *node = nullptr) {
+ return HasChannel(name, T::GetFullyQualifiedName(), node);
+ }
+ bool HasChannel(std::string_view name, std::string_view type,
+ const Node *node) {
+ return configuration::GetChannel(original_configuration(), name, type, "",
+ node, true) != nullptr;
+ }
+
+  // Returns true if the channel exists on the node and is logged in the
+  // original config.
+ template <typename T>
+ bool HasOriginalChannel(std::string_view name, const Node *node = nullptr) {
+ const Channel *channel =
+ configuration::GetChannel(original_configuration(), name,
+ T::GetFullyQualifiedName(), "", node, true);
+ if (channel == nullptr) return false;
+ return channel->logger() != LoggerConfig::NOT_LOGGED;
+ }
+
+ template <typename T>
+ void MaybeRemapOriginalChannel(std::string_view name,
+ const Node *node = nullptr) {
+ if (HasChannel<T>(name, node)) {
+ RemapOriginalChannel<T>(name, node);
+ }
+ }
+ template <typename T>
+ void MaybeRenameOriginalChannel(std::string_view name, const Node *node,
+ std::string_view new_name,
+ const std::vector<MapT> &add_maps = {}) {
+ if (HasChannel<T>(name, node)) {
+ RenameOriginalChannel<T>(name, node, new_name, add_maps);
+ }
+ }
+
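+  // Returns the channel from the remapped configuration to use in place of
+  // the provided channel from the original configuration.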
+ const Channel *RemapChannel(const EventLoop *event_loop, const Node *node,
+ const Channel *channel);
+  // Returns a list of all of the original channels that have been remapped.
+ std::vector<const Channel *> RemappedChannels() const;
+
+ void set_configuration(const Configuration *configuration);
+
+ // Returns the configuration that was originally passed to the constructor.
+ // This class does not own this pointer.
+ const Configuration *original_configuration() const;
+
+  // Returns the configuration that contains the remappings and renamings done on
+ // the original configuration. The pointer is invalidated whenever
+ // RemapOriginalChannel is called.
+ const Configuration *remapped_configuration() const;
+
+ private:
+ // Handle constructing a configuration with all the additional remapped
+ // channels from calls to RemapOriginalChannel.
+ void MakeRemappedConfig();
+
+ std::map<size_t, RemappedChannel> remapped_channels_;
+ std::vector<MapT> maps_;
+ std::unique_ptr<FlatbufferDetachedBuffer<Configuration>>
+ remapped_configuration_buffer_;
+
+ const Configuration *remapped_configuration_ = nullptr;
+ const Configuration *original_configuration_ = nullptr;
+ const Configuration *replay_configuration_ = nullptr;
+
+ const logger::ReplayChannels *replay_channels_ = nullptr;
+}; // class ConfigRemapper
+
+} // namespace aos
+#endif // AOS_EVENTS_LOGGING_CONFIG_REMAPPER_H_
diff --git a/aos/events/logging/config_remapper_test.cc b/aos/events/logging/config_remapper_test.cc
new file mode 100644
index 0000000..a1fedb2
--- /dev/null
+++ b/aos/events/logging/config_remapper_test.cc
@@ -0,0 +1,101 @@
+#include "aos/events/logging/config_remapper.h"
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+
+#include "aos/events/event_loop_generated.h"
+#include "aos/events/logging/log_reader.h"
+#include "aos/events/logging/multinode_logger_test_lib.h"
+#include "aos/events/message_counter.h"
+#include "aos/events/ping_lib.h"
+#include "aos/events/pong_lib.h"
+#include "aos/network/remote_message_generated.h"
+#include "aos/network/timestamp_generated.h"
+#include "aos/testing/tmpdir.h"
+#include "multinode_logger_test_lib.h"
+
+namespace aos {
+namespace testing {
+using namespace logger::testing;
+using namespace logger;
+namespace chrono = std::chrono;
+
+using ConfigRemapperTest = MultinodeLoggerTest;
+
+INSTANTIATE_TEST_SUITE_P(
+ All, ConfigRemapperTest,
+ ::testing::Combine(
+ ::testing::Values(
+ ConfigParams{"multinode_pingpong_combined_config.json", true,
+ kCombinedConfigSha1(), kCombinedConfigSha1(),
+ FileStrategy::kCombine,
+ ForceTimestampBuffering::kForceBufferTimestamps},
+ ConfigParams{"multinode_pingpong_split_config.json", false,
+ kSplitConfigSha1(), kReloggedSplitConfigSha1(),
+ FileStrategy::kCombine,
+ ForceTimestampBuffering::kForceBufferTimestamps}),
+ ::testing::ValuesIn(SupportedCompressionAlgorithms())));
+
+// Tests that we can read a config and remap a channel
+TEST_P(ConfigRemapperTest, RemapOriginalChannel) {
+ ConfigRemapper remapper(&config_.message());
+
+ remapper.RemapOriginalChannel<examples::Ping>("/test");
+
+ const Channel *channel = configuration::GetChannel<examples::Ping>(
+ remapper.remapped_configuration(), "/original/test", "pi1", nullptr);
+ EXPECT_NE(channel, nullptr);
+ EXPECT_EQ(channel->name()->string_view(), "/original/test");
+ EXPECT_EQ(channel->type()->string_view(), "aos.examples.Ping");
+}
+
+// Tests that we can read a config and rename a channel
+TEST_P(ConfigRemapperTest, RenameOriginalChannel) {
+ ConfigRemapper remapper(&config_.message());
+
+ remapper.RenameOriginalChannel<examples::Ping>("/test", "/original/test");
+
+ const Channel *channel = configuration::GetChannel<examples::Ping>(
+ remapper.remapped_configuration(), "/original/test", "pi1", nullptr);
+ EXPECT_NE(channel, nullptr);
+ EXPECT_EQ(channel->name()->string_view(), "/original/test");
+ EXPECT_EQ(channel->type()->string_view(), "aos.examples.Ping");
+}
+
+// Tests that we can remap a channel specifying a certain node
+TEST_P(ConfigRemapperTest, RemapOriginalChannelWithNode) {
+ ConfigRemapper remapper(&config_.message());
+
+ const Node *node =
+ configuration::GetNode(remapper.remapped_configuration(), "pi1");
+
+ // Remap just on pi1.
+ remapper.RemapOriginalChannel<aos::timing::Report>("/aos", node);
+
+ const Channel *channel = configuration::GetChannel<aos::timing::Report>(
+ remapper.remapped_configuration(), "/original/pi1/aos", "pi1", node);
+ EXPECT_NE(channel, nullptr);
+ EXPECT_EQ(channel->name()->string_view(), "/original/pi1/aos");
+ EXPECT_EQ(channel->type()->string_view(), "aos.timing.Report");
+}
+
+// Tests that we can rename a channel specifying a certain node
+TEST_P(ConfigRemapperTest, RenameOriginalChannelWithNode) {
+ ConfigRemapper remapper(&config_.message());
+
+ const Node *node =
+ configuration::GetNode(remapper.remapped_configuration(), "pi1");
+
+ // Rename just on pi1.
+ remapper.RenameOriginalChannel<aos::timing::Report>("/aos", node,
+ "/original/pi1/aos");
+
+ const Channel *channel = configuration::GetChannel<aos::timing::Report>(
+ remapper.remapped_configuration(), "/original/pi1/aos", "pi1", node);
+ EXPECT_NE(channel, nullptr);
+ EXPECT_EQ(channel->name()->string_view(), "/original/pi1/aos");
+ EXPECT_EQ(channel->type()->string_view(), "aos.timing.Report");
+}
+
+} // namespace testing
+} // namespace aos
diff --git a/aos/events/logging/log_backend.h b/aos/events/logging/log_backend.h
index 3a47f42..91f048c 100644
--- a/aos/events/logging/log_backend.h
+++ b/aos/events/logging/log_backend.h
@@ -269,10 +269,6 @@
// in memory. id is usually generated by log namer and looks like name of the
// file within a log folder.
virtual std::unique_ptr<LogSink> RequestFile(const std::string_view id) = 0;
-
- std::unique_ptr<LogSink> RequestFile(const LogSource::File &id) {
- return RequestFile(id.name);
- }
};
// Implements requesting log files from the file system.
diff --git a/aos/events/logging/log_namer.cc b/aos/events/logging/log_namer.cc
index 28223b2..6682ae3 100644
--- a/aos/events/logging/log_namer.cc
+++ b/aos/events/logging/log_namer.cc
@@ -10,6 +10,8 @@
#include "flatbuffers/flatbuffers.h"
#include "glog/logging.h"
+#include "aos/containers/error_list.h"
+#include "aos/containers/sized_array.h"
#include "aos/events/logging/logfile_utils.h"
#include "aos/events/logging/logger_generated.h"
#include "aos/flatbuffer_merge.h"
@@ -24,7 +26,8 @@
const Node *logger_node,
std::function<void(NewDataWriter *)> reopen,
std::function<void(NewDataWriter *)> close,
- size_t max_message_size)
+ size_t max_message_size,
+ std::initializer_list<StoredDataType> types)
: node_(node),
node_index_(configuration::GetNodeIndex(log_namer->configuration_, node)),
logger_node_index_(
@@ -32,9 +35,16 @@
log_namer_(log_namer),
reopen_(std::move(reopen)),
close_(std::move(close)),
- max_message_size_(max_message_size) {
+ max_message_size_(max_message_size),
+ max_out_of_order_duration_(log_namer_->base_max_out_of_order_duration()) {
+ allowed_data_types_.fill(false);
+
state_.resize(configuration::NodesCount(log_namer->configuration_));
CHECK_LT(node_index_, state_.size());
+ for (StoredDataType type : types) {
+ CHECK_LT(static_cast<size_t>(type), allowed_data_types_.size());
+ allowed_data_types_[static_cast<size_t>(type)] = true;
+ }
}
NewDataWriter::~NewDataWriter() {
@@ -48,9 +58,17 @@
if (header_written_) {
VLOG(1) << "Rotated " << name();
++parts_index_;
+
+ aos::SizePrefixedFlatbufferDetachedBuffer<LogFileHeader> header =
+ MakeHeader();
+
+ if (header.span().size() > max_message_size_) {
+ max_message_size_ = header.span().size();
+ }
+
reopen_(this);
header_written_ = false;
- QueueHeader(MakeHeader());
+ QueueHeader(std::move(header));
}
}
@@ -79,6 +97,11 @@
state_[node_index_].boot_uuid = source_node_boot_uuid;
VLOG(1) << "Rebooted " << name();
+ newest_message_time_ = monotonic_clock::min_time;
+ // When a node reboots, parts_uuid changes but the same writer continues to
+ // write the data, so we can reset the max out of order duration. If we don't
+ // do this, the max out of order duration can grow to an unreasonable value.
+ max_out_of_order_duration_ = log_namer_->base_max_out_of_order_duration();
}
void NewDataWriter::UpdateBoot(const UUID &source_node_boot_uuid) {
@@ -184,9 +207,38 @@
}
}
+void NewDataWriter::CopyDataMessage(
+ DataEncoder::Copier *coppier, const UUID &source_node_boot_uuid,
+ aos::monotonic_clock::time_point now,
+ aos::monotonic_clock::time_point message_time) {
+ CHECK(allowed_data_types_[static_cast<size_t>(StoredDataType::DATA)])
+ << ": Tried to write data on non-data writer.";
+ CopyMessage(coppier, source_node_boot_uuid, now, message_time);
+}
+
+void NewDataWriter::CopyTimestampMessage(
+ DataEncoder::Copier *coppier, const UUID &source_node_boot_uuid,
+ aos::monotonic_clock::time_point now,
+ aos::monotonic_clock::time_point message_time) {
+ CHECK(allowed_data_types_[static_cast<size_t>(StoredDataType::TIMESTAMPS)])
+ << ": Tried to write timestamps on non-timestamp writer.";
+ CopyMessage(coppier, source_node_boot_uuid, now, message_time);
+}
+
+void NewDataWriter::CopyRemoteTimestampMessage(
+ DataEncoder::Copier *coppier, const UUID &source_node_boot_uuid,
+ aos::monotonic_clock::time_point now,
+ aos::monotonic_clock::time_point message_time) {
+ CHECK(allowed_data_types_[static_cast<size_t>(
+ StoredDataType::REMOTE_TIMESTAMPS)])
+ << ": Tried to write remote timestamps on non-remote timestamp writer.";
+ CopyMessage(coppier, source_node_boot_uuid, now, message_time);
+}
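+
+// A hedged illustration of the type gating above: a writer constructed with
+// {StoredDataType::DATA} accepts CopyDataMessage(), while a call to
+// CopyTimestampMessage() on the same writer would fail its CHECK.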
+
void NewDataWriter::CopyMessage(DataEncoder::Copier *coppier,
const UUID &source_node_boot_uuid,
- aos::monotonic_clock::time_point now) {
+ aos::monotonic_clock::time_point now,
+ aos::monotonic_clock::time_point message_time) {
// Trigger a reboot if we detect the boot UUID change.
UpdateBoot(source_node_boot_uuid);
@@ -194,9 +246,65 @@
QueueHeader(MakeHeader());
}
+ bool max_out_of_order_duration_exceeded = false;
+  // Enforce the max out of order duration contract.
+  //
+  // Update the newest message time, and rotate the part file if the current
+  // message is more than max_out_of_order_duration behind the newest message
+  // we've logged so far.
+ if (message_time > newest_message_time_) {
+ newest_message_time_ = message_time;
+ }
+
+ // Don't consider messages before start up when checking for max out of order
+ // duration.
+ monotonic_clock::time_point monotonic_start_time =
+ log_namer_->monotonic_start_time(node_index_, source_node_boot_uuid);
+
+ if (std::chrono::nanoseconds((newest_message_time_ -
+ std::max(monotonic_start_time, message_time))) >
+ max_out_of_order_duration_) {
+    // If the new message is older than 2 * max_out_of_order_duration, doubling
+    // the duration won't be sufficient.
+    //
+    // Example: newest_message_time = 10, logged_message_time = 5,
+    // max_ooo_duration = 2.
+    //
+    // In this case the actual max_ooo_duration = 10 - 5 = 5, but if we double
+    // the existing max_ooo_duration we only get 4, which is not sufficient.
+    //
+    // So take the max of the two values.
+ max_out_of_order_duration_ =
+ 2 * std::max(max_out_of_order_duration_,
+ std::chrono::nanoseconds(
+ (newest_message_time_ - message_time)));
+ max_out_of_order_duration_exceeded = true;
+ }
+
// If the start time has changed for this node, trigger a rotation.
- if (log_namer_->monotonic_start_time(node_index_, source_node_boot_uuid) !=
- monotonic_start_time_) {
+ if ((monotonic_start_time != monotonic_start_time_) ||
+ max_out_of_order_duration_exceeded) {
+    // If we just received a start time now, we will rotate parts shortly. Use
+    // a reasonable max out of order duration in the new header based on the
+    // start time information available now.
+ if ((monotonic_start_time_ == monotonic_clock::min_time) &&
+ (monotonic_start_time != monotonic_clock::min_time)) {
+ // If we're writing current messages but we receive an older start time,
+ // we can pick a reasonable max ooo duration number for the next part.
+ //
+      // For example: our current max ooo duration is 0.3 seconds. We're
+      // writing messages at 20 seconds and receive a start time of 1 second.
+      // We don't need the max ooo duration to be (20 - 1) = 19 seconds,
+      // although that would still work.
+      //
+      // Pick the minimum max out of order duration value that satisfies the
+      // requirement, but bound it below by the base value we started with.
+ max_out_of_order_duration_ =
+ std::max(log_namer_->base_max_out_of_order_duration(),
+ std::min(max_out_of_order_duration_,
+ std::chrono::nanoseconds(newest_message_time_ -
+ monotonic_start_time)));
+ }
CHECK(header_written_);
Rotate();
}
@@ -207,6 +315,7 @@
CHECK(writer);
CHECK(header_written_) << ": Attempting to write message before header to "
<< writer->name();
+ CHECK_LE(coppier->size(), max_message_size_);
writer->CopyMessage(coppier, now);
}
@@ -221,8 +330,9 @@
} else {
CHECK_EQ(state_[logger_node_index].boot_uuid, logger_node_boot_uuid);
}
- return log_namer_->MakeHeader(node_index_, state_, parts_uuid(),
- parts_index_);
+ return log_namer_->MakeHeader(node_index_, state_, parts_uuid(), parts_index_,
+ max_out_of_order_duration_,
+ allowed_data_types_);
}
void NewDataWriter::QueueHeader(
@@ -252,6 +362,7 @@
CHECK(writer);
DataEncoder::SpanCopier coppier(header.span());
+ CHECK_LE(coppier.size(), max_message_size_);
writer->CopyMessage(&coppier, aos::monotonic_clock::now());
header_written_ = true;
monotonic_start_time_ = log_namer_->monotonic_start_time(
@@ -278,11 +389,14 @@
aos::SizePrefixedFlatbufferDetachedBuffer<LogFileHeader> LogNamer::MakeHeader(
size_t node_index, const std::vector<NewDataWriter::State> &state,
- const UUID &parts_uuid, int parts_index) {
+ const UUID &parts_uuid, int parts_index,
+ std::chrono::nanoseconds max_out_of_order_duration,
+ const std::array<bool, static_cast<size_t>(StoredDataType::MAX) + 1>
+ &allowed_data_types) {
const UUID &source_node_boot_uuid = state[node_index].boot_uuid;
const Node *const source_node =
configuration::GetNode(configuration_, node_index);
- CHECK_EQ(LogFileHeader::MiniReflectTypeTable()->num_elems, 34u);
+ CHECK_EQ(LogFileHeader::MiniReflectTypeTable()->num_elems, 35u);
flatbuffers::FlatBufferBuilder fbb;
fbb.ForceDefaults(true);
@@ -460,6 +574,18 @@
flatbuffers::Vector<flatbuffers::Offset<flatbuffers::String>>>
boot_uuids_offset = fbb.CreateVector(boot_uuid_offsets);
+ aos::ErrorList<StoredDataType> allowed_data_types_vector;
+ for (size_t type = static_cast<size_t>(StoredDataType::MIN);
+ type <= static_cast<size_t>(StoredDataType::MAX); ++type) {
+ if (allowed_data_types[type]) {
+ allowed_data_types_vector.Set(static_cast<StoredDataType>(type));
+ }
+ }
+
+ flatbuffers::Offset<flatbuffers::Vector<StoredDataType>> data_stored_offset =
+ fbb.CreateVector(allowed_data_types_vector.data(),
+ allowed_data_types_vector.size());
+
aos::logger::LogFileHeader::Builder log_file_header_builder(fbb);
log_file_header_builder.add_name(name_offset);
@@ -479,8 +605,9 @@
if (!configuration_offset.IsNull()) {
log_file_header_builder.add_configuration(configuration_offset);
}
+
log_file_header_builder.add_max_out_of_order_duration(
- header_.message().max_out_of_order_duration());
+ max_out_of_order_duration.count());
NodeState *node_state = GetNodeState(node_index, source_node_boot_uuid);
log_file_header_builder.add_monotonic_start_time(
@@ -552,6 +679,8 @@
log_file_header_builder
.add_oldest_logger_local_unreliable_monotonic_timestamps(
oldest_logger_local_unreliable_monotonic_timestamps_offset);
+
+ log_file_header_builder.add_data_stored(data_stored_offset);
fbb.FinishSizePrefixed(log_file_header_builder.Finish());
aos::SizePrefixedFlatbufferDetachedBuffer<LogFileHeader> result(
fbb.Release());
@@ -585,16 +714,10 @@
}
void MultiNodeLogNamer::Rotate(const Node *node) {
- if (node == this->node()) {
- if (data_writer_) {
- data_writer_->Rotate();
- }
- } else {
- for (std::pair<const Channel *const, NewDataWriter> &data_writer :
- data_writers_) {
- if (node == data_writer.second.node()) {
- data_writer.second.Rotate();
- }
+ for (auto &data_map : {&node_data_writers_, &node_timestamp_writers_}) {
+ auto it = data_map->find(node);
+ if (it != data_map->end()) {
+ it->second.Rotate();
}
}
}
@@ -622,6 +745,50 @@
CloseWriter(&writer);
}
+void MultiNodeLogNamer::NoticeNode(const Node *source_node) {
+ if (std::find(nodes_.begin(), nodes_.end(), source_node) == nodes_.end()) {
+ nodes_.emplace_back(source_node);
+ }
+}
+
+NewDataWriter *MultiNodeLogNamer::FindNodeDataWriter(const Node *source_node,
+ size_t max_message_size) {
+ NoticeNode(source_node);
+
+ auto it = node_data_writers_.find(source_node);
+ if (it != node_data_writers_.end()) {
+ it->second.UpdateMaxMessageSize(max_message_size);
+ return &(it->second);
+ }
+ return nullptr;
+}
+
+NewDataWriter *MultiNodeLogNamer::FindNodeTimestampWriter(
+ const Node *source_node, size_t max_message_size) {
+ NoticeNode(source_node);
+
+ auto it = node_timestamp_writers_.find(source_node);
+ if (it != node_timestamp_writers_.end()) {
+ it->second.UpdateMaxMessageSize(max_message_size);
+ return &(it->second);
+ }
+ return nullptr;
+}
+
+NewDataWriter *MultiNodeLogNamer::AddNodeDataWriter(const Node *source_node,
+ NewDataWriter &&writer) {
+ auto result = node_data_writers_.emplace(source_node, std::move(writer));
+ CHECK(result.second);
+ return &(result.first->second);
+}
+
+NewDataWriter *MultiNodeLogNamer::AddNodeTimestampWriter(
+ const Node *source_node, NewDataWriter &&writer) {
+ auto result = node_timestamp_writers_.emplace(source_node, std::move(writer));
+ CHECK(result.second);
+ return &(result.first->second);
+}
+
NewDataWriter *MultiNodeLogNamer::MakeWriter(const Channel *channel) {
// See if we can read the data on this node at all.
const bool is_readable =
@@ -638,39 +805,40 @@
return nullptr;
}
- // Now, sort out if this is data generated on this node, or not. It is
- // generated if it is sendable on this node.
- if (configuration::ChannelIsSendableOnNode(channel, this->node())) {
- if (!data_writer_) {
- MakeDataWriter();
- }
- data_writer_->UpdateMaxMessageSize(
- PackMessageSize(LogType::kLogRemoteMessage, channel->max_size()));
- return data_writer_.get();
- }
-
// Ok, we have data that is being forwarded to us that we are supposed to
// log. It needs to be logged with send timestamps, but be sorted enough
// to be able to be processed.
- CHECK(data_writers_.find(channel) == data_writers_.end());
- // Track that this node is being logged.
- const Node *source_node = configuration::GetNode(
- configuration_, channel->source_node()->string_view());
+ const Node *source_node =
+ configuration::MultiNode(configuration_)
+ ? configuration::GetNode(configuration_,
+ channel->source_node()->string_view())
+ : nullptr;
- if (std::find(nodes_.begin(), nodes_.end(), source_node) == nodes_.end()) {
- nodes_.emplace_back(source_node);
+ // If we already have a data writer for the node, then use the same writer for
+ // all channels of that node.
+ NewDataWriter *result = FindNodeDataWriter(
+ source_node,
+ PackMessageSize(LogType::kLogRemoteMessage, channel->max_size()));
+ if (result != nullptr) {
+ return result;
}
- NewDataWriter data_writer(
- this, source_node, node_,
- [this, channel](NewDataWriter *data_writer) {
- OpenWriter(channel, data_writer);
- },
- [this](NewDataWriter *data_writer) { CloseWriter(&data_writer->writer); },
- PackMessageSize(LogType::kLogRemoteMessage, channel->max_size()));
- return &(
- data_writers_.emplace(channel, std::move(data_writer)).first->second);
+ // If we don't have a data writer for the node, create one.
+ return AddNodeDataWriter(
+ source_node,
+ NewDataWriter{
+ this,
+ source_node,
+ node_,
+ [this, source_node](NewDataWriter *data_writer) {
+ OpenDataWriter(source_node, data_writer);
+ },
+ [this](NewDataWriter *data_writer) {
+ CloseWriter(&data_writer->writer);
+ },
+ PackMessageSize(LogType::kLogRemoteMessage, channel->max_size()),
+ {StoredDataType::DATA}});
}
NewDataWriter *MultiNodeLogNamer::MakeForwardedTimestampWriter(
@@ -680,21 +848,29 @@
configuration::ChannelIsReadableOnNode(channel, this->node());
CHECK(is_readable) << ": " << configuration::CleanedChannelToString(channel);
- CHECK(data_writers_.find(channel) == data_writers_.end());
+ CHECK_NE(node, this->node());
- if (std::find(nodes_.begin(), nodes_.end(), node) == nodes_.end()) {
- nodes_.emplace_back(node);
+ // If we have a remote timestamp writer for a particular node, use the same
+ // writer for all remote timestamp channels of that node.
+ NewDataWriter *result =
+ FindNodeTimestampWriter(node, PackRemoteMessageSize());
+ if (result != nullptr) {
+ return result;
}
- NewDataWriter data_writer(
- this, configuration::GetNode(configuration_, node), node_,
- [this, channel](NewDataWriter *data_writer) {
- OpenForwardedTimestampWriter(channel, data_writer);
- },
- [this](NewDataWriter *data_writer) { CloseWriter(&data_writer->writer); },
- PackRemoteMessageSize());
- return &(
- data_writers_.emplace(channel, std::move(data_writer)).first->second);
+ // If there are no remote timestamp writers for the node, create one.
+ return AddNodeTimestampWriter(
+ node, NewDataWriter{this,
+ configuration::GetNode(configuration_, node),
+ node_,
+ [this](NewDataWriter *data_writer) {
+ OpenForwardedTimestampWriter(node_, data_writer);
+ },
+ [this](NewDataWriter *data_writer) {
+ CloseWriter(&data_writer->writer);
+ },
+ PackRemoteMessageSize(),
+ {StoredDataType::REMOTE_TIMESTAMPS}});
}
NewDataWriter *MultiNodeLogNamer::MakeTimestampWriter(const Channel *channel) {
@@ -707,17 +883,30 @@
return nullptr;
}
- if (!data_writer_) {
- MakeDataWriter();
+ // There is only one of these.
+ NewDataWriter *result = FindNodeTimestampWriter(
+ this->node(), PackMessageSize(LogType::kLogDeliveryTimeOnly, 0));
+ if (result != nullptr) {
+ return result;
}
- data_writer_->UpdateMaxMessageSize(
- PackMessageSize(LogType::kLogDeliveryTimeOnly, 0));
- return data_writer_.get();
+
+ return AddNodeTimestampWriter(
+ node_, NewDataWriter{this,
+ node_,
+ node_,
+ [this](NewDataWriter *data_writer) {
+ OpenTimestampWriter(data_writer);
+ },
+ [this](NewDataWriter *data_writer) {
+ CloseWriter(&data_writer->writer);
+ },
+ PackMessageSize(LogType::kLogDeliveryTimeOnly, 0),
+ {StoredDataType::TIMESTAMPS}});
}
WriteCode MultiNodeLogNamer::Close() {
- data_writers_.clear();
- data_writer_.reset();
+ node_data_writers_.clear();
+ node_timestamp_writers_.clear();
if (ran_out_of_space_) {
return WriteCode::kOutOfSpace;
}
@@ -725,13 +914,15 @@
}
void MultiNodeLogNamer::ResetStatistics() {
- for (std::pair<const Channel *const, NewDataWriter> &data_writer :
- data_writers_) {
+ for (std::pair<const Node *const, NewDataWriter> &data_writer :
+ node_data_writers_) {
if (!data_writer.second.writer) continue;
data_writer.second.writer->WriteStatistics()->ResetStats();
}
- if (data_writer_ != nullptr && data_writer_->writer != nullptr) {
- data_writer_->writer->WriteStatistics()->ResetStats();
+ for (std::pair<const Node *const, NewDataWriter> &data_writer :
+ node_timestamp_writers_) {
+ if (!data_writer.second.writer) continue;
+ data_writer.second.writer->WriteStatistics()->ResetStats();
}
max_write_time_ = std::chrono::nanoseconds::zero();
max_write_time_bytes_ = -1;
@@ -743,47 +934,40 @@
}
void MultiNodeLogNamer::OpenForwardedTimestampWriter(
- const Channel *channel, NewDataWriter *data_writer) {
+ const Node * /*source_node*/, NewDataWriter *data_writer) {
+ const std::string filename = absl::StrCat(
+ "timestamps/remote_", data_writer->node()->name()->string_view(), ".part",
+ data_writer->parts_index(), ".bfbs", extension_);
+ CreateBufferWriter(filename, data_writer->max_message_size(),
+ &data_writer->writer);
+}
+
+void MultiNodeLogNamer::OpenDataWriter(const Node *source_node,
+ NewDataWriter *data_writer) {
+ std::string filename;
+
+ if (source_node != nullptr) {
+ if (source_node == node_) {
+ filename = absl::StrCat(source_node->name()->string_view(), "_");
+ } else {
+ filename = absl::StrCat("data/", source_node->name()->string_view(), "_");
+ }
+ }
+
+ absl::StrAppend(&filename, "data.part", data_writer->parts_index(), ".bfbs",
+ extension_);
+ CreateBufferWriter(filename, data_writer->max_message_size(),
+ &data_writer->writer);
+}
+
+void MultiNodeLogNamer::OpenTimestampWriter(NewDataWriter *data_writer) {
std::string filename =
- absl::StrCat("timestamps", channel->name()->string_view(), "/",
- channel->type()->string_view(), ".part",
+ absl::StrCat(node()->name()->string_view(), "_timestamps.part",
data_writer->parts_index(), ".bfbs", extension_);
CreateBufferWriter(filename, data_writer->max_message_size(),
&data_writer->writer);
}
-void MultiNodeLogNamer::OpenWriter(const Channel *channel,
- NewDataWriter *data_writer) {
- const std::string filename = absl::StrCat(
- CHECK_NOTNULL(channel->source_node())->string_view(), "_data",
- channel->name()->string_view(), "/", channel->type()->string_view(),
- ".part", data_writer->parts_index(), ".bfbs", extension_);
- CreateBufferWriter(filename, data_writer->max_message_size(),
- &data_writer->writer);
-}
-
-void MultiNodeLogNamer::MakeDataWriter() {
- if (!data_writer_) {
- data_writer_ = std::make_unique<NewDataWriter>(
- this, node_, node_,
- [this](NewDataWriter *writer) {
- std::string name;
- if (node() != nullptr) {
- name = absl::StrCat(name, node()->name()->string_view(), "_");
- }
- absl::StrAppend(&name, "data.part", writer->parts_index(), ".bfbs",
- extension_);
- CreateBufferWriter(name, writer->max_message_size(), &writer->writer);
- },
- [this](NewDataWriter *data_writer) {
- CloseWriter(&data_writer->writer);
- },
- // Default size is 0 so it will be obvious if something doesn't fix it
- // afterwards.
- 0);
- }
-}
-
void MultiNodeLogNamer::CreateBufferWriter(
std::string_view path, size_t max_message_size,
std::unique_ptr<DetachedBufferWriter> *destination) {
@@ -846,5 +1030,167 @@
}
}
+NewDataWriter *MinimalFileMultiNodeLogNamer::MakeWriter(
+ const Channel *channel) {
+ // See if we can read the data on this node at all.
+ const bool is_readable =
+ configuration::ChannelIsReadableOnNode(channel, this->node());
+ if (!is_readable) {
+ return nullptr;
+ }
+
+ // Then, see if we are supposed to log the data here.
+ const bool log_message =
+ configuration::ChannelMessageIsLoggedOnNode(channel, this->node());
+
+ if (!log_message) {
+ return nullptr;
+ }
+
+ // Ok, we have data that is being forwarded to us that we are supposed to
+ // log. It needs to be logged with send timestamps, but be sorted enough
+ // to be able to be processed.
+
+ const Node *source_node =
+ configuration::MultiNode(configuration_)
+ ? configuration::GetNode(configuration_,
+ channel->source_node()->string_view())
+ : nullptr;
+
+  // Pick the log type and stored data types based on where the data originates.
+ if (this->node() == source_node) {
+ // If we already have a data writer for the node, then use the same writer
+ // for all channels of that node.
+ NewDataWriter *result = FindNodeDataWriter(
+ source_node,
+ PackMessageSize(LogType::kLogMessage, channel->max_size()));
+ if (result != nullptr) {
+ return result;
+ }
+
+ return AddNodeDataWriter(
+ source_node,
+ NewDataWriter{
+ this,
+ source_node,
+ node_,
+ [this, source_node](NewDataWriter *data_writer) {
+ OpenNodeWriter(source_node, data_writer);
+ },
+ [this](NewDataWriter *data_writer) {
+ CloseWriter(&data_writer->writer);
+ },
+ PackMessageSize(LogType::kLogMessage, channel->max_size()),
+ {StoredDataType::DATA, StoredDataType::TIMESTAMPS}});
+ } else {
+ // If we already have a data writer for the node, then use the same writer
+ // for all channels of that node.
+ NewDataWriter *result = FindNodeDataWriter(
+ source_node,
+ PackMessageSize(LogType::kLogRemoteMessage, channel->max_size()));
+ if (result != nullptr) {
+ return result;
+ }
+
+ return AddNodeDataWriter(
+ source_node,
+ NewDataWriter{
+ this,
+ source_node,
+ node_,
+ [this, source_node](NewDataWriter *data_writer) {
+ OpenNodeWriter(source_node, data_writer);
+ },
+ [this](NewDataWriter *data_writer) {
+ CloseWriter(&data_writer->writer);
+ },
+ PackMessageSize(LogType::kLogRemoteMessage, channel->max_size()),
+ {StoredDataType::DATA, StoredDataType::REMOTE_TIMESTAMPS}});
+ }
+}
+
+NewDataWriter *MinimalFileMultiNodeLogNamer::MakeTimestampWriter(
+ const Channel *channel) {
+ bool log_delivery_times = false;
+ if (this->node() != nullptr) {
+ log_delivery_times = configuration::ConnectionDeliveryTimeIsLoggedOnNode(
+ channel, this->node(), this->node());
+ }
+ if (!log_delivery_times) {
+ return nullptr;
+ }
+
+ // There is only one of these.
+ NewDataWriter *result = FindNodeDataWriter(
+ this->node(), PackMessageSize(LogType::kLogDeliveryTimeOnly, 0));
+ if (result != nullptr) {
+ return result;
+ }
+
+ return AddNodeDataWriter(
+ node_, NewDataWriter{this,
+ node_,
+ node_,
+ [this](NewDataWriter *data_writer) {
+ OpenNodeWriter(node_, data_writer);
+ },
+ [this](NewDataWriter *data_writer) {
+ CloseWriter(&data_writer->writer);
+ },
+ PackMessageSize(LogType::kLogDeliveryTimeOnly, 0),
+ {StoredDataType::DATA, StoredDataType::TIMESTAMPS}});
+}
+
+NewDataWriter *MinimalFileMultiNodeLogNamer::MakeForwardedTimestampWriter(
+ const Channel *channel, const Node *node) {
+ // See if we can read the data on this node at all.
+ const bool is_readable =
+ configuration::ChannelIsReadableOnNode(channel, this->node());
+ CHECK(is_readable) << ": " << configuration::CleanedChannelToString(channel);
+
+ CHECK_NE(node, this->node());
+
+ // If we have a remote timestamp writer for a particular node, use the same
+ // writer for all remote timestamp channels of that node.
+ NewDataWriter *result = FindNodeDataWriter(node, PackRemoteMessageSize());
+ if (result != nullptr) {
+ return result;
+ }
+
+ // If there are no remote timestamp writers for the node, create one.
+ return AddNodeDataWriter(
+ node,
+ NewDataWriter{this,
+ configuration::GetNode(configuration_, node),
+ node_,
+ [this, node](NewDataWriter *data_writer) {
+ OpenNodeWriter(node, data_writer);
+ },
+ [this](NewDataWriter *data_writer) {
+ CloseWriter(&data_writer->writer);
+ },
+ PackRemoteMessageSize(),
+ {StoredDataType::DATA, StoredDataType::REMOTE_TIMESTAMPS}});
+}
+
+void MinimalFileMultiNodeLogNamer::OpenNodeWriter(const Node *source_node,
+ NewDataWriter *data_writer) {
+ std::string filename;
+
+ if (node() != nullptr) {
+ filename = absl::StrCat(node()->name()->string_view(), "_");
+ }
+
+ if (source_node != nullptr) {
+ absl::StrAppend(&filename, source_node->name()->string_view(), "_");
+ }
+
+ absl::StrAppend(&filename, "all.part", data_writer->parts_index(), ".bfbs",
+ extension_);
+ VLOG(1) << "Going to open " << filename;
+ CreateBufferWriter(filename, data_writer->max_message_size(),
+ &data_writer->writer);
+}
+
} // namespace logger
} // namespace aos
diff --git a/aos/events/logging/log_namer.h b/aos/events/logging/log_namer.h
index 8d69fbb..5374acf 100644
--- a/aos/events/logging/log_namer.h
+++ b/aos/events/logging/log_namer.h
@@ -41,7 +41,8 @@
NewDataWriter(LogNamer *log_namer, const Node *node, const Node *logger_node,
std::function<void(NewDataWriter *)> reopen,
std::function<void(NewDataWriter *)> close,
- size_t max_message_size);
+ size_t max_message_size,
+ std::initializer_list<StoredDataType> types);
void UpdateMaxMessageSize(size_t new_size) {
if (new_size > max_message_size_) {
@@ -52,6 +53,10 @@
}
size_t max_message_size() const { return max_message_size_; }
+ std::chrono::nanoseconds max_out_of_order_duration() const {
+ return max_out_of_order_duration_;
+ }
+
NewDataWriter(NewDataWriter &&other) = default;
aos::logger::NewDataWriter &operator=(NewDataWriter &&other) = default;
NewDataWriter(const NewDataWriter &) = delete;
@@ -70,10 +75,20 @@
bool reliable,
monotonic_clock::time_point monotonic_timestamp_time =
monotonic_clock::min_time);
+
// Copies a message with the provided boot UUID.
- void CopyMessage(DataEncoder::Copier *coppier,
- const UUID &source_node_boot_uuid,
- aos::monotonic_clock::time_point now);
+ void CopyDataMessage(DataEncoder::Copier *copier,
+ const UUID &source_node_boot_uuid,
+ aos::monotonic_clock::time_point now,
+ aos::monotonic_clock::time_point message_time);
+ void CopyTimestampMessage(DataEncoder::Copier *copier,
+ const UUID &source_node_boot_uuid,
+ aos::monotonic_clock::time_point now,
+ aos::monotonic_clock::time_point message_time);
+ void CopyRemoteTimestampMessage(
+ DataEncoder::Copier *copier, const UUID &source_node_boot_uuid,
+ aos::monotonic_clock::time_point now,
+ aos::monotonic_clock::time_point message_time);
// Updates the current boot for the source node. This is useful when you want
// to queue a message that may trigger a reboot rotation, but then need to
@@ -139,6 +154,11 @@
// Signals that a node has rebooted.
void Reboot(const UUID &source_node_boot_uuid);
+ void CopyMessage(DataEncoder::Copier *copier,
+ const UUID &source_node_boot_uuid,
+ aos::monotonic_clock::time_point now,
+ aos::monotonic_clock::time_point message_time);
+
void QueueHeader(
aos::SizePrefixedFlatbufferDetachedBuffer<LogFileHeader> &&header);
@@ -160,6 +180,28 @@
std::vector<State> state_;
size_t max_message_size_;
+
+  // Each data writer logs the channels for one node, i.e. each data writer
+  // writes one file. We may encounter messages which violate the max out of
+  // order duration specified in the header of that file, in which case we
+  // rotate the data writer and start a new part for that particular file.
+  // This shouldn't affect the headers of other data writers, so make this a
+  // property of the individual data writer instead of the overall log.
+ std::chrono::nanoseconds max_out_of_order_duration_;
+
+  // Monotonic time point of the newest message we've logged so far, i.e.
+  // Message X - time Z
+  // Message Y - time Z + 1
+  // newest_message_time_ = Z + 1 (even if X was logged after Y)
+  //
+  // Since messages can be logged out of order, this helps determine if the
+  // max out of order duration was violated.
+ monotonic_clock::time_point newest_message_time_ = monotonic_clock::min_time;
+
+ // An array with a bool for each value of StoredDataType representing if that
+ // data type is allowed to be logged by this object.
+ std::array<bool, static_cast<size_t>(StoredDataType::MAX) + 1>
+ allowed_data_types_;
};
// Interface describing how to name, track, and add headers to log file parts.
@@ -252,6 +294,14 @@
return node_state->monotonic_start_time;
}
+  // This returns the initial max out of order duration set in the header
+  // template by the logger based on the polling period. It may be different
+  // from the actual duration used by the data writer.
+ std::chrono::nanoseconds base_max_out_of_order_duration() const {
+ return std::chrono::nanoseconds(
+ header_.message().max_out_of_order_duration());
+ }
+
protected:
// Structure with state per node about times and such.
struct NodeState {
@@ -271,7 +321,10 @@
// them with the arguments provided.
aos::SizePrefixedFlatbufferDetachedBuffer<LogFileHeader> MakeHeader(
size_t node_index, const std::vector<NewDataWriter::State> &state,
- const UUID &parts_uuid, int parts_index);
+ const UUID &parts_uuid, int parts_index,
+ std::chrono::nanoseconds max_out_of_order_duration,
+ const std::array<bool, static_cast<size_t>(StoredDataType::MAX) + 1>
+ &allowed_data_types);
EventLoop *event_loop_;
const Configuration *const configuration_;
@@ -451,33 +504,50 @@
LogBackend *log_backend() { return log_backend_.get(); }
const LogBackend *log_backend() const { return log_backend_.get(); }
- private:
- // Opens up a writer for timestamps forwarded back.
- void OpenForwardedTimestampWriter(const Channel *channel,
- NewDataWriter *data_writer);
+ // Returns the data writer or timestamp writer if we find one for the provided
+ // node.
+ NewDataWriter *FindNodeDataWriter(const Node *node, size_t max_message_size);
+ NewDataWriter *FindNodeTimestampWriter(const Node *node,
+ size_t max_message_size);
- // Opens up a writer for remote data.
- void OpenWriter(const Channel *channel, NewDataWriter *data_writer);
+ // Saves the data writer or timestamp writer for the provided node.
+ NewDataWriter *AddNodeDataWriter(const Node *node, NewDataWriter &&writer);
+ NewDataWriter *AddNodeTimestampWriter(const Node *node,
+ NewDataWriter &&writer);
- // Opens the main data writer file for this node responsible for data_writer_.
- void MakeDataWriter();
+ void CloseWriter(std::unique_ptr<DetachedBufferWriter> *writer_pointer);
void CreateBufferWriter(std::string_view path, size_t max_message_size,
std::unique_ptr<DetachedBufferWriter> *destination);
- void CloseWriter(std::unique_ptr<DetachedBufferWriter> *writer_pointer);
+ std::string extension_;
+
+ private:
+ // Opens up a writer for timestamps forwarded back.
+ void OpenForwardedTimestampWriter(const Node *source_node,
+ NewDataWriter *data_writer);
+
+ // Opens up a writer for remote data.
+ void OpenDataWriter(const Node *source_node, NewDataWriter *data_writer);
+ void OpenTimestampWriter(NewDataWriter *data_writer);
+
+ // Tracks the node in nodes_.
+ void NoticeNode(const Node *source_node);
// A version of std::accumulate which operates over all of our DataWriters.
template <typename T, typename BinaryOperation>
T accumulate_data_writers(T t, BinaryOperation op) const {
- for (const std::pair<const Channel *const, NewDataWriter> &data_writer :
- data_writers_) {
+ for (const std::pair<const Node *const, NewDataWriter> &data_writer :
+ node_data_writers_) {
if (data_writer.second.writer != nullptr) {
t = op(std::move(t), data_writer.second);
}
}
- if (data_writer_ != nullptr && data_writer_->writer != nullptr) {
- t = op(std::move(t), *data_writer_);
+ for (const std::pair<const Node *const, NewDataWriter> &data_writer :
+ node_timestamp_writers_) {
+ if (data_writer.second.writer != nullptr) {
+ t = op(std::move(t), data_writer.second);
+ }
}
return t;
}
@@ -488,7 +558,6 @@
std::vector<std::string> all_filenames_;
std::function<std::unique_ptr<DataEncoder>(size_t)> encoder_factory_;
- std::string extension_;
// Storage for statistics from previously-rotated DetachedBufferWriters.
std::chrono::nanoseconds max_write_time_ = std::chrono::nanoseconds::zero();
@@ -499,10 +568,10 @@
int total_write_messages_ = 0;
int total_write_bytes_ = 0;
- // File to write both delivery timestamps and local data to.
- std::unique_ptr<NewDataWriter> data_writer_;
-
- std::map<const Channel *, NewDataWriter> data_writers_;
+ // Data writer per remote node.
+ std::map<const Node *, NewDataWriter> node_data_writers_;
+ // Remote timestamp writers per node.
+ std::map<const Node *, NewDataWriter> node_timestamp_writers_;
};
// This is specialized log namer that deals with directory centric log events.
@@ -557,6 +626,30 @@
}
};
+// Log namer which dumps all the data for each node into a single file per
+// node. This is mostly interesting for testing.
+class MinimalFileMultiNodeLogNamer : public MultiNodeFilesLogNamer {
+ public:
+ MinimalFileMultiNodeLogNamer(std::string_view base_name,
+ EventLoop *event_loop)
+ : MultiNodeFilesLogNamer(base_name, event_loop) {}
+ MinimalFileMultiNodeLogNamer(std::string_view base_name,
+ const Configuration *configuration,
+ EventLoop *event_loop, const Node *node)
+ : MultiNodeFilesLogNamer(base_name, configuration, event_loop, node) {}
+
+ NewDataWriter *MakeWriter(const Channel *channel) override;
+
+ NewDataWriter *MakeForwardedTimestampWriter(const Channel *channel,
+ const Node *node) override;
+
+ NewDataWriter *MakeTimestampWriter(const Channel *channel) override;
+
+ private:
+ // Names and opens the data writer for the provided source node.
+ void OpenNodeWriter(const Node *source_node, NewDataWriter *data_writer);
+};
+
} // namespace logger
} // namespace aos
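As a rough usage sketch (hedged: Logger::StartLogging taking a unique_ptr to a log namer is assumed from the surrounding AOS API, and the base path is illustrative), the new namer plugs in like any other MultiNodeFilesLogNamer:

    // Minimal sketch: write one file per node, which is mostly useful in
    // tests. "/tmp/minimal_log" is an illustrative base path.
    aos::logger::Logger logger(event_loop);
    logger.StartLogging(
        std::make_unique<aos::logger::MinimalFileMultiNodeLogNamer>(
            "/tmp/minimal_log", event_loop));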
diff --git a/aos/events/logging/log_reader.cc b/aos/events/logging/log_reader.cc
index e3dd904..5b437ba 100644
--- a/aos/events/logging/log_reader.cc
+++ b/aos/events/logging/log_reader.cc
@@ -35,6 +35,19 @@
"false, CHECK.");
DECLARE_bool(timestamps_to_csv);
+DEFINE_bool(
+ enable_timestamp_loading, true,
+ "Enable loading all the timestamps into RAM at startup if they are in "
+ "separate files. This fixes any timestamp queueing problems for the cost "
+ "of storing timestamps in RAM only on logs with timestamps logged in "
+ "separate files from data. Only disable this if you are reading a really "
+ "long log file and are experiencing memory problems due to loading all the "
+ "timestamps into RAM.");
+DEFINE_bool(
+ force_timestamp_loading, false,
+ "Force loading all the timestamps into RAM at startup. This fixes any "
+ "timestamp queueing problems for the cost of storing timestamps in RAM and "
+ "potentially reading each log twice.");
DEFINE_bool(skip_order_validation, false,
"If true, ignore any out of orderness in replay");
@@ -73,91 +86,6 @@
namespace logger {
namespace {
-bool CompareChannels(const Channel *c,
- ::std::pair<std::string_view, std::string_view> p) {
- int name_compare = c->name()->string_view().compare(p.first);
- if (name_compare == 0) {
- return c->type()->string_view() < p.second;
- } else if (name_compare < 0) {
- return true;
- } else {
- return false;
- }
-}
-
-bool EqualsChannels(const Channel *c,
- ::std::pair<std::string_view, std::string_view> p) {
- return c->name()->string_view() == p.first &&
- c->type()->string_view() == p.second;
-}
-
-// Copies the channel, removing the schema as we go. If new_name is provided,
-// it is used instead of the name inside the channel. If new_type is provided,
-// it is used instead of the type in the channel.
-flatbuffers::Offset<Channel> CopyChannel(const Channel *c,
- std::string_view new_name,
- std::string_view new_type,
- flatbuffers::FlatBufferBuilder *fbb) {
- CHECK_EQ(Channel::MiniReflectTypeTable()->num_elems, 14u)
- << ": Merging logic needs to be updated when the number of channel "
- "fields changes.";
-
- flatbuffers::Offset<flatbuffers::String> name_offset =
- fbb->CreateSharedString(new_name.empty() ? c->name()->string_view()
- : new_name);
- flatbuffers::Offset<flatbuffers::String> type_offset =
- fbb->CreateSharedString(new_type.empty() ? c->type()->str() : new_type);
- flatbuffers::Offset<flatbuffers::String> source_node_offset =
- c->has_source_node() ? fbb->CreateSharedString(c->source_node()->str())
- : 0;
-
- flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<Connection>>>
- destination_nodes_offset =
- aos::RecursiveCopyVectorTable(c->destination_nodes(), fbb);
-
- flatbuffers::Offset<
- flatbuffers::Vector<flatbuffers::Offset<flatbuffers::String>>>
- logger_nodes_offset = aos::CopyVectorSharedString(c->logger_nodes(), fbb);
-
- Channel::Builder channel_builder(*fbb);
- channel_builder.add_name(name_offset);
- channel_builder.add_type(type_offset);
- if (c->has_frequency()) {
- channel_builder.add_frequency(c->frequency());
- }
- if (c->has_max_size()) {
- channel_builder.add_max_size(c->max_size());
- }
- if (c->has_num_senders()) {
- channel_builder.add_num_senders(c->num_senders());
- }
- if (c->has_num_watchers()) {
- channel_builder.add_num_watchers(c->num_watchers());
- }
- if (!source_node_offset.IsNull()) {
- channel_builder.add_source_node(source_node_offset);
- }
- if (!destination_nodes_offset.IsNull()) {
- channel_builder.add_destination_nodes(destination_nodes_offset);
- }
- if (c->has_logger()) {
- channel_builder.add_logger(c->logger());
- }
- if (!logger_nodes_offset.IsNull()) {
- channel_builder.add_logger_nodes(logger_nodes_offset);
- }
- if (c->has_read_method()) {
- channel_builder.add_read_method(c->read_method());
- }
- if (c->has_num_readers()) {
- channel_builder.add_num_readers(c->num_readers());
- }
- if (c->has_channel_storage_duration()) {
- channel_builder.add_channel_storage_duration(c->channel_storage_duration());
- }
- return channel_builder.Finish();
-}
-
namespace chrono = std::chrono;
using message_bridge::RemoteMessage;
} // namespace
@@ -259,13 +187,15 @@
const ReplayChannels *replay_channels)
: log_files_(std::move(log_files)),
replay_configuration_(replay_configuration),
- replay_channels_(replay_channels) {
+ replay_channels_(replay_channels),
+ config_remapper_(log_files_.config().get(), replay_configuration_,
+ replay_channels_) {
SetStartTime(FLAGS_start_time);
SetEndTime(FLAGS_end_time);
{
// Log files container validates that log files shared the same config.
- const Configuration *config = log_files_.config();
+ const Configuration *config = log_files_.config().get();
CHECK_NOTNULL(config);
}
@@ -274,95 +204,6 @@
"no messages will get replayed.";
}
- MakeRemappedConfig();
-
- // Remap all existing remote timestamp channels. They will be recreated, and
- // the data logged isn't relevant anymore.
- for (const Node *node : configuration::GetNodes(logged_configuration())) {
- message_bridge::ChannelTimestampFinder finder(logged_configuration(),
- "log_reader", node);
-
- absl::btree_set<std::string_view> remote_nodes;
-
- for (const Channel *channel : *logged_configuration()->channels()) {
- if (!configuration::ChannelIsSendableOnNode(channel, node)) {
- continue;
- }
- if (!channel->has_destination_nodes()) {
- continue;
- }
- for (const Connection *connection : *channel->destination_nodes()) {
- if (configuration::ConnectionDeliveryTimeIsLoggedOnNode(connection,
- node)) {
- // Start by seeing if the split timestamp channels are being used for
- // this message. If so, remap them.
- const Channel *timestamp_channel = configuration::GetChannel(
- logged_configuration(),
- finder.SplitChannelName(channel, connection),
- RemoteMessage::GetFullyQualifiedName(), "", node, true);
-
- if (timestamp_channel != nullptr) {
- // If for some reason a timestamp channel is not NOT_LOGGED (which
- // is unusual), then remap the channel so that the replayed channel
- // doesn't overlap with the special separate replay we do for
- // timestamps.
- if (timestamp_channel->logger() != LoggerConfig::NOT_LOGGED) {
- RemapLoggedChannel<RemoteMessage>(
- timestamp_channel->name()->string_view(), node);
- }
- continue;
- }
-
- // Otherwise collect this one up as a node to look for a combined
- // channel from. It is more efficient to compare nodes than channels.
- LOG(WARNING) << "Failed to find channel "
- << finder.SplitChannelName(channel, connection)
- << " on node " << aos::FlatbufferToJson(node);
- remote_nodes.insert(connection->name()->string_view());
- }
- }
- }
-
- std::vector<const Node *> timestamp_logger_nodes =
- configuration::TimestampNodes(logged_configuration(), node);
- for (const std::string_view remote_node : remote_nodes) {
- const std::string channel = finder.CombinedChannelName(remote_node);
-
- // See if the log file is an old log with MessageHeader channels in it, or
- // a newer log with RemoteMessage. If we find an older log, rename the
- // type too along with the name.
- if (HasChannel<MessageHeader>(channel, node)) {
- CHECK(!HasChannel<RemoteMessage>(channel, node))
- << ": Can't have both a MessageHeader and RemoteMessage remote "
- "timestamp channel.";
- // In theory, we should check NOT_LOGGED like RemoteMessage and be more
- // careful about updating the config, but there are fewer and fewer logs
- // with MessageHeader remote messages, so it isn't worth the effort.
- RemapLoggedChannel<MessageHeader>(channel, node, "/original",
- "aos.message_bridge.RemoteMessage");
- } else {
- CHECK(HasChannel<RemoteMessage>(channel, node))
- << ": Failed to find {\"name\": \"" << channel << "\", \"type\": \""
- << RemoteMessage::GetFullyQualifiedName() << "\"} for node "
- << node->name()->string_view();
- // Only bother to remap if there's something on the channel. We can
- // tell if the channel was marked NOT_LOGGED or not. This makes the
- // config not change un-necesarily when we replay a log with NOT_LOGGED
- // messages.
- if (HasLoggedChannel<RemoteMessage>(channel, node)) {
- RemapLoggedChannel<RemoteMessage>(channel, node);
- }
- }
- }
- }
-
- if (replay_configuration) {
- CHECK_EQ(configuration::MultiNode(configuration()),
- configuration::MultiNode(replay_configuration))
- << ": Log file and replay config need to both be multi or single "
- "node.";
- }
-
if (!configuration::MultiNode(configuration())) {
states_.resize(1);
} else {
@@ -392,19 +233,14 @@
LOG(FATAL) << "Must call Deregister before the SimulatedEventLoopFactory "
"is destroyed";
}
- // Zero out some buffers. It's easy to do use-after-frees on these, so make
- // it more obvious.
- if (remapped_configuration_buffer_) {
- remapped_configuration_buffer_->Wipe();
- }
}
const Configuration *LogReader::logged_configuration() const {
- return log_files_.config();
+ return config_remapper_.original_configuration();
}
const Configuration *LogReader::configuration() const {
- return remapped_configuration_;
+ return config_remapper_.remapped_configuration();
}
std::vector<const Node *> LogReader::LoggedNodes() const {
@@ -475,7 +311,7 @@
BootTimestamp>::PushResult result{
*message, queue_until >= last_queued_message_, false};
timestamp_mapper_->PopFront();
- SeedSortedMessages();
+ MaybeSeedSortedMessages();
return result;
},
time);
@@ -545,9 +381,6 @@
stopped_ = true;
started_ = true;
- if (message_queuer_.has_value()) {
- message_queuer_->StopPushing();
- }
}
std::vector<
@@ -601,13 +434,21 @@
void LogReader::RegisterWithoutStarting(
SimulatedEventLoopFactory *event_loop_factory) {
event_loop_factory_ = event_loop_factory;
- remapped_configuration_ = event_loop_factory_->configuration();
+ config_remapper_.set_configuration(event_loop_factory_->configuration());
+
+ const TimestampQueueStrategy timestamp_queue_strategy =
+ ComputeTimestampQueueStrategy();
+
filters_ =
std::make_unique<message_bridge::MultiNodeNoncausalOffsetEstimator>(
event_loop_factory_->configuration(), logged_configuration(),
log_files_.boots(), FLAGS_skip_order_validation,
- chrono::duration_cast<chrono::nanoseconds>(
- chrono::duration<double>(FLAGS_time_estimation_buffer_seconds)));
+ timestamp_queue_strategy ==
+ TimestampQueueStrategy::kQueueTimestampsAtStartup
+ ? chrono::seconds(0)
+ : chrono::duration_cast<chrono::nanoseconds>(
+ chrono::duration<double>(
+ FLAGS_time_estimation_buffer_seconds)));
std::vector<TimestampMapper *> timestamp_mappers;
for (const Node *node : configuration::GetNodes(configuration())) {
@@ -621,8 +462,10 @@
states_[node_index] = std::make_unique<State>(
!log_files_.ContainsPartsForNode(node_name)
? nullptr
- : std::make_unique<TimestampMapper>(node_name, log_files_),
- filters_.get(), std::bind(&LogReader::NoticeRealtimeEnd, this), node,
+ : std::make_unique<TimestampMapper>(node_name, log_files_,
+ timestamp_queue_strategy),
+ timestamp_queue_strategy, filters_.get(),
+ std::bind(&LogReader::NoticeRealtimeEnd, this), node,
State::ThreadedBuffering::kNo, MaybeMakeReplayChannelIndices(node),
before_send_callbacks_);
State *state = states_[node_index].get();
@@ -653,6 +496,14 @@
}
}
+ // Now that every state has a peer, load all the timestamps into RAM.
+ if (timestamp_queue_strategy ==
+ TimestampQueueStrategy::kQueueTimestampsAtStartup) {
+ for (std::unique_ptr<State> &state : states_) {
+ state->ReadTimestamps();
+ }
+ }
+
// Register after making all the State objects so we can build references
// between them.
for (const Node *node : configuration::GetNodes(configuration())) {
@@ -688,14 +539,14 @@
filters_->CheckGraph();
for (std::unique_ptr<State> &state : states_) {
- state->SeedSortedMessages();
+ state->MaybeSeedSortedMessages();
}
// Forwarding is tracked per channel. If it is enabled, we want to turn it
// off. Otherwise messages replayed will get forwarded across to the other
// nodes, and also replayed on the other nodes. This may not satisfy all
// our users, but it'll start the discussion.
- if (configuration::MultiNode(event_loop_factory_->configuration())) {
+ if (configuration::NodesCount(event_loop_factory_->configuration()) > 1u) {
for (size_t i = 0; i < logged_configuration()->channels()->size(); ++i) {
const Channel *channel = logged_configuration()->channels()->Get(i);
const Node *node = configuration::GetNode(
@@ -705,7 +556,7 @@
states_[configuration::GetNodeIndex(configuration(), node)].get();
const Channel *remapped_channel =
- RemapChannel(state->event_loop(), node, channel);
+ config_remapper_.RemapChannel(state->event_loop(), node, channel);
event_loop_factory_->DisableForwarding(remapped_channel);
}
@@ -738,7 +589,8 @@
CHECK(state);
VLOG(1) << "Start time is " << state->monotonic_start_time(0)
<< " for node '" << MaybeNodeName(state->node()) << "' now "
- << state->monotonic_now();
+ << (state->event_loop() != nullptr ? state->monotonic_now()
+ : monotonic_clock::min_time);
if (state->monotonic_start_time(0) == monotonic_clock::min_time) {
continue;
}
@@ -768,8 +620,9 @@
state->realtime_start_time(0));
}
VLOG(1) << "Start time is " << state->monotonic_start_time(0)
- << " for node '" << MaybeNodeName(state->event_loop()->node())
- << "' now " << state->monotonic_now();
+ << " for node '" << MaybeNodeName(state->node()) << "' now "
+ << (state->event_loop() != nullptr ? state->monotonic_now()
+ : monotonic_clock::min_time);
}
if (FLAGS_timestamps_to_csv) {
@@ -785,16 +638,33 @@
return nullptr;
}
+TimestampQueueStrategy LogReader::ComputeTimestampQueueStrategy() const {
+ if ((log_files_.TimestampsStoredSeparately() &&
+ FLAGS_enable_timestamp_loading) ||
+ FLAGS_force_timestamp_loading) {
+ return TimestampQueueStrategy::kQueueTimestampsAtStartup;
+ } else {
+ return TimestampQueueStrategy::kQueueTogether;
+ }
+}
+
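A short sketch of forcing the new strategy from user code; it assumes the caller has a matching DECLARE_bool(force_timestamp_loading) and sets the flag before Register() computes the strategy:

    // Sketch: load every timestamp into RAM at startup, even when the
    // timestamps are stored in the same files as the data.
    FLAGS_force_timestamp_loading = true;
    aos::logger::LogReader reader(aos::logger::SortParts(filenames));
    reader.Register();
    reader.event_loop_factory()->Run();
    reader.Deregister();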
// TODO(jkuszmaul): Make in-line modifications to
// ServerStatistics/ClientStatistics messages for ShmEventLoop-based replay to
// avoid messing up anything that depends on them having valid offsets.
void LogReader::Register(EventLoop *event_loop) {
+ const TimestampQueueStrategy timestamp_queue_strategy =
+ ComputeTimestampQueueStrategy();
+
filters_ =
std::make_unique<message_bridge::MultiNodeNoncausalOffsetEstimator>(
event_loop->configuration(), logged_configuration(),
log_files_.boots(), FLAGS_skip_order_validation,
- chrono::duration_cast<chrono::nanoseconds>(
- chrono::duration<double>(FLAGS_time_estimation_buffer_seconds)));
+ timestamp_queue_strategy ==
+ TimestampQueueStrategy::kQueueTimestampsAtStartup
+ ? chrono::seconds(0)
+ : chrono::duration_cast<chrono::nanoseconds>(
+ chrono::duration<double>(
+ FLAGS_time_estimation_buffer_seconds)));
std::vector<TimestampMapper *> timestamp_mappers;
for (const Node *node : configuration::GetNodes(configuration())) {
@@ -805,8 +675,10 @@
states_[node_index] = std::make_unique<State>(
!log_files_.ContainsPartsForNode(node_name)
? nullptr
- : std::make_unique<TimestampMapper>(node_name, log_files_),
- filters_.get(), std::bind(&LogReader::NoticeRealtimeEnd, this), node,
+ : std::make_unique<TimestampMapper>(node_name, log_files_,
+ timestamp_queue_strategy),
+ timestamp_queue_strategy, filters_.get(),
+ std::bind(&LogReader::NoticeRealtimeEnd, this), node,
State::ThreadedBuffering::kYes, MaybeMakeReplayChannelIndices(node),
before_send_callbacks_);
State *state = states_[node_index].get();
@@ -830,6 +702,16 @@
}
}
}
+
+ // Now that all the peers are added, we can buffer up all the timestamps if
+ // needed.
+ if (timestamp_queue_strategy ==
+ TimestampQueueStrategy::kQueueTimestampsAtStartup) {
+ for (std::unique_ptr<State> &state : states_) {
+ state->ReadTimestamps();
+ }
+ }
+
for (const Node *node : configuration::GetNodes(configuration())) {
if (node == nullptr || node->name()->string_view() ==
event_loop->node()->name()->string_view()) {
@@ -889,7 +771,7 @@
for (size_t logged_channel_index = 0;
logged_channel_index < logged_configuration()->channels()->size();
++logged_channel_index) {
- const Channel *channel = RemapChannel(
+ const Channel *channel = config_remapper_.RemapChannel(
event_loop, node,
logged_configuration()->channels()->Get(logged_channel_index));
@@ -1140,6 +1022,9 @@
"from one of the nodes it is logged on.";
}
}
+ // The log file is now done, prod the callbacks.
+ state->NotifyLogfileEnd();
+
// Now that we found the end of one channel, artificially stop the
// rest by setting the found_last_message bit. It is confusing when
// part of your data gets replayed but not all. The rest of them will
@@ -1225,7 +1110,7 @@
<< state->monotonic_now();
}));
- state->SeedSortedMessages();
+ state->MaybeSeedSortedMessages();
if (state->SingleThreadedOldestMessageTime() != BootTimestamp::max_time()) {
state->set_startup_timer(
@@ -1303,22 +1188,21 @@
// Checks if the specified channel name/type exists in the config and, depending
// on the value of conflict_handling, calls conflict_handler or just dies.
template <typename F>
-void CheckAndHandleRemapConflict(std::string_view new_name,
- std::string_view new_type,
- const Configuration *config,
- LogReader::RemapConflict conflict_handling,
- F conflict_handler) {
+void CheckAndHandleRemapConflict(
+ std::string_view new_name, std::string_view new_type,
+ const Configuration *config,
+ ConfigRemapper::RemapConflict conflict_handling, F conflict_handler) {
const Channel *existing_channel =
configuration::GetChannel(config, new_name, new_type, "", nullptr, true);
if (existing_channel != nullptr) {
switch (conflict_handling) {
- case LogReader::RemapConflict::kDisallow:
+ case ConfigRemapper::RemapConflict::kDisallow:
LOG(FATAL)
<< "Channel "
<< configuration::StrippedChannelToString(existing_channel)
<< " is already used--you can't remap a logged channel to it.";
break;
- case LogReader::RemapConflict::kCascade:
+ case ConfigRemapper::RemapConflict::kCascade:
LOG(INFO) << "Automatically remapping "
<< configuration::StrippedChannelToString(existing_channel)
<< " to avoid conflicts.";
@@ -1329,88 +1213,29 @@
}
} // namespace
-void LogReader::RemapLoggedChannel(std::string_view name, std::string_view type,
- std::string_view add_prefix,
- std::string_view new_type,
- RemapConflict conflict_handling) {
- RemapLoggedChannel(name, type, nullptr, add_prefix, new_type,
- conflict_handling);
+void LogReader::RemapLoggedChannel(
+ std::string_view name, std::string_view type, std::string_view add_prefix,
+ std::string_view new_type,
+ ConfigRemapper::RemapConflict conflict_handling) {
+ CheckEventsAreNotScheduled();
+ config_remapper_.RemapOriginalChannel(name, type, nullptr, add_prefix,
+ new_type, conflict_handling);
}
-void LogReader::RemapLoggedChannel(std::string_view name, std::string_view type,
- const Node *node,
- std::string_view add_prefix,
- std::string_view new_type,
- RemapConflict conflict_handling) {
- if (node != nullptr) {
- VLOG(1) << "Node is " << aos::FlatbufferToJson(node);
- }
- if (replay_channels_ != nullptr) {
- CHECK(std::find(replay_channels_->begin(), replay_channels_->end(),
- std::make_pair(std::string{name}, std::string{type})) !=
- replay_channels_->end())
- << "Attempted to remap channel " << name << " " << type
- << " which is not included in the replay channels passed to LogReader.";
- }
- const Channel *remapped_channel =
- configuration::GetChannel(logged_configuration(), name, type, "", node);
- CHECK(remapped_channel != nullptr) << ": Failed to find {\"name\": \"" << name
- << "\", \"type\": \"" << type << "\"}";
- VLOG(1) << "Original {\"name\": \"" << name << "\", \"type\": \"" << type
- << "\"}";
- VLOG(1) << "Remapped "
- << aos::configuration::StrippedChannelToString(remapped_channel);
-
- // We want to make /spray on node 0 go to /0/spray by snooping the maps. And
- // we want it to degrade if the heuristics fail to just work.
- //
- // The easiest way to do this is going to be incredibly specific and verbose.
- // Look up /spray, to /0/spray. Then, prefix the result with /original to get
- // /original/0/spray. Then, create a map from /original/spray to
- // /original/0/spray for just the type we were asked for.
- if (name != remapped_channel->name()->string_view()) {
- MapT new_map;
- new_map.match = std::make_unique<ChannelT>();
- new_map.match->name = absl::StrCat(add_prefix, name);
- new_map.match->type = type;
- if (node != nullptr) {
- new_map.match->source_node = node->name()->str();
- }
- new_map.rename = std::make_unique<ChannelT>();
- new_map.rename->name =
- absl::StrCat(add_prefix, remapped_channel->name()->string_view());
- maps_.emplace_back(std::move(new_map));
- }
-
- // Then remap the logged channel to the prefixed channel.
- const size_t channel_index =
- configuration::ChannelIndex(logged_configuration(), remapped_channel);
- CHECK_EQ(0u, remapped_channels_.count(channel_index))
- << "Already remapped channel "
- << configuration::CleanedChannelToString(remapped_channel);
-
- RemappedChannel remapped_channel_struct;
- remapped_channel_struct.remapped_name =
- std::string(add_prefix) +
- std::string(remapped_channel->name()->string_view());
- remapped_channel_struct.new_type = new_type;
- const std::string_view remapped_type = new_type.empty() ? type : new_type;
- CheckAndHandleRemapConflict(
- remapped_channel_struct.remapped_name, remapped_type,
- remapped_configuration_, conflict_handling,
- [this, &remapped_channel_struct, remapped_type, node, add_prefix,
- conflict_handling]() {
- RemapLoggedChannel(remapped_channel_struct.remapped_name, remapped_type,
- node, add_prefix, "", conflict_handling);
- });
- remapped_channels_[channel_index] = std::move(remapped_channel_struct);
- MakeRemappedConfig();
+void LogReader::RemapLoggedChannel(
+ std::string_view name, std::string_view type, const Node *node,
+ std::string_view add_prefix, std::string_view new_type,
+ ConfigRemapper::RemapConflict conflict_handling) {
+ CheckEventsAreNotScheduled();
+ config_remapper_.RemapOriginalChannel(name, type, node, add_prefix, new_type,
+ conflict_handling);
}
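For context, the public remapping behavior is unchanged by the move into ConfigRemapper; a typical call still looks like this (the channel name and message type are illustrative):

    aos::logger::LogReader reader(aos::logger::SortParts(filenames));
    // Replay /pong under /original/pong so a live application can own /pong.
    reader.RemapLoggedChannel<aos::examples::Pong>("/pong");
    reader.Register();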
void LogReader::RenameLoggedChannel(const std::string_view name,
const std::string_view type,
const std::string_view new_name,
const std::vector<MapT> &add_maps) {
+ CheckEventsAreNotScheduled();
RenameLoggedChannel(name, type, nullptr, new_name, add_maps);
}
@@ -1419,284 +1244,17 @@
const Node *const node,
const std::string_view new_name,
const std::vector<MapT> &add_maps) {
- if (node != nullptr) {
- VLOG(1) << "Node is " << aos::FlatbufferToJson(node);
- }
- // First find the channel and rename it.
- const Channel *remapped_channel =
- configuration::GetChannel(logged_configuration(), name, type, "", node);
- CHECK(remapped_channel != nullptr) << ": Failed to find {\"name\": \"" << name
- << "\", \"type\": \"" << type << "\"}";
- VLOG(1) << "Original {\"name\": \"" << name << "\", \"type\": \"" << type
- << "\"}";
- VLOG(1) << "Remapped "
- << aos::configuration::StrippedChannelToString(remapped_channel);
-
- const size_t channel_index =
- configuration::ChannelIndex(logged_configuration(), remapped_channel);
- CHECK_EQ(0u, remapped_channels_.count(channel_index))
- << "Already remapped channel "
- << configuration::CleanedChannelToString(remapped_channel);
-
- RemappedChannel remapped_channel_struct;
- remapped_channel_struct.remapped_name = new_name;
- remapped_channel_struct.new_type.clear();
- remapped_channels_[channel_index] = std::move(remapped_channel_struct);
-
- // Then add any provided maps.
- for (const MapT &map : add_maps) {
- maps_.push_back(map);
- }
-
- // Finally rewrite the config.
- MakeRemappedConfig();
+ CheckEventsAreNotScheduled();
+ config_remapper_.RenameOriginalChannel(name, type, node, new_name, add_maps);
}
-void LogReader::MakeRemappedConfig() {
+void LogReader::CheckEventsAreNotScheduled() {
for (std::unique_ptr<State> &state : states_) {
if (state) {
CHECK(!state->event_loop())
<< ": Can't change the mapping after the events are scheduled.";
}
}
-
- // If no remapping occurred and we are using the original config, then there
- // is nothing interesting to do here.
- if (remapped_channels_.empty() && replay_configuration_ == nullptr) {
- remapped_configuration_ = logged_configuration();
- return;
- }
- // Config to copy Channel definitions from. Use the specified
- // replay_configuration_ if it has been provided.
- const Configuration *const base_config = replay_configuration_ == nullptr
- ? logged_configuration()
- : replay_configuration_;
-
- // Create a config with all the channels, but un-sorted/merged. Collect up
- // the schemas while we do this. Call MergeConfiguration to sort everything,
- // and then merge it all in together.
-
- // This is the builder that we use for the config containing all the new
- // channels.
- flatbuffers::FlatBufferBuilder fbb;
- fbb.ForceDefaults(true);
- std::vector<flatbuffers::Offset<Channel>> channel_offsets;
-
- CHECK_EQ(Channel::MiniReflectTypeTable()->num_elems, 14u)
- << ": Merging logic needs to be updated when the number of channel "
- "fields changes.";
-
- // List of schemas.
- std::map<std::string_view, FlatbufferVector<reflection::Schema>> schema_map;
- // Make sure our new RemoteMessage schema is in there for old logs without it.
- schema_map.insert(std::make_pair(
- RemoteMessage::GetFullyQualifiedName(),
- FlatbufferVector<reflection::Schema>(FlatbufferSpan<reflection::Schema>(
- message_bridge::RemoteMessageSchema()))));
-
- // Reconstruct the remapped channels.
- for (auto &pair : remapped_channels_) {
- const Channel *const c = CHECK_NOTNULL(configuration::GetChannel(
- base_config, logged_configuration()->channels()->Get(pair.first), "",
- nullptr));
- channel_offsets.emplace_back(
- CopyChannel(c, pair.second.remapped_name, "", &fbb));
-
- if (c->has_destination_nodes()) {
- for (const Connection *connection : *c->destination_nodes()) {
- switch (connection->timestamp_logger()) {
- case LoggerConfig::LOCAL_LOGGER:
- case LoggerConfig::NOT_LOGGED:
- // There is no timestamp channel associated with this, so ignore it.
- break;
-
- case LoggerConfig::REMOTE_LOGGER:
- case LoggerConfig::LOCAL_AND_REMOTE_LOGGER:
- // We want to make a split timestamp channel regardless of what type
- // of log this used to be. No sense propagating the single
- // timestamp channel.
-
- CHECK(connection->has_timestamp_logger_nodes());
- for (const flatbuffers::String *timestamp_logger_node :
- *connection->timestamp_logger_nodes()) {
- const Node *node = configuration::GetNode(
- logged_configuration(), timestamp_logger_node->string_view());
- message_bridge::ChannelTimestampFinder finder(
- logged_configuration(), "log_reader", node);
-
- // We are assuming here that all the maps are setup correctly to
- // handle arbitrary timestamps. Apply the maps for this node to
- // see what name this ends up with.
- std::string name = finder.SplitChannelName(
- pair.second.remapped_name, c->type()->str(), connection);
- std::string unmapped_name = name;
- configuration::HandleMaps(logged_configuration()->maps(), &name,
- "aos.message_bridge.RemoteMessage",
- node);
- CHECK_NE(name, unmapped_name)
- << ": Remote timestamp channel was not remapped, this is "
- "very fishy";
- flatbuffers::Offset<flatbuffers::String> channel_name_offset =
- fbb.CreateString(name);
- flatbuffers::Offset<flatbuffers::String> channel_type_offset =
- fbb.CreateString("aos.message_bridge.RemoteMessage");
- flatbuffers::Offset<flatbuffers::String> source_node_offset =
- fbb.CreateString(timestamp_logger_node->string_view());
-
- // Now, build a channel. Don't log it, 2 senders, and match the
- // source frequency.
- Channel::Builder channel_builder(fbb);
- channel_builder.add_name(channel_name_offset);
- channel_builder.add_type(channel_type_offset);
- channel_builder.add_source_node(source_node_offset);
- channel_builder.add_logger(LoggerConfig::NOT_LOGGED);
- channel_builder.add_num_senders(2);
- if (c->has_frequency()) {
- channel_builder.add_frequency(c->frequency());
- }
- if (c->has_channel_storage_duration()) {
- channel_builder.add_channel_storage_duration(
- c->channel_storage_duration());
- }
- channel_offsets.emplace_back(channel_builder.Finish());
- }
- break;
- }
- }
- }
- }
-
- // Now reconstruct the original channels, translating types as needed
- for (const Channel *c : *base_config->channels()) {
- // Search for a mapping channel.
- std::string_view new_type = "";
- for (auto &pair : remapped_channels_) {
- const Channel *const remapped_channel =
- logged_configuration()->channels()->Get(pair.first);
- if (remapped_channel->name()->string_view() == c->name()->string_view() &&
- remapped_channel->type()->string_view() == c->type()->string_view()) {
- new_type = pair.second.new_type;
- break;
- }
- }
-
- // Copy everything over.
- channel_offsets.emplace_back(CopyChannel(c, "", new_type, &fbb));
-
- // Add the schema if it doesn't exist.
- if (schema_map.find(c->type()->string_view()) == schema_map.end()) {
- CHECK(c->has_schema());
- schema_map.insert(std::make_pair(c->type()->string_view(),
- RecursiveCopyFlatBuffer(c->schema())));
- }
- }
-
- // The MergeConfiguration API takes a vector, not a map. Convert.
- std::vector<FlatbufferVector<reflection::Schema>> schemas;
- while (!schema_map.empty()) {
- schemas.emplace_back(std::move(schema_map.begin()->second));
- schema_map.erase(schema_map.begin());
- }
-
- // Create the Configuration containing the new channels that we want to add.
- const flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<Channel>>>
- channels_offset =
- channel_offsets.empty() ? 0 : fbb.CreateVector(channel_offsets);
-
- // Copy over the old maps.
- std::vector<flatbuffers::Offset<Map>> map_offsets;
- if (base_config->maps()) {
- for (const Map *map : *base_config->maps()) {
- map_offsets.emplace_back(aos::RecursiveCopyFlatBuffer(map, &fbb));
- }
- }
-
- // Now create the new maps. These are second so they take effect first.
- for (const MapT &map : maps_) {
- CHECK(!map.match->name.empty());
- const flatbuffers::Offset<flatbuffers::String> match_name_offset =
- fbb.CreateString(map.match->name);
- flatbuffers::Offset<flatbuffers::String> match_type_offset;
- if (!map.match->type.empty()) {
- match_type_offset = fbb.CreateString(map.match->type);
- }
- flatbuffers::Offset<flatbuffers::String> match_source_node_offset;
- if (!map.match->source_node.empty()) {
- match_source_node_offset = fbb.CreateString(map.match->source_node);
- }
- CHECK(!map.rename->name.empty());
- const flatbuffers::Offset<flatbuffers::String> rename_name_offset =
- fbb.CreateString(map.rename->name);
- Channel::Builder match_builder(fbb);
- match_builder.add_name(match_name_offset);
- if (!match_type_offset.IsNull()) {
- match_builder.add_type(match_type_offset);
- }
- if (!match_source_node_offset.IsNull()) {
- match_builder.add_source_node(match_source_node_offset);
- }
- const flatbuffers::Offset<Channel> match_offset = match_builder.Finish();
-
- Channel::Builder rename_builder(fbb);
- rename_builder.add_name(rename_name_offset);
- const flatbuffers::Offset<Channel> rename_offset = rename_builder.Finish();
-
- Map::Builder map_builder(fbb);
- map_builder.add_match(match_offset);
- map_builder.add_rename(rename_offset);
- map_offsets.emplace_back(map_builder.Finish());
- }
-
- flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<Map>>>
- maps_offsets = map_offsets.empty() ? 0 : fbb.CreateVector(map_offsets);
-
- // And copy everything else over.
- flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<Node>>>
- nodes_offset = aos::RecursiveCopyVectorTable(base_config->nodes(), &fbb);
-
- flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<Application>>>
- applications_offset =
- aos::RecursiveCopyVectorTable(base_config->applications(), &fbb);
-
- // Now insert everything else in unmodified.
- ConfigurationBuilder configuration_builder(fbb);
- if (!channels_offset.IsNull()) {
- configuration_builder.add_channels(channels_offset);
- }
- if (!maps_offsets.IsNull()) {
- configuration_builder.add_maps(maps_offsets);
- }
- if (!nodes_offset.IsNull()) {
- configuration_builder.add_nodes(nodes_offset);
- }
- if (!applications_offset.IsNull()) {
- configuration_builder.add_applications(applications_offset);
- }
-
- if (base_config->has_channel_storage_duration()) {
- configuration_builder.add_channel_storage_duration(
- base_config->channel_storage_duration());
- }
-
- CHECK_EQ(Configuration::MiniReflectTypeTable()->num_elems, 6u)
- << ": Merging logic needs to be updated when the number of configuration "
- "fields changes.";
-
- fbb.Finish(configuration_builder.Finish());
-
- // Clean it up and return it! By using MergeConfiguration here, we'll
- // actually get a deduplicated config for free too.
- FlatbufferDetachedBuffer<Configuration> new_merged_config =
- configuration::MergeConfiguration(
- FlatbufferDetachedBuffer<Configuration>(fbb.Release()));
-
- remapped_configuration_buffer_ =
- std::make_unique<FlatbufferDetachedBuffer<Configuration>>(
- configuration::MergeConfiguration(new_merged_config, schemas));
-
- remapped_configuration_ = &remapped_configuration_buffer_->message();
-
- // TODO(austin): Lazily re-build to save CPU?
}
std::unique_ptr<const ReplayChannelIndices>
@@ -1725,57 +1283,18 @@
}
std::vector<const Channel *> LogReader::RemappedChannels() const {
- std::vector<const Channel *> result;
- result.reserve(remapped_channels_.size());
- for (auto &pair : remapped_channels_) {
- const Channel *const logged_channel =
- CHECK_NOTNULL(logged_configuration()->channels()->Get(pair.first));
-
- auto channel_iterator = std::lower_bound(
- remapped_configuration_->channels()->cbegin(),
- remapped_configuration_->channels()->cend(),
- std::make_pair(std::string_view(pair.second.remapped_name),
- logged_channel->type()->string_view()),
- CompareChannels);
-
- CHECK(channel_iterator != remapped_configuration_->channels()->cend());
- CHECK(EqualsChannels(
- *channel_iterator,
- std::make_pair(std::string_view(pair.second.remapped_name),
- logged_channel->type()->string_view())));
- result.push_back(*channel_iterator);
- }
- return result;
+ return config_remapper_.RemappedChannels();
}
const Channel *LogReader::RemapChannel(const EventLoop *event_loop,
const Node *node,
const Channel *channel) {
- std::string_view channel_name = channel->name()->string_view();
- std::string_view channel_type = channel->type()->string_view();
- const int channel_index =
- configuration::ChannelIndex(logged_configuration(), channel);
- // If the channel is remapped, find the correct channel name to use.
- if (remapped_channels_.count(channel_index) > 0) {
- VLOG(3) << "Got remapped channel on "
- << configuration::CleanedChannelToString(channel);
- channel_name = remapped_channels_[channel_index].remapped_name;
- }
-
- VLOG(2) << "Going to remap channel " << channel_name << " " << channel_type;
- const Channel *remapped_channel = configuration::GetChannel(
- configuration(), channel_name, channel_type,
- event_loop ? event_loop->name() : "log_reader", node);
-
- CHECK(remapped_channel != nullptr)
- << ": Unable to send {\"name\": \"" << channel_name << "\", \"type\": \""
- << channel_type << "\"} because it is not in the provided configuration.";
-
- return remapped_channel;
+ return config_remapper_.RemapChannel(event_loop, node, channel);
}
LogReader::State::State(
std::unique_ptr<TimestampMapper> timestamp_mapper,
+ TimestampQueueStrategy timestamp_queue_strategy,
message_bridge::MultiNodeNoncausalOffsetEstimator *multinode_filters,
std::function<void()> notice_realtime_end, const Node *node,
LogReader::State::ThreadedBuffering threading,
@@ -1783,6 +1302,7 @@
const std::vector<std::function<void(void *message)>>
&before_send_callbacks)
: timestamp_mapper_(std::move(timestamp_mapper)),
+ timestamp_queue_strategy_(timestamp_queue_strategy),
notice_realtime_end_(notice_realtime_end),
node_(node),
multinode_filters_(multinode_filters),
@@ -2247,6 +1767,10 @@
message.value().monotonic_event_time +
std::chrono::duration_cast<std::chrono::nanoseconds>(
std::chrono::duration<double>(FLAGS_threaded_look_ahead_seconds)));
+ VLOG(1) << "Popped " << message.value()
+ << configuration::CleanedChannelToString(
+ event_loop_->configuration()->channels()->Get(
+ factory_channel_index_[message->channel_index]));
return message.value();
} else { // single threaded
CHECK(timestamp_mapper_ != nullptr);
@@ -2258,7 +1782,7 @@
VLOG(2) << "Node '" << MaybeNodeName(event_loop_->node())
<< "': PopOldest Popping " << result.monotonic_event_time;
timestamp_mapper_->PopFront();
- SeedSortedMessages();
+ MaybeSeedSortedMessages();
CHECK_EQ(result.monotonic_event_time.boot, boot_count());
@@ -2305,8 +1829,27 @@
return result_ptr->monotonic_event_time;
}
-void LogReader::State::SeedSortedMessages() {
+void LogReader::State::ReadTimestamps() {
if (!timestamp_mapper_) return;
+ timestamp_mapper_->QueueTimestamps();
+
+ // TODO(austin): Maybe make timestamp mapper do this so we don't need to clear
+ // it out?
+ //
+ // If we don't clear the timestamp callback, we end up getting a callback both
+ // when SplitTimestampBootMerger sees the timestamp, and when TimestampMapper
+ // sees it.
+ timestamp_mapper_->set_timestamp_callback([](TimestampedMessage *) {});
+}
+
+void LogReader::State::MaybeSeedSortedMessages() {
+ if (!timestamp_mapper_) return;
+
+ // The whole purpose of seeding is to make timestamp_mapper load timestamps.
+ // So if we have already loaded them, skip seeding.
+ if (timestamp_queue_strategy_ ==
+ TimestampQueueStrategy::kQueueTimestampsAtStartup)
+ return;
timestamp_mapper_->QueueFor(chrono::duration_cast<chrono::seconds>(
chrono::duration<double>(FLAGS_time_estimation_buffer_seconds)));
@@ -2413,6 +1956,10 @@
SetFoundLastMessage(true);
CHECK(notice_realtime_end_);
notice_realtime_end_();
+
+ if (message_queuer_.has_value()) {
+ message_queuer_->StopPushing();
+ }
}
}
diff --git a/aos/events/logging/log_reader.h b/aos/events/logging/log_reader.h
index 0486665..d36db70 100644
--- a/aos/events/logging/log_reader.h
+++ b/aos/events/logging/log_reader.h
@@ -15,9 +15,11 @@
#include "aos/condition.h"
#include "aos/events/event_loop.h"
#include "aos/events/event_loop_tmpl.h"
+#include "aos/events/logging/config_remapper.h"
#include "aos/events/logging/logfile_sorting.h"
#include "aos/events/logging/logfile_utils.h"
#include "aos/events/logging/logger_generated.h"
+#include "aos/events/logging/replay_channels.h"
#include "aos/events/logging/replay_timing_generated.h"
#include "aos/events/shm_event_loop.h"
#include "aos/events/simulated_event_loop.h"
@@ -35,11 +37,6 @@
class EventNotifier;
-// Vector of pair of name and type of the channel
-using ReplayChannels = std::vector<std::pair<std::string, std::string>>;
-// Vector of channel indices
-using ReplayChannelIndices = std::vector<size_t>;
-
// We end up with one of the following 3 log file types.
//
// Single node logged as the source node.
@@ -217,34 +214,21 @@
void SetEndTime(std::string end_time);
void SetEndTime(realtime_clock::time_point end_time);
- // Enum to use for indicating how RemapLoggedChannel behaves when there is
- // already a channel with the remapped name (e.g., as may happen when
- // replaying a logfile that was itself generated from replay).
- enum class RemapConflict {
- // LOG(FATAL) on conflicts in remappings.
- kDisallow,
- // If we run into a conflict, attempt to remap the channel we would be
- // overriding (and continue to do so if remapping *that* channel also
- // generates a conflict).
- // This will mean that if we repeatedly replay a log, we will end up
- // stacking more and more /original's on the start of the oldest version
- // of the channels.
- kCascade
- };
-
// Causes the logger to publish the provided channel on a different name so
// that replayed applications can publish on the proper channel name without
// interference. This operates on raw channel names, without any node or
// application specific mappings.
- void RemapLoggedChannel(
- std::string_view name, std::string_view type,
- std::string_view add_prefix = "/original", std::string_view new_type = "",
- RemapConflict conflict_handling = RemapConflict::kCascade);
+ void RemapLoggedChannel(std::string_view name, std::string_view type,
+ std::string_view add_prefix = "/original",
+ std::string_view new_type = "",
+ ConfigRemapper::RemapConflict conflict_handling =
+ ConfigRemapper::RemapConflict::kCascade);
template <typename T>
- void RemapLoggedChannel(
- std::string_view name, std::string_view add_prefix = "/original",
- std::string_view new_type = "",
- RemapConflict conflict_handling = RemapConflict::kCascade) {
+ void RemapLoggedChannel(std::string_view name,
+ std::string_view add_prefix = "/original",
+ std::string_view new_type = "",
+ ConfigRemapper::RemapConflict conflict_handling =
+ ConfigRemapper::RemapConflict::kCascade) {
RemapLoggedChannel(name, T::GetFullyQualifiedName(), add_prefix, new_type,
conflict_handling);
}
@@ -257,15 +241,18 @@
// TODO(austin): If you have 2 nodes remapping something to the same channel,
// this doesn't handle that. No use cases exist yet for that, so it isn't
// being done yet.
- void RemapLoggedChannel(
- std::string_view name, std::string_view type, const Node *node,
- std::string_view add_prefix = "/original", std::string_view new_type = "",
- RemapConflict conflict_handling = RemapConflict::kCascade);
+ void RemapLoggedChannel(std::string_view name, std::string_view type,
+ const Node *node,
+ std::string_view add_prefix = "/original",
+ std::string_view new_type = "",
+ ConfigRemapper::RemapConflict conflict_handling =
+ ConfigRemapper::RemapConflict::kCascade);
template <typename T>
- void RemapLoggedChannel(
- std::string_view name, const Node *node,
- std::string_view add_prefix = "/original", std::string_view new_type = "",
- RemapConflict conflict_handling = RemapConflict::kCascade) {
+ void RemapLoggedChannel(std::string_view name, const Node *node,
+ std::string_view add_prefix = "/original",
+ std::string_view new_type = "",
+ ConfigRemapper::RemapConflict conflict_handling =
+ ConfigRemapper::RemapConflict::kCascade) {
RemapLoggedChannel(name, T::GetFullyQualifiedName(), node, add_prefix,
new_type, conflict_handling);
}
@@ -325,11 +312,7 @@
// Returns true if the channel exists on the node and was logged.
template <typename T>
bool HasLoggedChannel(std::string_view name, const Node *node = nullptr) {
- const Channel *channel =
- configuration::GetChannel(logged_configuration(), name,
- T::GetFullyQualifiedName(), "", node, true);
- if (channel == nullptr) return false;
- return channel->logger() != LoggerConfig::NOT_LOGGED;
+ return config_remapper_.HasOriginalChannel<T>(name, node);
}
// Returns a list of all the original channels from remapping.
@@ -407,9 +390,10 @@
// Queues at least max_out_of_order_duration_ messages into channels_.
void QueueMessages();
- // Handle constructing a configuration with all the additional remapped
- // channels from calls to RemapLoggedChannel.
- void MakeRemappedConfig();
+
+ // Checks if any states have their event loops initialized which indicates
+ // events have been scheduled
+ void CheckEventsAreNotScheduled();
// Returns the number of nodes.
size_t nodes_count() const {
@@ -475,6 +459,7 @@
// details.
enum class ThreadedBuffering { kYes, kNo };
State(std::unique_ptr<TimestampMapper> timestamp_mapper,
+ TimestampQueueStrategy timestamp_queue_strategy,
message_bridge::MultiNodeNoncausalOffsetEstimator *multinode_filters,
std::function<void()> notice_realtime_end, const Node *node,
ThreadedBuffering threading,
@@ -514,17 +499,36 @@
return node_event_loop_factory_->boot_count();
}
+ // Reads all the timestamps into RAM so we don't need to manage buffering
+ // them. For logs where the timestamps are in separate files, this
+ // minimizes RAM usage in the cases where the log reader decides to buffer
+ // to the end of the file, or where the time estimation buffer needs to be
+ // set high to sort. This means we devote our RAM to holding lots of
+ // timestamps instead of timestamps and much larger data for a shorter
+ // period. For logs where timestamps are stored with the data, this
+ // triggers those files to be read twice.
+ void ReadTimestamps();
+
// Primes the queues inside State. Should be called before calling
// OldestMessageTime.
- void SeedSortedMessages();
+ void MaybeSeedSortedMessages();
void SetUpStartupTimer() {
const monotonic_clock::time_point start_time =
monotonic_start_time(boot_count());
if (start_time == monotonic_clock::min_time) {
- LOG(ERROR)
- << "No start time, skipping, please figure out when this happens";
- NotifyLogfileStart();
+ if (event_loop_->node()) {
+ LOG(ERROR) << "No start time for "
+ << event_loop_->node()->name()->string_view()
+ << ", skipping.";
+ } else {
+ LOG(ERROR) << "No start time, skipping.";
+ }
+
+ // This is called from OnRun. There is too much complexity in supporting
+ // OnStartup callbacks from inside OnRun. Instead, schedule a timer for
+ // "now", and have that do what we need.
+ startup_timer_->Schedule(event_loop_->monotonic_now());
return;
}
if (node_event_loop_factory_) {
@@ -757,6 +761,7 @@
void SendMessageTimings();
// Log file.
std::unique_ptr<TimestampMapper> timestamp_mapper_;
+ const TimestampQueueStrategy timestamp_queue_strategy_;
// Senders.
std::vector<std::unique_ptr<RawSender>> channels_;
@@ -907,26 +912,16 @@
message_bridge::NoncausalOffsetEstimator *GetFilter(const Node *node_a,
const Node *node_b);
+ // Returns the timestamp queueing strategy to use.
+ TimestampQueueStrategy ComputeTimestampQueueStrategy() const;
+
// List of filters for a connection. The pointer to the first node will be
// less than the second node.
std::unique_ptr<message_bridge::MultiNodeNoncausalOffsetEstimator> filters_;
- std::unique_ptr<FlatbufferDetachedBuffer<Configuration>>
- remapped_configuration_buffer_;
-
std::unique_ptr<SimulatedEventLoopFactory> event_loop_factory_unique_ptr_;
SimulatedEventLoopFactory *event_loop_factory_ = nullptr;
- // Map of channel indices to new name. The channel index will be an index into
- // logged_configuration(), and the string key will be the name of the channel
- // to send on instead of the logged channel name.
- struct RemappedChannel {
- std::string remapped_name;
- std::string new_type;
- };
- std::map<size_t, RemappedChannel> remapped_channels_;
- std::vector<MapT> maps_;
-
// Number of nodes which still have data to send. This is used to figure out
// when to exit.
size_t live_nodes_ = 0;
@@ -935,7 +930,6 @@
// running and have yet to hit the realtime end time, if any.
size_t live_nodes_with_realtime_time_end_ = 0;
- const Configuration *remapped_configuration_ = nullptr;
const Configuration *replay_configuration_ = nullptr;
// If a ReplayChannels was passed to LogReader, this will hold the
@@ -956,6 +950,7 @@
realtime_clock::time_point start_time_ = realtime_clock::min_time;
realtime_clock::time_point end_time_ = realtime_clock::max_time;
+ ConfigRemapper config_remapper_;
};
} // namespace logger
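Since the ReplayChannels alias now lives in replay_channels.h, here is a hedged sketch of limiting replay to a single channel; the constructor ordering follows the LogReader constructor earlier in this change, and the channel name and type are illustrative:

    #include "aos/events/logging/replay_channels.h"

    // Sketch: only replay aos.examples.Ping messages sent on /test.
    aos::logger::ReplayChannels channels{{"/test", "aos.examples.Ping"}};
    aos::logger::LogReader reader(aos::logger::SortParts(filenames),
                                  /*replay_configuration=*/nullptr, &channels);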
diff --git a/aos/events/logging/log_reader_utils_test.cc b/aos/events/logging/log_reader_utils_test.cc
index b61c9de..9cd64f2 100644
--- a/aos/events/logging/log_reader_utils_test.cc
+++ b/aos/events/logging/log_reader_utils_test.cc
@@ -17,7 +17,9 @@
All, MultinodeLoggerOneConfigTest,
::testing::Combine(::testing::Values(ConfigParams{
"multinode_pingpong_combined_config.json", true,
- kCombinedConfigSha1(), kCombinedConfigSha1()}),
+ kCombinedConfigSha1(), kCombinedConfigSha1(),
+ FileStrategy::kCombine,
+ ForceTimestampBuffering::kForceBufferTimestamps}),
::testing::ValuesIn(SupportedCompressionAlgorithms())));
// This test is to check if we are able to get the right channels from a log
@@ -91,7 +93,8 @@
{
LoggerState pi1_logger = MakeLoggerState(
- pi1_, &event_loop_factory_, SupportedCompressionAlgorithms()[0]);
+ pi1_, &event_loop_factory_, SupportedCompressionAlgorithms()[0],
+ FileStrategy::kKeepSeparate);
pi2_->DisableStatistics();
pi2_->Disconnect(pi1_->node());
pi1_->Disconnect(pi2_->node());
diff --git a/aos/events/logging/log_replayer.cc b/aos/events/logging/log_replayer.cc
index b69df4a..22d8b39 100644
--- a/aos/events/logging/log_replayer.cc
+++ b/aos/events/logging/log_replayer.cc
@@ -16,6 +16,8 @@
#include "aos/events/logging/log_reader.h"
#include "aos/events/logging/log_reader_utils.h"
#include "aos/events/logging/log_replayer_config_generated.h"
+#include "aos/events/logging/log_replayer_stats_generated.h"
+#include "aos/events/logging/log_replayer_stats_schema.h"
#include "aos/events/logging/logfile_sorting.h"
#include "aos/events/logging/logfile_utils.h"
#include "aos/events/logging/replay_timing_generated.h"
@@ -48,6 +50,8 @@
DEFINE_string(merge_with_config, "",
"A valid json string to be merged with config. This is used to "
"add extra applications needed to run only for log_replayer");
+DEFINE_bool(print_stats, true,
+ "if set, prints the LogReplayerStats message as JSON to stdout");
namespace aos::logger {
@@ -75,6 +79,16 @@
aos::configuration::GetMyNode(raw_config), channel_overrides);
}
+ // Add the LogReplayerStats channel.
+ const aos::Configuration *raw_config = &config.message();
+ aos::ChannelT channel_overrides;
+ channel_overrides.max_size = 10000;
+ channel_overrides.frequency = 1;
+ config = aos::configuration::AddChannelToConfiguration(
+ raw_config, "/replay",
+ aos::FlatbufferSpan<reflection::Schema>(aos::LogReplayerStatsSchema()),
+ aos::configuration::GetMyNode(raw_config), channel_overrides);
+
if (!FLAGS_merge_with_config.empty()) {
config = aos::configuration::MergeWithConfig(&config.message(),
FLAGS_merge_with_config);
@@ -139,7 +153,49 @@
event_loop.SkipAosLog();
event_loop.SkipTimingReport();
+ aos::Sender<aos::LogReplayerStats> stats_sender =
+ event_loop.MakeSender<aos::LogReplayerStats>("/replay");
+ auto builder = stats_sender.MakeBuilder();
+ auto node_name = builder.fbb()->CreateString(event_loop.node()->name());
+ flatbuffers::Offset<aos::ReplayConfig> replay_config_offset;
+ if (replay_config.has_value()) {
+ replay_config_offset =
+ aos::CopyFlatBuffer(&replay_config.value().message(), builder.fbb());
+ }
+
+ auto stats_builder = builder.MakeBuilder<aos::LogReplayerStats>();
+ if (replay_config.has_value()) {
+ stats_builder.add_replay_config(replay_config_offset);
+ }
+
reader.Register(&event_loop);
+
+ // Save off the start and end times of replay.
+ reader.OnStart(event_loop.node(), [&event_loop, &stats_builder,
+ &node_name]() {
+ stats_builder.add_node(node_name);
+ stats_builder.add_realtime_start_time(
+ std::chrono::nanoseconds(event_loop.realtime_now().time_since_epoch())
+ .count());
+
+ stats_builder.add_monotonic_start_time(
+ std::chrono::nanoseconds(
+ event_loop.monotonic_now().time_since_epoch())
+ .count());
+ });
+
+ reader.OnEnd(event_loop.node(), [&event_loop, &stats_builder, &builder]() {
+ stats_builder.add_realtime_end_time(
+ std::chrono::nanoseconds(event_loop.realtime_now().time_since_epoch())
+ .count());
+
+ stats_builder.add_monotonic_end_time(
+ std::chrono::nanoseconds(
+ event_loop.monotonic_now().time_since_epoch())
+ .count());
+ builder.CheckOk(builder.Send(stats_builder.Finish()));
+ });
+
reader.OnEnd(event_loop.node(), [&event_loop]() { event_loop.Exit(); });
if (FLAGS_plot_timing) {
@@ -152,6 +208,13 @@
event_loop.Run();
reader.Deregister();
+
+ if (FLAGS_print_stats) {
+ aos::Fetcher<aos::LogReplayerStats> stats_fetcher =
+ event_loop.MakeFetcher<aos::LogReplayerStats>("/replay");
+ CHECK(stats_fetcher.Fetch()) << "Failed to fetch LogReplayerStats!";
+ std::cout << aos::FlatbufferToJson(stats_fetcher.get());
+ }
}
return EXIT_SUCCESS;
diff --git a/aos/events/logging/log_replayer_stats.fbs b/aos/events/logging/log_replayer_stats.fbs
new file mode 100644
index 0000000..8e54fdb
--- /dev/null
+++ b/aos/events/logging/log_replayer_stats.fbs
@@ -0,0 +1,19 @@
+include "log_replayer_config.fbs";
+
+namespace aos;
+
+table LogReplayerStats {
+ // The ReplayConfig passed to log_replayer.
+ replay_config:ReplayConfig (id: 0);
+ // Realtime start and end times of log replay, in nanoseconds.
+ realtime_start_time:int64 (id: 1);
+ realtime_end_time:int64 (id: 2);
+ // Monotonic start and end times of log replay, in nanoseconds.
+ monotonic_start_time:int64 (id: 3);
+ monotonic_end_time:int64 (id: 4);
+ // Name of the node the log originated from.
+ // Note: Currently, only single node replay is supported.
+ node:string (id: 5);
+}
+
+root_type LogReplayerStats;
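For illustration only, the JSON printed by --print_stats would have roughly this shape; every value below is invented, and replay_config is omitted when no ReplayConfig was passed:

    {
      "realtime_start_time": 1694000000000000000,
      "realtime_end_time": 1694000030000000000,
      "monotonic_start_time": 100000000000,
      "monotonic_end_time": 130000000000,
      "node": "pi1"
    }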
diff --git a/aos/events/logging/log_stats.cc b/aos/events/logging/log_stats.cc
index bbed18f..2819a0b 100644
--- a/aos/events/logging/log_stats.cc
+++ b/aos/events/logging/log_stats.cc
@@ -21,6 +21,17 @@
"Only print channels that have a set max message size that is more "
"than double of the max message size.");
+DEFINE_double(
+ run_for, 0.0,
+ "If set to a positive value, only process the log for this many seconds. "
+ "Otherwise, process the log until the end of the log.");
+
+DEFINE_bool(
+ print_repack_size_diffs, false,
+ "Analyze how many bytes could be saved in each message when converted to "
+ "JSON and back. This can be helpful to identify code that is generating "
+ "inefficiently packed flatbuffer messages.");
+
// This class implements a histogram for tracking message period
// percentiles.
class Histogram {
@@ -112,11 +123,14 @@
class ChannelStats {
public:
ChannelStats(const aos::Channel *channel, const aos::Node *destination_node,
- aos::SimulatedEventLoopFactory *factory)
+ aos::SimulatedEventLoopFactory *factory,
+ const reflection::Schema *schema)
: channel_(channel),
config_(factory->configuration()),
factory_(factory),
- destination_node_(destination_node) {
+ schema_(CHECK_NOTNULL(schema)),
+ destination_node_(destination_node),
+ flatbuffer_type_(schema) {
// Multi-node channel
if (channel_->has_source_node() && channel_->has_destination_nodes() &&
channel_->destination_nodes()->size() > 0) {
@@ -132,6 +146,22 @@
// Adds a sample to the statistics.
void Add(const aos::Context &context) {
+ if (context.size > 0 && context.data != nullptr) {
+ // Perform a very naive repacking of the message. Ideally, we'd use
+ // something like RecursiveCopyFlatBuffer that works with schemas.
+ // For now, this is good enough.
+ const std::string json = aos::FlatbufferToJson(
+ schema_, static_cast<const uint8_t *>(context.data));
+ flatbuffers::DetachedBuffer buffer =
+ aos::JsonToFlatbuffer(json, flatbuffer_type_);
+
+ const ssize_t packed_size_reduction = static_cast<ssize_t>(context.size) -
+ static_cast<ssize_t>(buffer.size());
+ max_packed_size_reduction_ =
+ std::max(max_packed_size_reduction_, packed_size_reduction);
+ total_packed_size_reduction_ += packed_size_reduction;
+ }
+
max_message_size_ = std::max(max_message_size_, context.size);
total_message_size_ += context.size;
total_num_messages_++;
@@ -182,6 +212,10 @@
size_t max_message_size() const { return max_message_size_; }
size_t total_num_messages() const { return total_num_messages_; }
+ ssize_t max_packed_size_reduction() const {
+ return max_packed_size_reduction_;
+ }
+
double avg_messages_per_sec() const {
return total_num_messages_ / SecondsActive();
}
@@ -200,6 +234,10 @@
return total_message_size_ / SecondsActive();
}
+ ssize_t avg_packed_size_reduction() const {
+ return total_packed_size_reduction_ / total_num_messages_;
+ }
+
aos::realtime_clock::time_point channel_end_time() const {
return channel_end_time_;
}
@@ -229,6 +267,7 @@
const aos::Channel *channel_;
const aos::Configuration *config_;
aos::SimulatedEventLoopFactory *factory_;
+ const reflection::Schema *const schema_;
aos::realtime_clock::time_point channel_end_time_ =
aos::realtime_clock::min_time;
aos::monotonic_clock::time_point first_message_time_ =
@@ -247,6 +286,12 @@
size_t max_message_size_ = 0;
size_t total_message_size_ = 0;
+ // The size reduction (in bytes) after a naive repacking. A negative number
+ // indicates that the repacking generated a _bigger_ message than the
+ // original message.
+ ssize_t max_packed_size_reduction_ = 0;
+ ssize_t total_packed_size_reduction_ = 0;
+
// Count of messages which had remote timestamps
size_t num_messages_with_remote_ = 0;
// Sum of latencies in all messages sent on this channel if multinode
@@ -256,6 +301,8 @@
const aos::Node *source_node_ = nullptr;
const aos::Node *destination_node_;
+
+ const aos::FlatbufferType flatbuffer_type_;
};
struct LogfileStats {
@@ -335,16 +382,24 @@
}
// Add a record to the stats vector.
- channel_stats.push_back({channel, node, &log_reader_factory});
+ channel_stats.push_back(ChannelStats{
+ channel, node, &log_reader_factory,
+ aos::configuration::GetSchema(reader.configuration(),
+ channel->type()->string_view())});
// Lambda to read messages and parse for information
- stats_event_loop->MakeRawNoArgWatcher(
- channel,
- [&logfile_stats, &channel_stats, it](const aos::Context &context) {
- channel_stats[it].Add(context);
+ auto watcher = [&logfile_stats, &channel_stats,
+ it](const aos::Context &context) {
+ channel_stats[it].Add(context);
- // Update the overall logfile statistics
- logfile_stats.logfile_length += context.size;
- });
+ // Update the overall logfile statistics
+ logfile_stats.logfile_length += context.size;
+ };
+ if (FLAGS_print_repack_size_diffs) {
+ stats_event_loop->MakeRawWatcher(
+ channel, std::bind(watcher, ::std::placeholders::_1));
+ } else {
+ stats_event_loop->MakeRawNoArgWatcher(channel, watcher);
+ }
it++;
// TODO (Stephan): Frequency of messages per second
// - Sliding window
@@ -355,7 +410,13 @@
LOG(FATAL) << "Could not find any channels";
}
- log_reader_factory.Run();
+ if (FLAGS_run_for > 0.0) {
+ log_reader_factory.RunFor(
+ std::chrono::duration_cast<std::chrono::nanoseconds>(
+ std::chrono::duration<double>(FLAGS_run_for)));
+ } else {
+ log_reader_factory.Run();
+ }
std::cout << std::endl;
@@ -388,6 +449,13 @@
<< "bytes, " << channel_stats[i].Percentile() << ", "
<< channel_stats[i].AvgLatency();
std::cout << std::endl;
+ if (FLAGS_print_repack_size_diffs) {
+ std::cout << " " << channel_stats[i].avg_packed_size_reduction()
+ << " bytes packed reduction avg, "
+ << channel_stats[i].max_packed_size_reduction()
+ << " bytes packed reduction max";
+ std::cout << std::endl;
+ }
}
}
}
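
The `--print_repack_size_diffs` analysis above boils down to one round trip per message: unpack to JSON, repack, and compare byte counts. Below is a minimal sketch of that measurement, assuming the `aos::FlatbufferToJson`/`aos::JsonToFlatbuffer` helpers used in the hunk (the `aos/json_to_flatbuffer.h` header path is an assumption here).

```cpp
// Sketch of the repack measurement ChannelStats::Add() performs above.
#include <sys/types.h>  // ssize_t

#include <string>

#include "aos/json_to_flatbuffer.h"  // assumed header for the JSON helpers

// Returns how many bytes a naive JSON round-trip saves. A negative value
// means the repacked message came out *bigger* than the original.
ssize_t PackedSizeReduction(const reflection::Schema *schema,
                            const aos::FlatbufferType &type,
                            const uint8_t *data, size_t size) {
  const std::string json = aos::FlatbufferToJson(schema, data);
  const flatbuffers::DetachedBuffer repacked =
      aos::JsonToFlatbuffer(json, type);
  return static_cast<ssize_t>(size) - static_cast<ssize_t>(repacked.size());
}
```
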
diff --git a/aos/events/logging/log_writer.cc b/aos/events/logging/log_writer.cc
index db6bcc1..bc81a4b 100644
--- a/aos/events/logging/log_writer.cc
+++ b/aos/events/logging/log_writer.cc
@@ -27,10 +27,11 @@
node_(configuration::GetNode(configuration_, event_loop->node())),
node_index_(configuration::GetNodeIndex(configuration_, node_)),
name_(network::GetHostname()),
- timer_handler_(event_loop_->AddTimer(
- [this]() { DoLogData(event_loop_->monotonic_now(), true); })),
+ timer_handler_(event_loop_->AddTimer([this]() {
+ DoLogData(event_loop_->monotonic_now() - logging_delay_, true);
+ })),
server_statistics_fetcher_(
- configuration::MultiNode(event_loop_->configuration())
+ configuration::NodesCount(event_loop_->configuration()) > 1u
? event_loop_->MakeFetcher<message_bridge::ServerStatistics>(
"/aos")
: aos::Fetcher<message_bridge::ServerStatistics>()) {
@@ -151,7 +152,7 @@
const bool log_message = is_logged && is_readable;
bool log_delivery_times = false;
- if (configuration::MultiNode(configuration_)) {
+ if (configuration::NodesCount(configuration_) > 1u) {
const aos::Connection *connection =
configuration::ConnectionToNode(config_channel, node_);
@@ -249,7 +250,7 @@
aos::SizePrefixedFlatbufferDetachedBuffer<LogFileHeader> config_header =
PackConfiguration(configuration_);
config_sha256 = Sha256(config_header.span());
- LOG(INFO) << "Config sha256 of " << config_sha256;
+ VLOG(1) << "Config sha256 of " << config_sha256;
log_namer->WriteConfiguration(&config_header, config_sha256);
}
@@ -268,6 +269,7 @@
log_event_uuid_ = UUID::Random();
log_start_uuid_ = log_start_uuid;
+ log_namer_->SetHeaderTemplate(MakeHeader(config_sha256));
// We want to do as much work as possible before the initial Fetch. Time
// between that and actually starting to log opens up the possibility of
@@ -286,11 +288,11 @@
}
}
- log_namer_->SetHeaderTemplate(MakeHeader(config_sha256));
-
const aos::monotonic_clock::time_point beginning_time =
event_loop_->monotonic_now();
+ log_until_time_ = beginning_time;
+
// Grab data from each channel right before we declare the log file started
// so we can capture the latest message on each channel. This lets us have
// non periodic messages with configuration that now get logged.
@@ -338,77 +340,45 @@
}
std::unique_ptr<LogNamer> Logger::RestartLogging(
- std::unique_ptr<LogNamer> log_namer, std::optional<UUID> log_start_uuid) {
+ std::unique_ptr<LogNamer> log_namer, std::optional<UUID> log_start_uuid,
+ std::optional<monotonic_clock::time_point> end_time) {
CHECK(log_namer_) << ": Unexpected restart while not logging";
VLOG(1) << "Restarting logger for " << FlatbufferToJson(node_);
- // Make sure not to write past now so we don't risk out of order problems. We
- // don't want to get into a situation where we write out up to now + 0.1 sec,
- // and that operation takes ~0.1 seconds, so we end up writing a different
- // amount of the early and late channels. That would then result in the next
- // go around finding more than 0.1 sec of data on the early channels.
- //
- // Make sure we read up until "now" and log it. This sets us up so that we
- // are unlikely to fetch a message far in the future and have a ton of data
- // before the offical start time.
- monotonic_clock::time_point newest_record = monotonic_clock::min_time;
- while (true) {
- aos::monotonic_clock::time_point next_time =
- last_synchronized_time_ + polling_period_;
- const aos::monotonic_clock::time_point monotonic_now =
- event_loop_->monotonic_now();
- if (next_time > monotonic_now) {
- next_time = monotonic_now;
- }
+ // Grab a representative time on both the RT and monotonic clock.
+ // Average two monotonic clock readings, one taken before and one after, to
+ // reduce the error.
+ const aos::monotonic_clock::time_point monotonic_now1 =
+ event_loop_->monotonic_now();
+ const aos::realtime_clock::time_point realtime_now =
+ event_loop_->realtime_now();
+ const aos::monotonic_clock::time_point monotonic_now2 =
+ event_loop_->monotonic_now();
- bool wrote_messages = false;
- std::tie(wrote_messages, newest_record) = LogUntil(next_time);
-
- if (next_time == monotonic_now &&
- (!wrote_messages || newest_record < monotonic_now + polling_period_)) {
- // If we stopped writing messages, then we probably have stopped making
- // progress. If the newest record (unwritten or written) on a channel is
- // very close to the current time, then there won't be much data
- // officially after the end of the last log but before the start of the
- // current one. We need to pick the start of the current log to be after
- // the last message on record so we don't have holes in the log.
- break;
+ // Log until the provided end time.
+ if (end_time) {
+ CHECK_LE(*end_time, monotonic_now1) << ": Can't log into the future.";
+ // DoLogData is a bit fragile; only call it when time actually moves forward.
+ if (*end_time > last_synchronized_time_) {
+ DoLogData(*end_time, false);
}
}
- // We are now synchronized up to last_synchronized_time_. Our start time can
- // safely be "newest_record". But, we need to guarentee that the start time
- // is after the newest message we have a record of, and that we don't skip any
- // messages as we rotate. This means we can't call Fetch anywhere.
+ // We are now synchronized up to last_synchronized_time_. We only have a
+ // record of messages from before last_synchronized_time_, so it is a safe
+ // start time.
std::unique_ptr<LogNamer> old_log_namer = std::move(log_namer_);
log_namer_ = std::move(log_namer);
- // Now grab a representative time on both the RT and monotonic clock.
- // Average a monotonic clock before and after to reduce the error.
- const aos::monotonic_clock::time_point beginning_time =
- event_loop_->monotonic_now();
- const aos::realtime_clock::time_point beginning_time_rt =
- event_loop_->realtime_now();
- const aos::monotonic_clock::time_point beginning_time2 =
- event_loop_->monotonic_now();
-
- if (beginning_time > last_synchronized_time_) {
- LOG(WARNING) << "Took over " << polling_period_.count()
- << "ns to swap log_namer";
- }
-
- // Our start time is now the newest message we have a record of. We will
- // declare the old log "done", and start in on the new one, double-logging
- // anything we have a record of so we have all the messages from before the
- // start.
- const aos::monotonic_clock::time_point monotonic_start_time = newest_record;
+ // Our start time is now the point we just finished logging up to.
+ const aos::monotonic_clock::time_point monotonic_start_time =
+ last_synchronized_time_;
const aos::realtime_clock::time_point realtime_start_time =
- (beginning_time_rt + (monotonic_start_time.time_since_epoch() -
- ((beginning_time.time_since_epoch() +
- beginning_time2.time_since_epoch()) /
- 2)));
+ (realtime_now + (last_synchronized_time_.time_since_epoch() -
+ ((monotonic_now1.time_since_epoch() +
+ monotonic_now2.time_since_epoch()) /
+ 2)));
auto config_sha256 = WriteConfiguration(log_namer_.get());
@@ -459,7 +429,7 @@
VLOG(1) << "Logging node as " << FlatbufferToJson(node_) << " restart_time "
<< last_synchronized_time_ << ", took "
- << chrono::duration<double>(header_time - beginning_time).count()
+ << chrono::duration<double>(header_time - monotonic_now1).count()
<< " to prepare and write header, "
<< chrono::duration<double>(channel_time - header_time).count()
<< " to write initial channel messages, boot uuid "
@@ -497,7 +467,7 @@
void Logger::WriteHeader(aos::monotonic_clock::time_point monotonic_start_time,
aos::realtime_clock::time_point realtime_start_time) {
- if (configuration::MultiNode(configuration_)) {
+ if (configuration::NodesCount(configuration_) > 1u) {
server_statistics_fetcher_.Fetch();
}
@@ -520,7 +490,7 @@
}
void Logger::WriteMissingTimestamps() {
- if (configuration::MultiNode(configuration_)) {
+ if (configuration::NodesCount(configuration_) > 1u) {
server_statistics_fetcher_.Fetch();
} else {
return;
@@ -714,6 +684,8 @@
total_copy_time_ = std::chrono::nanoseconds::zero();
total_copy_count_ = 0;
total_copy_bytes_ = 0;
+ max_log_delay_ = std::chrono::nanoseconds::zero();
+ max_log_delay_channel_ = -1;
}
void Logger::Rotate() {
@@ -734,7 +706,12 @@
ContextDataCopier coppier(f.fetcher->context(), f.channel_index, f.log_type,
event_loop_);
- writer->CopyMessage(&coppier, source_node_boot_uuid, start);
+ aos::monotonic_clock::time_point message_time =
+ static_cast<int>(node_index_) != f.data_node_index
+ ? f.fetcher->context().monotonic_remote_time
+ : f.fetcher->context().monotonic_event_time;
+ writer->CopyDataMessage(&coppier, source_node_boot_uuid, start,
+ message_time);
RecordCreateMessageTime(start, coppier.end_time(), f);
VLOG(2) << "Wrote data as node " << FlatbufferToJson(node_)
@@ -759,7 +736,9 @@
ContextDataCopier coppier(f.fetcher->context(), f.channel_index,
LogType::kLogDeliveryTimeOnly, event_loop_);
- timestamp_writer->CopyMessage(&coppier, event_loop_->boot_uuid(), start);
+ timestamp_writer->CopyTimestampMessage(
+ &coppier, event_loop_->boot_uuid(), start,
+ f.fetcher->context().monotonic_event_time);
RecordCreateMessageTime(start, coppier.end_time(), f);
VLOG(2) << "Wrote timestamps as node " << FlatbufferToJson(node_)
@@ -809,8 +788,10 @@
RemoteMessageCopier coppier(msg, channel_index, monotonic_timestamp_time,
event_loop_);
- contents_writer->CopyMessage(&coppier, UUID::FromVector(msg->boot_uuid()),
- start);
+ contents_writer->CopyRemoteTimestampMessage(
+ &coppier, UUID::FromVector(msg->boot_uuid()), start,
+ monotonic_clock::time_point(
+ chrono::nanoseconds(msg->monotonic_sent_time())));
RecordCreateMessageTime(start, coppier.end_time(), f);
}
@@ -827,6 +808,8 @@
bool wrote_messages = false;
monotonic_clock::time_point newest_record = monotonic_clock::min_time;
+ log_until_time_ = t;
+
// Grab the latest ServerStatistics message. This will always have the
+ // opportunity to be >= the current time, so it will always represent any
// reboots which may have happened.
@@ -842,7 +825,8 @@
while (true) {
if (f.written) {
const auto start = event_loop_->monotonic_now();
- const bool got_new = f.fetcher->FetchNext();
+ const bool got_new =
+ f.fetcher->FetchNextIf(std::ref(fetch_next_if_fn_));
const auto end = event_loop_->monotonic_now();
RecordFetchResult(start, end, got_new, &f);
if (!got_new) {
@@ -857,7 +841,11 @@
}
// TODO(james): Write tests to exercise this logic.
- if (f.fetcher->context().monotonic_event_time >= t) {
+ CHECK_LE(f.fetcher->context().monotonic_event_time, t);
+
+ // At startup, we can end up grabbing a message at the current time.
+ // Ignore it.
+ if (f.fetcher->context().monotonic_event_time == t) {
break;
}
@@ -874,6 +862,9 @@
void Logger::DoLogData(const monotonic_clock::time_point end_time,
bool run_on_logged) {
+ if (end_time < last_synchronized_time_) return;
+
+ DCHECK(is_started());
// We want to guarantee that messages aren't out of order by more than
// max_out_of_order_duration. To do this, we need sync points. Every write
// cycle should be a sync point.
@@ -884,7 +875,7 @@
LogUntil(std::min(last_synchronized_time_ + polling_period_, end_time));
if (run_on_logged) {
- on_logged_period_();
+ on_logged_period_(last_synchronized_time_);
}
// If we missed cycles, we could be pretty far behind. Spin until we are
@@ -923,6 +914,11 @@
max_copy_time_channel_ = fetcher.channel_index;
max_copy_time_size_ = fetcher.fetcher->context().size;
}
+ const auto log_delay = end - fetcher.fetcher->context().monotonic_event_time;
+ if (log_delay > max_log_delay_) {
+ max_log_delay_ = log_delay;
+ max_log_delay_channel_ = fetcher.channel_index;
+ }
}
} // namespace logger
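
The clock handling in `RestartLogging` relies on a small correlation trick: one realtime sample bracketed by two monotonic samples, with the monotonic midpoint treated as the instant the realtime sample was taken. Here is a standalone `std::chrono` sketch of the same idea; the AOS clocks behave equivalently for this purpose.

```cpp
#include <chrono>

// Translates a monotonic time_point from the past into the realtime clock,
// using the bracketed-sample technique from RestartLogging above.
std::chrono::system_clock::time_point RealtimeForMonotonic(
    std::chrono::steady_clock::time_point past_monotonic) {
  const auto mono1 = std::chrono::steady_clock::now();
  const auto rt = std::chrono::system_clock::now();
  const auto mono2 = std::chrono::steady_clock::now();
  // The midpoint of the two monotonic reads approximates when rt was sampled.
  const auto mono_mid = mono1 + (mono2 - mono1) / 2;
  return rt + std::chrono::duration_cast<std::chrono::system_clock::duration>(
                  past_monotonic - mono_mid);
}
```
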
diff --git a/aos/events/logging/log_writer.h b/aos/events/logging/log_writer.h
index 837a02a..ec02b7d 100644
--- a/aos/events/logging/log_writer.h
+++ b/aos/events/logging/log_writer.h
@@ -54,11 +54,14 @@
logger_version_ = version;
}
- // Sets the callback to run after each period of data is logged. Defaults to
- // doing nothing.
+ // Sets the callback to run *after* each period of data is logged. Defaults
+ // to doing nothing. The argument to the callback is the time up to which we
+ // just wrote. This is not called when rotating or finishing logs.
//
// This callback may safely do things like call Rotate().
- void set_on_logged_period(std::function<void()> on_logged_period) {
+ void set_on_logged_period(
+ std::function<void(aos::monotonic_clock::time_point t)>
+ on_logged_period) {
on_logged_period_ = std::move(on_logged_period);
}
@@ -66,6 +69,17 @@
separate_config_ = separate_config;
}
+ // Sets the amount to run the logger behind the current time. This lets us
+ // make decisions about rotating or stopping logging before something happens.
+ // Using this to start logging in the past isn't yet supported. This can be
+ // changed at runtime, but will only influence future writes, not what is
+ // already written.
+ void set_logging_delay(std::chrono::nanoseconds logging_delay) {
+ logging_delay_ = logging_delay;
+ }
+ // Returns the current logging delay.
+ std::chrono::nanoseconds logging_delay() const { return logging_delay_; }
+
// Sets the period between polling the data. Defaults to 100ms.
//
// Changing this while a set of files is being written may result in
@@ -125,6 +139,13 @@
// The total number of bytes copied.
int64_t total_copy_bytes() const { return total_copy_bytes_; }
+ // The maximum time between when a message was sent and when it was logged.
+ // This is 0 if no message has been logged.
+ std::chrono::nanoseconds max_log_delay() const { return max_log_delay_; }
+ // The channel with the longest logging delay, or -1 if no messages have
+ // been logged.
+ int max_log_delay_channel() const { return max_log_delay_channel_; }
+
void ResetStatisics();
// Rotates the log file(s), triggering new part files to be written for each
@@ -139,11 +160,14 @@
void StartLogging(std::unique_ptr<LogNamer> log_namer,
std::optional<UUID> log_start_uuid = std::nullopt);
- // Restart logging using a new naming scheme. Intended for log rotation.
- // Returns a unique_ptr to the prior log_namer instance.
+ // Restarts logging using a new naming scheme. Intended for log rotation.
+ // Returns a unique_ptr to the prior log_namer instance. If provided,
+ // end_time is the time to log until. It must be in the past. Times before
+ // the last_synchronized_time are ignored.
std::unique_ptr<LogNamer> RestartLogging(
std::unique_ptr<LogNamer> log_namer,
- std::optional<UUID> log_start_uuid = std::nullopt);
+ std::optional<UUID> log_start_uuid = std::nullopt,
+ std::optional<monotonic_clock::time_point> end_time = std::nullopt);
// Stops logging. Ensures any messages through end_time make it into the log.
//
@@ -167,6 +191,10 @@
});
}
+ // Returns the current log event UUID. This is randomly assigned when the log
+ // starts or restarts.
+ const UUID &log_event_uuid() const { return log_event_uuid_; }
+
private:
// Structure to track both a fetcher, and if the data fetched has been
// written. We may want to delay writing data to disk so that we don't let
@@ -295,7 +323,10 @@
std::string logger_sha1_;
std::string logger_version_;
- std::function<void()> on_logged_period_ = []() {};
+ // The callback to get called on each logged period. See
+ // set_on_logged_period() above for more details.
+ std::function<void(aos::monotonic_clock::time_point t)> on_logged_period_ =
+ [](aos::monotonic_clock::time_point) {};
std::chrono::nanoseconds max_message_fetch_time_ =
std::chrono::nanoseconds::zero();
@@ -306,6 +337,9 @@
int total_message_fetch_count_ = 0;
int64_t total_message_fetch_bytes_ = 0;
+ std::chrono::nanoseconds max_log_delay_ = std::chrono::nanoseconds::zero();
+ int max_log_delay_channel_ = -1;
+
std::chrono::nanoseconds total_nop_fetch_time_ =
std::chrono::nanoseconds::zero();
int total_nop_fetch_count_ = 0;
@@ -331,6 +365,16 @@
// Fetcher for all the statistics from all the nodes.
aos::Fetcher<message_bridge::ServerStatistics> server_statistics_fetcher_;
+
+ monotonic_clock::time_point log_until_time_ = monotonic_clock::min_time;
+
+ std::function<bool(const Context &)> fetch_next_if_fn_ =
+ [this](const Context &context) {
+ return context.monotonic_event_time < log_until_time_;
+ };
+
+ // Amount of time to run the logger behind now.
+ std::chrono::nanoseconds logging_delay_ = std::chrono::nanoseconds(0);
};
} // namespace logger
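
One hypothetical way the new knobs could be wired together; `ConfigureLogger` and `next_rotation_time` are placeholders invented for illustration, not part of this change.

```cpp
#include <chrono>

// Runs the logger slightly behind realtime and rotates once a boundary of
// interest has been fully written. Uses only methods declared in the header
// above.
void ConfigureLogger(aos::logger::Logger &logger,
                     aos::monotonic_clock::time_point next_rotation_time) {
  // Write 100ms behind now so rotation decisions can be made before the
  // corresponding messages are committed to disk.
  logger.set_logging_delay(std::chrono::milliseconds(100));
  logger.set_on_logged_period(
      [&logger, next_rotation_time](aos::monotonic_clock::time_point t) {
        // t is how far the log has been written; rotate once the boundary
        // is on disk.
        if (t >= next_rotation_time) {
          logger.Rotate();
        }
      });
}
```

Note this toy callback would rotate on every period once past the boundary; a real caller would advance or clear `next_rotation_time` after the first rotation.
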
diff --git a/aos/events/logging/logfile_sorting.cc b/aos/events/logging/logfile_sorting.cc
index 59d18d0..b8530fb 100644
--- a/aos/events/logging/logfile_sorting.cc
+++ b/aos/events/logging/logfile_sorting.cc
@@ -11,6 +11,7 @@
#include "absl/container/btree_map.h"
#include "absl/strings/str_join.h"
+#include "aos/containers/error_list.h"
#include "aos/events/logging/file_operations.h"
#include "aos/events/logging/logfile_utils.h"
#include "aos/flatbuffer_merge.h"
@@ -48,7 +49,7 @@
}
bool ConfigOnly(const LogFileHeader *header) {
- CHECK_EQ(LogFileHeader::MiniReflectTypeTable()->num_elems, 34u);
+ CHECK_EQ(LogFileHeader::MiniReflectTypeTable()->num_elems, 35u);
if (header->has_monotonic_start_time()) return false;
if (header->has_realtime_start_time()) return false;
if (header->has_max_out_of_order_duration()) return false;
@@ -80,6 +81,7 @@
return false;
if (header->has_logger_sha1()) return false;
if (header->has_logger_version()) return false;
+ if (header->has_data_stored()) return false;
return header->has_configuration();
}
@@ -138,7 +140,7 @@
// Start by grouping all parts by UUID, and extracting the part index.
// Datastructure to hold all the info extracted from a set of parts which go
-// together so we can sort them afterwords.
+// together so we can sort them afterwards.
struct UnsortedLogParts {
// Start times.
aos::monotonic_clock::time_point monotonic_start_time;
@@ -165,6 +167,18 @@
std::vector<std::pair<std::string, int>> parts;
std::string config_sha256;
+
+ // Largest max out of order duration among parts with monotonic start
+ // time greater than min_time.
+ std::optional<std::chrono::nanoseconds>
+ max_out_of_order_duration_valid_start_time = std::nullopt;
+ // Largest max out of order duration among parts with monotonic start time
+ // equal to min_time.
+ std::optional<std::chrono::nanoseconds>
+ max_out_of_order_duration_min_start_time = std::nullopt;
+
+ // All the data types we've seen so far, or all of them if we don't know.
+ aos::ErrorList<StoredDataType> data_stored;
};
// Struct to hold both the node, and the parts associated with it.
@@ -370,6 +384,9 @@
const realtime_clock::time_point logger_realtime_start_time(
chrono::nanoseconds(
log_header->message().logger_realtime_start_time()));
+ const std::chrono::nanoseconds max_out_of_order_duration =
+ std::chrono::nanoseconds(
+ log_header->message().max_out_of_order_duration());
const std::string_view node =
log_header->message().has_node()
@@ -505,7 +522,12 @@
old_parts.back().parts.config_sha256 = configuration_sha256;
old_parts.back().unsorted_parts.emplace_back(
std::make_pair(first_message_time, part.name));
+ old_parts.back().parts.max_out_of_order_duration =
+ max_out_of_order_duration;
old_parts.back().name = name;
+ old_parts.back().parts.data_stored = {
+ StoredDataType::DATA, StoredDataType::TIMESTAMPS,
+ StoredDataType::REMOTE_TIMESTAMPS};
} else {
result->unsorted_parts.emplace_back(
std::make_pair(first_message_time, part.name));
@@ -583,6 +605,33 @@
} else {
CHECK_EQ(it->second.config_sha256, configuration_sha256);
}
+ if (log_header->message().has_data_stored()) {
+ for (const StoredDataType type : *log_header->message().data_stored()) {
+ it->second.data_stored.Set(type);
+ }
+ } else {
+ it->second.data_stored.Set(StoredDataType::DATA);
+ it->second.data_stored.Set(StoredDataType::TIMESTAMPS);
+ it->second.data_stored.Set(StoredDataType::REMOTE_TIMESTAMPS);
+ }
+ // Keep track of the largest max out of order durations based on
+ // whether monotonic start time is available. We'll decide which value to
+ // use later when creating log files.
+ if (monotonic_start_time == monotonic_clock::min_time) {
+ it->second.max_out_of_order_duration_min_start_time =
+ it->second.max_out_of_order_duration_min_start_time.has_value()
+ ? std::max(
+ it->second.max_out_of_order_duration_min_start_time.value(),
+ max_out_of_order_duration)
+ : max_out_of_order_duration;
+ } else {
+ it->second.max_out_of_order_duration_valid_start_time =
+ it->second.max_out_of_order_duration_valid_start_time.has_value()
+ ? std::max(it->second.max_out_of_order_duration_valid_start_time
+ .value(),
+ max_out_of_order_duration)
+ : max_out_of_order_duration;
+ }
// We've got a newer log with boot_uuids, and oldest timestamps. Fill in
// this->boot_times with the info we have found.
@@ -1885,7 +1934,18 @@
new_parts.parts_uuid = parts.first.first;
new_parts.node = std::move(parts.second.node);
new_parts.boots = boot_counts;
-
+ new_parts.data_stored.reserve(parts.second.data_stored.size());
+ for (const StoredDataType data : parts.second.data_stored) {
+ new_parts.data_stored.push_back(data);
+ }
+ // If no part files have a monotonic start time greater than min_time, use
+ // the max of whatever we have; otherwise choose the max out of order
+ // duration of parts with a monotonic start time greater than min_time.
+ new_parts.max_out_of_order_duration =
+ parts.second.max_out_of_order_duration_valid_start_time.has_value()
+ ? parts.second.max_out_of_order_duration_valid_start_time.value()
+ : parts.second.max_out_of_order_duration_min_start_time.value();
{
auto boot_count_it =
boot_counts->boot_count_map.find(new_parts.source_boot_uuid);
@@ -2096,9 +2156,19 @@
stream << ",\n \"logger_realtime_start_time\": \""
<< parts.logger_realtime_start_time << "\"";
}
+ stream << ",\n \"max_out_of_order_duration\": \""
+ << parts.max_out_of_order_duration.count() << "\"";
+
stream << ",\n \"monotonic_start_time\": \"" << parts.monotonic_start_time
<< "\",\n \"realtime_start_time\": \"" << parts.realtime_start_time
- << "\",\n \"parts\": [";
+ << "\",\n \"data_stored\": [";
+ for (size_t i = 0; i < parts.data_stored.size(); ++i) {
+ if (i != 0u) {
+ stream << ", ";
+ }
+ stream << "\"" << EnumNameStoredDataType(parts.data_stored[i]) << "\"";
+ }
+ stream << "],\n \"parts\": [";
for (size_t i = 0; i < parts.parts.size(); ++i) {
if (i != 0u) {
@@ -2113,7 +2183,7 @@
// Validates that collection of log files or log parts shares the same configs.
template <typename TCollection>
-bool CheckMatchingConfigs(const TCollection &items) {
+bool HasMatchingConfigsTemplate(const TCollection &items) {
const Configuration *config = nullptr;
for (const auto &item : items) {
VLOG(1) << item;
@@ -2134,12 +2204,23 @@
return true;
}
+// Validates that collection of log files or log parts shares the same configs.
+bool HasMatchingConfigs(const std::vector<LogFile> &items) {
+ return HasMatchingConfigsTemplate(items);
+}
+
// Provides unified access to config field stored in LogFile. It is used in
-// CheckMatchingConfigs.
+// HasMatchingConfigs.
inline const Configuration *GetConfig(const LogFile &log_file) {
return log_file.config.get();
}
+// Provides unified access to config field stored in LogPartsAccess. It is used
+// in HasMatchingConfigs.
+inline const Configuration *GetConfig(const LogPartsAccess &log_parts_access) {
+ return log_parts_access.config().get();
+}
+
// Output of LogPartsAccess for debug purposes.
std::ostream &operator<<(std::ostream &stream,
const LogPartsAccess &log_parts_access) {
@@ -2153,11 +2234,18 @@
: node_name_(node_name),
boot_index_(boot_index),
log_parts_(std::move(log_parts)) {
- CHECK_GT(log_parts_.size(), 0u) << ": Nothing was selected for node "
- << node_name_ << " boot " << boot_index_;
- CHECK(CheckMatchingConfigs(log_parts_));
+ if (log_parts_.empty()) {
+ VLOG(1) << "Nothing was selected for node " << node_name_ << " boot "
+ << boot_index_;
+ return;
+ }
+ CHECK(HasMatchingConfigsTemplate(log_parts_));
config_ = log_parts_.front().config();
+ for (LogPartsAccess &part : log_parts_) {
+ CHECK_EQ(config_.get(), part.config().get());
+ }
+
// Enforce that we are sorting things only from a single node from a single
// boot.
const std::string_view part0_source_boot_uuid =
@@ -2174,14 +2262,15 @@
std::optional<const LogSource *> log_source, std::vector<LogFile> log_files)
: log_source_(log_source), log_files_(std::move(log_files)) {
CHECK_GT(log_files_.size(), 0u);
- CHECK(CheckMatchingConfigs(log_files_));
- config_ = log_files_.front().config.get();
+ CHECK(HasMatchingConfigsTemplate(log_files_));
+ config_ = log_files_.front().config;
boots_ = log_files_.front().boots;
std::unordered_set<std::string> logger_nodes;
// Scan and collect all related nodes and number of reboots per node.
for (const LogFile &log_file : log_files_) {
+ CHECK_EQ(config_.get(), log_file.config.get());
for (const LogParts &part : log_file.parts) {
auto node_item = nodes_boots_.find(part.node);
if (node_item != nodes_boots_.end()) {
@@ -2206,12 +2295,28 @@
return node_item->second;
}
-SelectedLogParts LogFilesContainer::SelectParts(std::string_view node_name,
- size_t boot_index) const {
+SelectedLogParts LogFilesContainer::SelectParts(
+ std::string_view node_name, size_t boot_index,
+ const std::vector<StoredDataType> &types) const {
std::vector<LogPartsAccess> result;
for (const LogFile &log_file : log_files_) {
for (const LogParts &part : log_file.parts) {
if (part.node == node_name && part.boot_count == boot_index) {
+ bool found = false;
+ for (const StoredDataType type : types) {
+ for (const StoredDataType part_type : part.data_stored) {
+ if (type == part_type) {
+ found = true;
+ break;
+ }
+ }
+ if (found) {
+ break;
+ }
+ }
+ if (!found) {
+ continue;
+ }
result.emplace_back(log_source_, part);
}
}
@@ -2219,5 +2324,54 @@
return SelectedLogParts(node_name, boot_index, result);
}
+bool LogFilesContainer::TimestampsStoredSeparately() const {
+ for (const LogFile &log_file : log_files_) {
+ for (const LogParts &part : log_file.parts) {
+ bool has_data = false;
+ bool has_timestamps = false;
+ bool has_remote_timestamps = false;
+ for (StoredDataType type : part.data_stored) {
+ switch (type) {
+ case StoredDataType::DATA:
+ has_data = true;
+ break;
+ case StoredDataType::TIMESTAMPS:
+ has_timestamps = true;
+ break;
+ case StoredDataType::REMOTE_TIMESTAMPS:
+ has_remote_timestamps = true;
+ break;
+ }
+ }
+
+ if (has_data && (has_timestamps || has_remote_timestamps)) {
+ return false;
+ }
+ }
+ }
+ return true;
+}
+
+bool LogFilesContainer::HasTimestamps(std::string_view node_name) const {
+ for (const LogFile &log_file : log_files_) {
+ for (const LogParts &part : log_file.parts) {
+ if (part.node != node_name) {
+ continue;
+ }
+
+ for (StoredDataType type : part.data_stored) {
+ switch (type) {
+ case StoredDataType::DATA:
+ break;
+ case StoredDataType::TIMESTAMPS:
+ case StoredDataType::REMOTE_TIMESTAMPS:
+ return true;
+ }
+ }
+ }
+ }
+ return false;
+}
+
} // namespace logger
} // namespace aos
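
The new `SelectParts` filter above keeps a part only when its `data_stored` list intersects the requested types. The same test restated with `<algorithm>`, to make the intersection explicit; the unqualified `StoredDataType` namespacing is assumed from the surrounding code.

```cpp
#include <algorithm>
#include <vector>

// True iff the part stores at least one of the requested data types.
bool StoresAnyOf(const std::vector<StoredDataType> &part_types,
                 const std::vector<StoredDataType> &wanted) {
  return std::any_of(wanted.begin(), wanted.end(), [&](StoredDataType type) {
    return std::find(part_types.begin(), part_types.end(), type) !=
           part_types.end();
  });
}
```
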
diff --git a/aos/events/logging/logfile_sorting.h b/aos/events/logging/logfile_sorting.h
index b7f8297..e1aa620 100644
--- a/aos/events/logging/logfile_sorting.h
+++ b/aos/events/logging/logfile_sorting.h
@@ -84,6 +84,13 @@
// Information about all the boots that the system has observed.
std::shared_ptr<const Boots> boots;
+
+ // Highest max out of order duration among all parts.
+ std::chrono::nanoseconds max_out_of_order_duration;
+
+ // The types of data we know are in this log file. If we have no info, this
+ // will be a vector of all the potential types.
+ std::vector<StoredDataType> data_stored;
};
// Datastructure to hold parts from the same run of the logger which have no
@@ -143,6 +150,9 @@
// Sort parts of a single log.
std::vector<LogFile> SortParts(const LogSource &log_source);
+// Validates that collection of log files or log parts shares the same configs.
+bool HasMatchingConfigs(const std::vector<LogFile> &items);
+
// Recursively searches the file/folder for .bfbs and .bfbs.xz files and adds
// them to the vector.
void FindLogs(std::vector<internal::FileOperations::File> *files,
@@ -173,10 +183,16 @@
size_t boot_count() const { return log_parts_.boot_count; }
- const Configuration *config() const { return log_parts_.config.get(); }
+ std::shared_ptr<const Configuration> config() const {
+ return log_parts_.config;
+ }
std::optional<const LogSource *> log_source() const { return log_source_; }
+ std::chrono::nanoseconds max_out_of_order_duration() const {
+ return log_parts_.max_out_of_order_duration;
+ }
+
std::string GetPartAt(size_t index) const {
CHECK_LT(index, log_parts_.parts.size());
return log_parts_.parts[index];
@@ -192,12 +208,6 @@
LogParts log_parts_;
};
-// Provides unified access to config field stored in LogPartsAccess. It is used
-// in CheckMatchingConfigs.
-inline const Configuration *GetConfig(const LogPartsAccess &log_parts_access) {
- return log_parts_access.config();
-}
-
// Output of LogPartsAccess for debug purposes.
std::ostream &operator<<(std::ostream &stream,
const LogPartsAccess &log_parts_access);
@@ -212,20 +222,22 @@
auto begin() const { return log_parts_.begin(); }
auto end() const { return log_parts_.end(); }
+ bool empty() const { return log_parts_.empty(); }
+
// Config that is shared across all log parts.
- const Configuration *config() const { return config_; }
+ std::shared_ptr<const Configuration> config() const { return config_; }
const std::string &node_name() const { return node_name_; }
- // Number of boots found in the log parts.
- size_t boot_count() const { return boot_index_; }
+ // Returns the boot index found in the log parts.
+ size_t boot_index() const { return boot_index_; }
private:
std::string node_name_;
- size_t boot_index_;
+ size_t boot_index_ = 0u;
std::vector<LogPartsAccess> log_parts_;
- const Configuration *config_;
+ std::shared_ptr<const Configuration> config_ = nullptr;
};
// Container that keeps a sorted list of log files and provides functions that
@@ -241,7 +253,7 @@
explicit LogFilesContainer(const LogSource *log_source)
: LogFilesContainer(log_source, SortParts(*log_source)) {}
- // Returns true when at least on of the log files associated with node.
+ // Returns true when at least one of the log files is associated with the node.
bool ContainsPartsForNode(std::string_view node_name) const {
// TODO (Alexei): Implement
// https://en.cppreference.com/w/cpp/container/unordered_map/find with C++20
@@ -251,15 +263,16 @@
// Returns numbers of reboots found in log files associated with the node.
size_t BootsForNode(std::string_view node_name) const;
- // Get only log parts that associated with node and boot number.
- SelectedLogParts SelectParts(std::string_view node_name,
- size_t boot_index) const;
+ // Get only log parts that are associated with the node and boot number, and
+ // which contain data of any of the types provided.
+ SelectedLogParts SelectParts(std::string_view node_name, size_t boot_index,
+ const std::vector<StoredDataType> &types) const;
// It provides access to boots logged by all log files in the container.
const std::shared_ptr<const Boots> &boots() const { return boots_; }
// Access the configuration shared with all log files in the container.
- const Configuration *config() const { return config_; }
+ std::shared_ptr<const Configuration> config() const { return config_; }
// List of logger nodes for given set of log files.
const auto &logger_nodes() const { return logger_nodes_; }
@@ -268,6 +281,15 @@
// Review its usage.
std::string_view name() const { return log_files_[0].name; }
+ // Returns true if the timestamps (remote and local) are stored only in files
+ // distinct from the data.
+ bool TimestampsStoredSeparately() const;
+
+ // Returns true if we have timestamps in any of the files.
+ bool HasTimestamps(std::string_view node_name) const;
+
+ const std::vector<LogFile> &log_files() const { return log_files_; }
+
private:
LogFilesContainer(std::optional<const LogSource *> log_source,
std::vector<LogFile> log_files);
@@ -275,7 +297,7 @@
std::optional<const LogSource *> log_source_;
std::vector<LogFile> log_files_;
- const Configuration *config_;
+ std::shared_ptr<const Configuration> config_;
std::shared_ptr<const Boots> boots_;
// Keeps information about nodes and number of reboots per node.
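
The `max_out_of_order_duration` plumbing added in logfile_sorting.cc tracks two running maxima, one for parts with a real monotonic start time and one for parts stuck at `min_time`, then prefers the former when filling in this field. A self-contained sketch of that bookkeeping:

```cpp
#include <algorithm>
#include <chrono>
#include <optional>

struct MaxOutOfOrderTracker {
  std::optional<std::chrono::nanoseconds> valid_start_max;
  std::optional<std::chrono::nanoseconds> min_start_max;

  // Accumulate one part's max out of order duration into the right bucket.
  void Add(bool has_valid_start_time, std::chrono::nanoseconds duration) {
    std::optional<std::chrono::nanoseconds> &slot =
        has_valid_start_time ? valid_start_max : min_start_max;
    slot = slot.has_value() ? std::max(*slot, duration) : duration;
  }

  // Fall back to the min-start bucket only when no part had a real start
  // time, matching the selection when the LogFile entries are created.
  std::chrono::nanoseconds Resolve() const {
    return valid_start_max.has_value() ? *valid_start_max
                                       : min_start_max.value();
  }
};
```
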
diff --git a/aos/events/logging/logfile_utils.cc b/aos/events/logging/logfile_utils.cc
index ec6bff3..02cff6d 100644
--- a/aos/events/logging/logfile_utils.cc
+++ b/aos/events/logging/logfile_utils.cc
@@ -1598,7 +1598,9 @@
PartsMessageReader::PartsMessageReader(LogPartsAccess log_parts_access)
: log_parts_access_(std::move(log_parts_access)),
- message_reader_(MakeSpanReader(log_parts_access_, 0)) {
+ message_reader_(MakeSpanReader(log_parts_access_, 0)),
+ max_out_of_order_duration_(
+ log_parts_access_.max_out_of_order_duration()) {
if (log_parts_access_.size() >= 2) {
next_message_reader_.emplace(MakeSpanReader(log_parts_access_, 1));
}
@@ -1606,8 +1608,9 @@
}
void PartsMessageReader::ComputeBootCounts() {
- boot_counts_.assign(configuration::NodesCount(log_parts_access_.config()),
- std::nullopt);
+ boot_counts_.assign(
+ configuration::NodesCount(log_parts_access_.config().get()),
+ std::nullopt);
const auto boots = log_parts_access_.parts().boots;
@@ -1634,7 +1637,7 @@
// Older multi-node logs which are guaranteed to have UUIDs logged, or
// single node log files with boot UUIDs in the header. We only know how to
// order certain boots in certain circumstances.
- if (configuration::MultiNode(log_parts_access_.config()) || boots) {
+ if (configuration::MultiNode(log_parts_access_.config().get()) || boots) {
for (size_t node_index = 0; node_index < boot_counts_.size();
++node_index) {
if (boots->boots[node_index].size() == 1u) {
@@ -1702,11 +1705,9 @@
}
bool Message::operator<(const Message &m2) const {
- CHECK_EQ(this->timestamp.boot, m2.timestamp.boot);
-
- if (this->timestamp.time < m2.timestamp.time) {
+ if (this->timestamp < m2.timestamp) {
return true;
- } else if (this->timestamp.time > m2.timestamp.time) {
+ } else if (this->timestamp > m2.timestamp) {
return false;
}
@@ -1721,66 +1722,69 @@
bool Message::operator>=(const Message &m2) const { return !(*this < m2); }
bool Message::operator==(const Message &m2) const {
- CHECK_EQ(this->timestamp.boot, m2.timestamp.boot);
-
- return timestamp.time == m2.timestamp.time &&
- channel_index == m2.channel_index && queue_index == m2.queue_index;
+ return timestamp == m2.timestamp && channel_index == m2.channel_index &&
+ queue_index == m2.queue_index;
}
-std::ostream &operator<<(std::ostream &os, const UnpackedMessageHeader &m) {
- os << "{.channel_index=" << m.channel_index
- << ", .monotonic_sent_time=" << m.monotonic_sent_time
- << ", .realtime_sent_time=" << m.realtime_sent_time
- << ", .queue_index=" << m.queue_index;
- if (m.monotonic_remote_time) {
- os << ", .monotonic_remote_time=" << *m.monotonic_remote_time;
+bool Message::operator<=(const Message &m2) const {
+ return *this == m2 || *this < m2;
+}
+
+std::ostream &operator<<(std::ostream &os, const UnpackedMessageHeader &msg) {
+ os << "{.channel_index=" << msg.channel_index
+ << ", .monotonic_sent_time=" << msg.monotonic_sent_time
+ << ", .realtime_sent_time=" << msg.realtime_sent_time
+ << ", .queue_index=" << msg.queue_index;
+ if (msg.monotonic_remote_time) {
+ os << ", .monotonic_remote_time=" << *msg.monotonic_remote_time;
}
os << ", .realtime_remote_time=";
- PrintOptionalOrNull(&os, m.realtime_remote_time);
+ PrintOptionalOrNull(&os, msg.realtime_remote_time);
os << ", .remote_queue_index=";
- PrintOptionalOrNull(&os, m.remote_queue_index);
- if (m.has_monotonic_timestamp_time) {
- os << ", .monotonic_timestamp_time=" << m.monotonic_timestamp_time;
+ PrintOptionalOrNull(&os, msg.remote_queue_index);
+ if (msg.has_monotonic_timestamp_time) {
+ os << ", .monotonic_timestamp_time=" << msg.monotonic_timestamp_time;
}
os << "}";
return os;
}
-std::ostream &operator<<(std::ostream &os, const Message &m) {
- os << "{.channel_index=" << m.channel_index
- << ", .queue_index=" << m.queue_index << ", .timestamp=" << m.timestamp;
- if (m.data != nullptr) {
- if (m.data->remote_queue_index.has_value()) {
- os << ", .remote_queue_index=" << *m.data->remote_queue_index;
+std::ostream &operator<<(std::ostream &os, const Message &msg) {
+ os << "{.channel_index=" << msg.channel_index
+ << ", .queue_index=" << msg.queue_index
+ << ", .timestamp=" << msg.timestamp;
+ if (msg.data != nullptr) {
+ if (msg.data->remote_queue_index.has_value()) {
+ os << ", .remote_queue_index=" << *msg.data->remote_queue_index;
}
- if (m.data->monotonic_remote_time.has_value()) {
- os << ", .monotonic_remote_time=" << *m.data->monotonic_remote_time;
+ if (msg.data->monotonic_remote_time.has_value()) {
+ os << ", .monotonic_remote_time=" << *msg.data->monotonic_remote_time;
}
- os << ", .data=" << m.data;
+ os << ", .data=" << msg.data;
}
os << "}";
return os;
}
-std::ostream &operator<<(std::ostream &os, const TimestampedMessage &m) {
- os << "{.channel_index=" << m.channel_index
- << ", .queue_index=" << m.queue_index
- << ", .monotonic_event_time=" << m.monotonic_event_time
- << ", .realtime_event_time=" << m.realtime_event_time;
- if (m.remote_queue_index != BootQueueIndex::Invalid()) {
- os << ", .remote_queue_index=" << m.remote_queue_index;
+std::ostream &operator<<(std::ostream &os, const TimestampedMessage &msg) {
+ os << "{.channel_index=" << msg.channel_index
+ << ", .queue_index=" << msg.queue_index
+ << ", .monotonic_event_time=" << msg.monotonic_event_time
+ << ", .realtime_event_time=" << msg.realtime_event_time;
+ if (msg.remote_queue_index != BootQueueIndex::Invalid()) {
+ os << ", .remote_queue_index=" << msg.remote_queue_index;
}
- if (m.monotonic_remote_time != BootTimestamp::min_time()) {
- os << ", .monotonic_remote_time=" << m.monotonic_remote_time;
+ if (msg.monotonic_remote_time != BootTimestamp::min_time()) {
+ os << ", .monotonic_remote_time=" << msg.monotonic_remote_time;
}
- if (m.realtime_remote_time != realtime_clock::min_time) {
- os << ", .realtime_remote_time=" << m.realtime_remote_time;
+ if (msg.realtime_remote_time != realtime_clock::min_time) {
+ os << ", .realtime_remote_time=" << msg.realtime_remote_time;
}
- if (m.monotonic_timestamp_time != BootTimestamp::min_time()) {
- os << ", .monotonic_timestamp_time=" << m.monotonic_timestamp_time;
+ if (msg.monotonic_timestamp_time != BootTimestamp::min_time()) {
+ os << ", .monotonic_timestamp_time=" << msg.monotonic_timestamp_time;
}
- if (m.data != nullptr) {
- os << ", .data=" << *m.data;
+ if (msg.data != nullptr) {
+ os << ", .data=" << *msg.data;
} else {
os << ", .data=nullptr";
}
@@ -1805,40 +1809,41 @@
break;
}
- std::shared_ptr<UnpackedMessageHeader> m =
+ std::shared_ptr<UnpackedMessageHeader> msg =
parts_message_reader_.ReadMessage();
// No data left, sorted forever, work through what is left.
- if (!m) {
+ if (!msg) {
sorted_until_ = monotonic_clock::max_time;
break;
}
size_t monotonic_timestamp_boot = 0;
- if (m->has_monotonic_timestamp_time) {
+ if (msg->has_monotonic_timestamp_time) {
monotonic_timestamp_boot = parts().logger_boot_count;
}
size_t monotonic_remote_boot = 0xffffff;
- if (m->monotonic_remote_time.has_value()) {
- const Node *node =
- parts().config->nodes()->Get(source_node_index_[m->channel_index]);
+ if (msg->monotonic_remote_time.has_value()) {
+ const Node *node = parts().config->nodes()->Get(
+ source_node_index_[msg->channel_index]);
std::optional<size_t> boot = parts_message_reader_.boot_count(
- source_node_index_[m->channel_index]);
+ source_node_index_[msg->channel_index]);
CHECK(boot) << ": Failed to find boot for node '" << MaybeNodeName(node)
- << "', with index " << source_node_index_[m->channel_index];
+ << "', with index "
+ << source_node_index_[msg->channel_index];
monotonic_remote_boot = *boot;
}
messages_.insert(
- Message{.channel_index = m->channel_index,
+ Message{.channel_index = msg->channel_index,
.queue_index = BootQueueIndex{.boot = parts().boot_count,
- .index = m->queue_index},
+ .index = msg->queue_index},
.timestamp = BootTimestamp{.boot = parts().boot_count,
- .time = m->monotonic_sent_time},
+ .time = msg->monotonic_sent_time},
.monotonic_remote_boot = monotonic_remote_boot,
.monotonic_timestamp_boot = monotonic_timestamp_boot,
- .data = std::move(m)});
+ .data = std::move(msg)});
// Now, update sorted_until_ to match the new message.
if (parts_message_reader_.newest_timestamp() >
@@ -1862,6 +1867,8 @@
CHECK_GE(messages_.begin()->timestamp.time, last_message_time_)
<< DebugString() << " reading " << parts_message_reader_.filename();
last_message_time_ = messages_.begin()->timestamp.time;
+ VLOG(1) << this << " Front, sorted until " << sorted_until_ << " for "
+ << (*messages_.begin()) << " on " << parts_message_reader_.filename();
return &(*messages_.begin());
}
@@ -1872,9 +1879,9 @@
ss << "messages: [\n";
int count = 0;
bool no_dots = true;
- for (const Message &m : messages_) {
+ for (const Message &msg : messages_) {
if (count < 15 || count > static_cast<int>(messages_.size()) - 15) {
- ss << m << "\n";
+ ss << msg << "\n";
} else if (no_dots) {
ss << "...\n";
no_dots = false;
@@ -1885,31 +1892,25 @@
return ss.str();
}
-PartsMerger::PartsMerger(std::string_view node_name, size_t boot_count,
- const LogFilesContainer &log_files) {
- const auto parts = log_files.SelectParts(node_name, boot_count);
- node_ = configuration::GetNodeIndex(parts.config(), node_name);
-
- for (LogPartsAccess part : parts) {
- message_sorters_.emplace_back(std::move(part));
- }
-
- monotonic_start_time_ = monotonic_clock::max_time;
- realtime_start_time_ = realtime_clock::min_time;
- for (const MessageSorter &message_sorter : message_sorters_) {
+// Class to merge start times cleanly, reusably, and incrementally.
+class StartTimes {
+ public:
+ void Update(monotonic_clock::time_point new_monotonic_start_time,
+ realtime_clock::time_point new_realtime_start_time) {
// We want to capture the earliest meaningful start time here. The start
// time defaults to min_time when there's no meaningful value to report, so
// let's ignore those.
- if (message_sorter.monotonic_start_time() != monotonic_clock::min_time) {
+ if (new_monotonic_start_time != monotonic_clock::min_time) {
bool accept = false;
// We want to prioritize start times from the logger node. Really, we
// want to prioritize start times with a valid realtime_clock time. So,
// if we have a start time without a RT clock, prefer a start time with a
// RT clock, even it if is later.
- if (message_sorter.realtime_start_time() != realtime_clock::min_time) {
+ if (new_realtime_start_time != realtime_clock::min_time) {
// We've got a good one. See if the current start time has a good RT
// clock, or if we should use this one instead.
- if (message_sorter.monotonic_start_time() < monotonic_start_time_) {
+ if (new_monotonic_start_time < monotonic_start_time_ ||
+ monotonic_start_time_ == monotonic_clock::min_time) {
accept = true;
} else if (realtime_start_time_ == realtime_clock::min_time) {
// The previous start time doesn't have a good RT time, so it is very
@@ -1919,23 +1920,45 @@
}
} else if (realtime_start_time_ == realtime_clock::min_time) {
// We don't have a RT time, so take the oldest.
- if (message_sorter.monotonic_start_time() < monotonic_start_time_) {
+ if (new_monotonic_start_time < monotonic_start_time_ ||
+ monotonic_start_time_ == monotonic_clock::min_time) {
accept = true;
}
}
if (accept) {
- monotonic_start_time_ = message_sorter.monotonic_start_time();
- realtime_start_time_ = message_sorter.realtime_start_time();
+ monotonic_start_time_ = new_monotonic_start_time;
+ realtime_start_time_ = new_realtime_start_time;
}
}
}
- // If there was no meaningful start time reported, just use min_time.
- if (monotonic_start_time_ == monotonic_clock::max_time) {
- monotonic_start_time_ = monotonic_clock::min_time;
- realtime_start_time_ = realtime_clock::min_time;
+ monotonic_clock::time_point monotonic_start_time() const {
+ return monotonic_start_time_;
}
+ realtime_clock::time_point realtime_start_time() const {
+ return realtime_start_time_;
+ }
+
+ private:
+ monotonic_clock::time_point monotonic_start_time_ = monotonic_clock::min_time;
+ realtime_clock::time_point realtime_start_time_ = realtime_clock::min_time;
+};
+
+PartsMerger::PartsMerger(SelectedLogParts &&parts) {
+ node_ = configuration::GetNodeIndex(parts.config().get(), parts.node_name());
+
+ for (LogPartsAccess part : parts) {
+ message_sorters_.emplace_back(std::move(part));
+ }
+
+ StartTimes start_times;
+ for (const MessageSorter &message_sorter : message_sorters_) {
+ start_times.Update(message_sorter.monotonic_start_time(),
+ message_sorter.realtime_start_time());
+ }
+ monotonic_start_time_ = start_times.monotonic_start_time();
+ realtime_start_time_ = start_times.realtime_start_time();
}
std::vector<const LogParts *> PartsMerger::Parts() const {
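
For a concrete feel of the `StartTimes` preference rules: an earlier start with no realtime clock loses to a later start that carries one. `StartTimes` is file-local to logfile_utils.cc, so this illustrative check assumes access to it, and the values are invented.

```cpp
#include <cassert>
#include <chrono>

void StartTimesExample() {
  StartTimes times;
  // An early boot start recorded before any realtime clock was available.
  times.Update(monotonic_clock::time_point(std::chrono::seconds(1)),
               realtime_clock::min_time);
  // A later start that carries a realtime clock; per the rules above it
  // displaces the earlier RT-less one even though it is newer.
  times.Update(monotonic_clock::time_point(std::chrono::seconds(5)),
               realtime_clock::time_point(std::chrono::seconds(1700000000)));
  assert(times.realtime_start_time() != realtime_clock::min_time);
}
```
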
@@ -1952,6 +1975,8 @@
if (current_ != nullptr) {
Message *result = current_->Front();
CHECK_GE(result->timestamp.time, last_message_time_);
+ VLOG(1) << this << " PartsMerger::Front for node " << node_name() << " "
+ << *result;
return result;
}
@@ -1960,25 +1985,25 @@
Message *oldest = nullptr;
sorted_until_ = monotonic_clock::max_time;
for (MessageSorter &message_sorter : message_sorters_) {
- Message *m = message_sorter.Front();
- if (!m) {
+ Message *msg = message_sorter.Front();
+ if (!msg) {
sorted_until_ = std::min(sorted_until_, message_sorter.sorted_until());
continue;
}
- if (oldest == nullptr || *m < *oldest) {
- oldest = m;
+ if (oldest == nullptr || *msg < *oldest) {
+ oldest = msg;
current_ = &message_sorter;
- } else if (*m == *oldest) {
+ } else if (*msg == *oldest) {
// Found a duplicate. If there is a choice, we want the one which has
// the timestamp time.
- if (!m->data->has_monotonic_timestamp_time) {
+ if (!msg->data->has_monotonic_timestamp_time) {
message_sorter.PopFront();
} else if (!oldest->data->has_monotonic_timestamp_time) {
current_->PopFront();
current_ = &message_sorter;
- oldest = m;
+ oldest = msg;
} else {
- CHECK_EQ(m->data->monotonic_timestamp_time,
+ CHECK_EQ(msg->data->monotonic_timestamp_time,
oldest->data->monotonic_timestamp_time);
message_sorter.PopFront();
}
@@ -1991,6 +2016,11 @@
if (oldest) {
CHECK_GE(oldest->timestamp.time, last_message_time_);
last_message_time_ = oldest->timestamp.time;
+ if (monotonic_oldest_time_ > oldest->timestamp.time) {
+ VLOG(1) << this << " Updating oldest to " << oldest->timestamp.time
+ << " for node " << node_name() << " with a start time of "
+ << monotonic_start_time_ << " " << *oldest;
+ }
monotonic_oldest_time_ =
std::min(monotonic_oldest_time_, oldest->timestamp.time);
} else {
@@ -1999,6 +2029,12 @@
// Return the oldest message found. This will be nullptr if nothing was
// found, indicating there is nothing left.
+ if (oldest) {
+ VLOG(1) << this << " PartsMerger::Front for node " << node_name() << " "
+ << *oldest;
+ } else {
+ VLOG(1) << this << " PartsMerger::Front for node " << node_name();
+ }
return oldest;
}
@@ -2009,29 +2045,52 @@
}
BootMerger::BootMerger(std::string_view node_name,
- const LogFilesContainer &log_files) {
+ const LogFilesContainer &log_files,
+ const std::vector<StoredDataType> &types)
+ : configuration_(log_files.config()),
+ node_(configuration::GetNodeIndex(configuration_.get(), node_name)) {
size_t number_of_boots = log_files.BootsForNode(node_name);
parts_mergers_.reserve(number_of_boots);
for (size_t i = 0; i < number_of_boots; ++i) {
VLOG(2) << "Boot " << i;
- parts_mergers_.emplace_back(
- std::make_unique<PartsMerger>(node_name, i, log_files));
+ SelectedLogParts selected_parts =
+ log_files.SelectParts(node_name, i, types);
+ // We are guaranteed to have something each boot, but not guaranteed to have
+ // both timestamps and data for each boot. If we don't have anything, don't
+ // create a parts merger. The rest of this class will detect that and
+ // ignore it as required.
+ if (selected_parts.empty()) {
+ parts_mergers_.emplace_back(nullptr);
+ } else {
+ parts_mergers_.emplace_back(
+ std::make_unique<PartsMerger>(std::move(selected_parts)));
+ }
}
}
-Message *BootMerger::Front() {
- Message *result = parts_mergers_[index_]->Front();
+std::string_view BootMerger::node_name() const {
+ return configuration::NodeName(configuration().get(), node());
+}
- if (result != nullptr) {
- return result;
+Message *BootMerger::Front() {
+ if (parts_mergers_[index_].get() != nullptr) {
+ Message *result = parts_mergers_[index_]->Front();
+
+ if (result != nullptr) {
+ VLOG(1) << this << " BootMerger::Front " << node_name() << " " << *result;
+ return result;
+ }
}
if (index_ + 1u == parts_mergers_.size()) {
// At the end of the last node merger, just return.
+ VLOG(1) << this << " BootMerger::Front " << node_name() << " nullptr";
return nullptr;
} else {
++index_;
- return Front();
+ Message *result = Front();
+ if (result != nullptr) {
+ VLOG(1) << this << " BootMerger::Front " << node_name() << " " << *result;
+ }
+ return result;
}
}
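
`BootMerger::Front()` now has to step over boots whose parts merger is null because no files matched the requested `StoredDataType`s. The traversal, restated generically (assumes at least one boot, as the real class does):

```cpp
#include <memory>
#include <vector>

template <typename Merger>
auto FrontAcrossBoots(const std::vector<std::unique_ptr<Merger>> &mergers,
                      size_t &index) -> decltype(mergers[index]->Front()) {
  while (true) {
    if (mergers[index] != nullptr) {
      auto *result = mergers[index]->Front();
      if (result != nullptr) {
        return result;
      }
    }
    if (index + 1 == mergers.size()) {
      // At the end of the last boot; nothing left.
      return nullptr;
    }
    // This boot is exhausted (or produced no parts); advance to the next.
    ++index;
  }
}
```
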
@@ -2040,6 +2099,8 @@
std::vector<const LogParts *> BootMerger::Parts() const {
std::vector<const LogParts *> results;
for (const std::unique_ptr<PartsMerger> &parts_merger : parts_mergers_) {
+ if (!parts_merger) continue;
+
std::vector<const LogParts *> node_parts = parts_merger->Parts();
results.insert(results.end(), std::make_move_iterator(node_parts.begin()),
@@ -2049,17 +2110,241 @@
return results;
}
-TimestampMapper::TimestampMapper(std::string_view node_name,
- const LogFilesContainer &log_files)
- : boot_merger_(node_name, log_files),
- timestamp_callback_([](TimestampedMessage *) {}) {
- for (const LogParts *part : boot_merger_.Parts()) {
- if (!configuration_) {
- configuration_ = part->config;
- } else {
- CHECK_EQ(configuration_.get(), part->config.get());
- }
+monotonic_clock::time_point BootMerger::monotonic_start_time(
+ size_t boot) const {
+ CHECK_LT(boot, parts_mergers_.size());
+ if (parts_mergers_[boot]) {
+ return parts_mergers_[boot]->monotonic_start_time();
}
+ return monotonic_clock::min_time;
+}
+
+realtime_clock::time_point BootMerger::realtime_start_time(size_t boot) const {
+ CHECK_LT(boot, parts_mergers_.size());
+ if (parts_mergers_[boot]) {
+ return parts_mergers_[boot]->realtime_start_time();
+ }
+ return realtime_clock::min_time;
+}
+
+monotonic_clock::time_point BootMerger::monotonic_oldest_time(
+ size_t boot) const {
+ CHECK_LT(boot, parts_mergers_.size());
+ if (parts_mergers_[boot]) {
+ return parts_mergers_[boot]->monotonic_oldest_time();
+ }
+ return monotonic_clock::max_time;
+}
+
+bool BootMerger::started() const {
+ if (index_ == 0) {
+ if (!parts_mergers_[0]) {
+ return false;
+ }
+ return parts_mergers_[index_]->sorted_until() != monotonic_clock::min_time;
+ }
+ return true;
+}
+
+SplitTimestampBootMerger::SplitTimestampBootMerger(
+ std::string_view node_name, const LogFilesContainer &log_files,
+ TimestampQueueStrategy timestamp_queue_strategy)
+ : boot_merger_(node_name, log_files,
+ (timestamp_queue_strategy ==
+ TimestampQueueStrategy::kQueueTimestampsAtStartup)
+ ? std::vector<StoredDataType>{StoredDataType::DATA}
+ : std::vector<StoredDataType>{
+ StoredDataType::DATA, StoredDataType::TIMESTAMPS,
+ StoredDataType::REMOTE_TIMESTAMPS}) {
+ // Create the timestamp_boot_merger_ only if we are asked to, and only if
+ // there are files to put in it. We don't need it for a data-only log.
+ if (timestamp_queue_strategy ==
+ TimestampQueueStrategy::kQueueTimestampsAtStartup &&
+ log_files.HasTimestamps(node_name)) {
+ timestamp_boot_merger_ = std::make_unique<BootMerger>(
+ node_name, log_files,
+ std::vector<StoredDataType>{StoredDataType::TIMESTAMPS,
+ StoredDataType::REMOTE_TIMESTAMPS});
+ }
+
+ size_t number_of_boots = log_files.BootsForNode(node_name);
+ monotonic_start_time_.reserve(number_of_boots);
+ realtime_start_time_.reserve(number_of_boots);
+
+ // Start times are split across the timestamp boot merger and the data boot
+ // merger. Pull from both and combine them to get the same answer as before.
+ for (size_t i = 0u; i < number_of_boots; ++i) {
+ StartTimes start_times;
+
+ if (timestamp_boot_merger_) {
+ start_times.Update(timestamp_boot_merger_->monotonic_start_time(i),
+ timestamp_boot_merger_->realtime_start_time(i));
+ }
+
+ start_times.Update(boot_merger_.monotonic_start_time(i),
+ boot_merger_.realtime_start_time(i));
+
+ monotonic_start_time_.push_back(start_times.monotonic_start_time());
+ realtime_start_time_.push_back(start_times.realtime_start_time());
+ }
+}
+
+void SplitTimestampBootMerger::QueueTimestamps(
+ std::function<void(TimestampedMessage *)> fn,
+ const std::vector<size_t> &source_node) {
+ if (!timestamp_boot_merger_) {
+ return;
+ }
+
+ while (true) {
+ // Load all the timestamps. If we find data, ignore it and drop it on the
+ // floor. It will be read when boot_merger_ is used.
+ Message *msg = timestamp_boot_merger_->Front();
+ if (!msg) {
+ queue_timestamps_ran_ = true;
+ return;
+ }
+ if (source_node[msg->channel_index] != static_cast<size_t>(node())) {
+ timestamp_messages_.emplace_back(TimestampedMessage{
+ .channel_index = msg->channel_index,
+ .queue_index = msg->queue_index,
+ .monotonic_event_time = msg->timestamp,
+ .realtime_event_time = msg->data->realtime_sent_time,
+ .remote_queue_index =
+ BootQueueIndex{.boot = msg->monotonic_remote_boot,
+ .index = msg->data->remote_queue_index.value()},
+ .monotonic_remote_time = {msg->monotonic_remote_boot,
+ msg->data->monotonic_remote_time.value()},
+ .realtime_remote_time = msg->data->realtime_remote_time.value(),
+ .monotonic_timestamp_time = {msg->monotonic_timestamp_boot,
+ msg->data->monotonic_timestamp_time},
+ .data = std::move(msg->data)});
+
+ VLOG(2) << this << " Queued timestamp of " << timestamp_messages_.back();
+ fn(×tamp_messages_.back());
+ } else {
+ VLOG(2) << this << " Dropped data";
+ }
+ timestamp_boot_merger_->PopFront();
+ }
+
+ // TODO(austin): Push the queue into TimestampMapper instead. Have it pull
+ // all the timestamps. That will also make it so we don't have to clear the
+ // function.
+}
+
+std::string_view SplitTimestampBootMerger::node_name() const {
+ return configuration::NodeName(configuration().get(), node());
+}
+
+monotonic_clock::time_point SplitTimestampBootMerger::monotonic_start_time(
+ size_t boot) const {
+ CHECK_LT(boot, monotonic_start_time_.size());
+ return monotonic_start_time_[boot];
+}
+
+realtime_clock::time_point SplitTimestampBootMerger::realtime_start_time(
+ size_t boot) const {
+ CHECK_LT(boot, realtime_start_time_.size());
+ return realtime_start_time_[boot];
+}
+
+monotonic_clock::time_point SplitTimestampBootMerger::monotonic_oldest_time(
+ size_t boot) const {
+ if (!timestamp_boot_merger_) {
+ return boot_merger_.monotonic_oldest_time(boot);
+ }
+ return std::min(boot_merger_.monotonic_oldest_time(boot),
+ timestamp_boot_merger_->monotonic_oldest_time(boot));
+}
+
+Message *SplitTimestampBootMerger::Front() {
+ Message *boot_merger_front = boot_merger_.Front();
+
+ if (timestamp_boot_merger_) {
+ CHECK(queue_timestamps_ran_);
+ }
+
+ // timestamp_messages_ is a queue of TimestampedMessage, but we are supposed
+ // to return a Message. We need to convert the first message in the list
+ // before returning it (and comparing, honestly). Fill next_timestamp_ in if
+ // it is empty so the rest of the logic here can just look at next_timestamp_
+ // and use that instead.
+ if (!next_timestamp_ && !timestamp_messages_.empty()) {
+ auto &front = timestamp_messages_.front();
+ next_timestamp_ = Message{
+ .channel_index = front.channel_index,
+ .queue_index = front.queue_index,
+ .timestamp = front.monotonic_event_time,
+ .monotonic_remote_boot = front.remote_queue_index.boot,
+ .monotonic_timestamp_boot = front.monotonic_timestamp_time.boot,
+ .data = std::move(front.data),
+ };
+ timestamp_messages_.pop_front();
+ }
+
+ if (!next_timestamp_) {
+ message_source_ = MessageSource::kBootMerger;
+ if (boot_merger_front != nullptr) {
+ VLOG(1) << this << " SplitTimestampBootMerger::Front " << node_name()
+ << " " << *boot_merger_front;
+ } else {
+ VLOG(1) << this << " SplitTimestampBootMerger::Front " << node_name()
+ << " nullptr";
+ }
+ return boot_merger_front;
+ }
+
+ if (boot_merger_front == nullptr) {
+ message_source_ = MessageSource::kTimestampMessage;
+
+ VLOG(1) << this << " SplitTimestampBootMerger::Front " << node_name() << " "
+ << next_timestamp_.value();
+ return &next_timestamp_.value();
+ }
+
+ if (*boot_merger_front <= next_timestamp_.value()) {
+ if (*boot_merger_front == next_timestamp_.value()) {
+ VLOG(1) << this << " SplitTimestampBootMerger::Front " << node_name()
+ << " Dropping duplicate timestamp.";
+ next_timestamp_.reset();
+ }
+ message_source_ = MessageSource::kBootMerger;
+ // boot_merger_front is non-null here; the nullptr case returned above.
+ VLOG(1) << this << " SplitTimestampBootMerger::Front " << node_name()
+ << " " << *boot_merger_front;
+ return boot_merger_front;
+ } else {
+ message_source_ = MessageSource::kTimestampMessage;
+ VLOG(1) << this << " SplitTimestampBootMerger::Front " << node_name() << " "
+ << next_timestamp_.value();
+ return &next_timestamp_.value();
+ }
+}
+
+void SplitTimestampBootMerger::PopFront() {
+ switch (message_source_) {
+ case MessageSource::kTimestampMessage:
+ CHECK(next_timestamp_.has_value());
+ next_timestamp_.reset();
+ break;
+ case MessageSource::kBootMerger:
+ boot_merger_.PopFront();
+ break;
+ }
+}
+
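Front() and PopFront() above implement a two-way merge between the data stream and the buffered timestamps: return the smaller front, and when both fronts compare equal, drop the timestamp copy so the message comes out once. A minimal sketch of that discipline over plain ints; the real code additionally records which side produced the front so PopFront() pops the right queue.

    #include <deque>
    #include <iostream>

    // Sketch of the Front()/PopFront() merge: take the smaller front, and when
    // both fronts are equal, drop the timestamp-side copy before returning.
    int main() {
      std::deque<int> data = {1, 3, 3, 7};
      std::deque<int> timestamps = {2, 3, 8};

      while (!data.empty() || !timestamps.empty()) {
        if (timestamps.empty() ||
            (!data.empty() && data.front() <= timestamps.front())) {
          if (!timestamps.empty() && data.front() == timestamps.front()) {
            timestamps.pop_front();  // duplicate: keep only the data copy
          }
          std::cout << data.front() << " (data)\n";
          data.pop_front();
        } else {
          std::cout << timestamps.front() << " (timestamp)\n";
          timestamps.pop_front();
        }
      }
    }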
+TimestampMapper::TimestampMapper(
+ std::string_view node_name, const LogFilesContainer &log_files,
+ TimestampQueueStrategy timestamp_queue_strategy)
+ : boot_merger_(node_name, log_files, timestamp_queue_strategy),
+ timestamp_callback_([](TimestampedMessage *) {}) {
+ configuration_ = boot_merger_.configuration();
+
const Configuration *config = configuration_.get();
// Only fill out nodes_data_ if there are nodes. Otherwise, everything is
// pretty simple.
@@ -2118,17 +2403,18 @@
}
}
-void TimestampMapper::QueueMessage(Message *m) {
+void TimestampMapper::QueueMessage(Message *msg) {
matched_messages_.emplace_back(
- TimestampedMessage{.channel_index = m->channel_index,
- .queue_index = m->queue_index,
- .monotonic_event_time = m->timestamp,
- .realtime_event_time = m->data->realtime_sent_time,
+ TimestampedMessage{.channel_index = msg->channel_index,
+ .queue_index = msg->queue_index,
+ .monotonic_event_time = msg->timestamp,
+ .realtime_event_time = msg->data->realtime_sent_time,
.remote_queue_index = BootQueueIndex::Invalid(),
.monotonic_remote_time = BootTimestamp::min_time(),
.realtime_remote_time = realtime_clock::min_time,
.monotonic_timestamp_time = BootTimestamp::min_time(),
- .data = std::move(m->data)});
+ .data = std::move(msg->data)});
+ VLOG(1) << node_name() << " Inserted " << matched_messages_.back();
}
TimestampedMessage *TimestampMapper::Front() {
@@ -2137,18 +2423,26 @@
case FirstMessage::kNeedsUpdate:
break;
case FirstMessage::kInMessage:
+ VLOG(1) << this << " TimestampMapper::Front " << node_name() << " "
+ << matched_messages_.front();
return &matched_messages_.front();
case FirstMessage::kNullptr:
+ VLOG(1) << this << " TimestampMapper::Front " << node_name()
+ << " nullptr";
return nullptr;
}
if (matched_messages_.empty()) {
if (!QueueMatched()) {
first_message_ = FirstMessage::kNullptr;
+ VLOG(1) << this << " TimestampMapper::Front " << node_name()
+ << " nullptr";
return nullptr;
}
}
first_message_ = FirstMessage::kInMessage;
+ VLOG(1) << this << " TimestampMapper::Front " << node_name() << " "
+ << matched_messages_.front();
return &matched_messages_.front();
}
@@ -2164,6 +2458,7 @@
const TimestampedMessage & /*message*/) {
if (replay_channels_callback_ &&
!replay_channels_callback_(matched_messages_.back())) {
+ VLOG(1) << node_name() << " Popped " << matched_messages_.back();
matched_messages_.pop_back();
return true;
}
@@ -2174,15 +2469,16 @@
if (nodes_data_.empty()) {
// Simple path. We are single node, so there are no timestamps to match!
CHECK_EQ(messages_.size(), 0u);
- Message *m = boot_merger_.Front();
- if (!m) {
+ Message *msg = boot_merger_.Front();
+ if (!msg) {
return MatchResult::kEndOfFile;
}
// Enqueue this message into matched_messages_ so we have a place to
// associate remote timestamps, and return it.
- QueueMessage(m);
+ QueueMessage(msg);
- CHECK_GE(matched_messages_.back().monotonic_event_time, last_message_time_);
+ CHECK_GE(matched_messages_.back().monotonic_event_time, last_message_time_)
+ << " on " << node_name();
last_message_time_ = matched_messages_.back().monotonic_event_time;
 // We are a thin wrapper around parts_merger. Call it directly.
@@ -2208,12 +2504,13 @@
boot_merger_.PopFront();
}
- Message *m = &(messages_.front());
+ Message *msg = &(messages_.front());
- if (source_node_[m->channel_index] == node()) {
+ if (source_node_[msg->channel_index] == node()) {
// From us, just forward it on, filling the remote data in as invalid.
- QueueMessage(m);
- CHECK_GE(matched_messages_.back().monotonic_event_time, last_message_time_);
+ QueueMessage(msg);
+ CHECK_GE(matched_messages_.back().monotonic_event_time, last_message_time_)
+ << " on " << node_name();
last_message_time_ = matched_messages_.back().monotonic_event_time;
messages_.pop_front();
timestamp_callback_(&matched_messages_.back());
@@ -2224,25 +2521,28 @@
} else {
// Got a timestamp, find the matching remote data, match it, and return
// it.
- Message data = MatchingMessageFor(*m);
+ Message data = MatchingMessageFor(*msg);
// Return the data from the remote. The local message only has timestamp
// info which isn't relevant anymore once extracted.
matched_messages_.emplace_back(TimestampedMessage{
- .channel_index = m->channel_index,
- .queue_index = m->queue_index,
- .monotonic_event_time = m->timestamp,
- .realtime_event_time = m->data->realtime_sent_time,
+ .channel_index = msg->channel_index,
+ .queue_index = msg->queue_index,
+ .monotonic_event_time = msg->timestamp,
+ .realtime_event_time = msg->data->realtime_sent_time,
.remote_queue_index =
- BootQueueIndex{.boot = m->monotonic_remote_boot,
- .index = m->data->remote_queue_index.value()},
- .monotonic_remote_time = {m->monotonic_remote_boot,
- m->data->monotonic_remote_time.value()},
- .realtime_remote_time = m->data->realtime_remote_time.value(),
- .monotonic_timestamp_time = {m->monotonic_timestamp_boot,
- m->data->monotonic_timestamp_time},
+ BootQueueIndex{.boot = msg->monotonic_remote_boot,
+ .index = msg->data->remote_queue_index.value()},
+ .monotonic_remote_time = {msg->monotonic_remote_boot,
+ msg->data->monotonic_remote_time.value()},
+ .realtime_remote_time = msg->data->realtime_remote_time.value(),
+ .monotonic_timestamp_time = {msg->monotonic_timestamp_boot,
+ msg->data->monotonic_timestamp_time},
.data = std::move(data.data)});
- CHECK_GE(matched_messages_.back().monotonic_event_time, last_message_time_);
+ VLOG(1) << node_name() << " Inserted timestamp "
+ << matched_messages_.back();
+ CHECK_GE(matched_messages_.back().monotonic_event_time, last_message_time_)
+ << " on " << node_name() << " " << matched_messages_.back();
last_message_time_ = matched_messages_.back().monotonic_event_time;
// Since messages_ holds the data, drop it.
messages_.pop_front();
@@ -2298,6 +2598,7 @@
last_popped_message_time_ = Front()->monotonic_event_time;
first_message_ = FirstMessage::kNeedsUpdate;
+ VLOG(1) << node_name() << " Popped " << matched_messages_.back();
matched_messages_.pop_front();
}
@@ -2391,9 +2692,9 @@
auto it = std::find_if(
data_queue->begin(), data_queue->end(),
[remote_queue_index,
- remote_boot = monotonic_remote_time.boot](const Message &m) {
- return m.queue_index == remote_queue_index &&
- m.timestamp.boot == remote_boot;
+ remote_boot = monotonic_remote_time.boot](const Message &msg) {
+ return msg.queue_index == remote_queue_index &&
+ msg.timestamp.boot == remote_boot;
});
if (it == data_queue->end()) {
return Message{.channel_index = message.channel_index,
@@ -2444,14 +2745,14 @@
}
bool TimestampMapper::Queue() {
- Message *m = boot_merger_.Front();
- if (m == nullptr) {
+ Message *msg = boot_merger_.Front();
+ if (msg == nullptr) {
return false;
}
for (NodeData &node_data : nodes_data_) {
if (!node_data.any_delivered) continue;
if (!node_data.save_for_peer) continue;
- if (node_data.channels[m->channel_index].delivered) {
+ if (node_data.channels[msg->channel_index].delivered) {
// If we have data but no timestamps (logs where the timestamps didn't get
// logged are classic), we can grow this indefinitely. We don't need to
// keep anything that is older than the last message returned.
@@ -2459,11 +2760,11 @@
// We have the time on the source node.
// We care to wait until we have the time on the destination node.
std::deque<Message> &messages =
- node_data.channels[m->channel_index].messages;
+ node_data.channels[msg->channel_index].messages;
// Max delay over the network is the TTL, so let's take the queue time and
// add TTL to it. Don't forget any messages which are reliable until
// someone can come up with a good reason to forget those too.
- if (node_data.channels[m->channel_index].time_to_live >
+ if (node_data.channels[msg->channel_index].time_to_live >
chrono::nanoseconds(0)) {
// We need to make *some* assumptions about network delay for this to
// work. We want to only look at the RX side. This means we need to
@@ -2477,21 +2778,25 @@
// messages getting sent twice.
while (messages.size() > 1u &&
messages.begin()->timestamp +
- node_data.channels[m->channel_index].time_to_live +
+ node_data.channels[msg->channel_index].time_to_live +
chrono::duration_cast<chrono::nanoseconds>(
chrono::duration<double>(FLAGS_max_network_delay)) <
last_popped_message_time_) {
messages.pop_front();
}
}
- node_data.channels[m->channel_index].messages.emplace_back(*m);
+ node_data.channels[msg->channel_index].messages.emplace_back(*msg);
}
}
- messages_.emplace_back(std::move(*m));
+ messages_.emplace_back(std::move(*msg));
return true;
}
+void TimestampMapper::QueueTimestamps() {
+ boot_merger_.QueueTimestamps(std::ref(timestamp_callback_), source_node_);
+}
+
std::string TimestampMapper::DebugString() const {
std::stringstream ss;
ss << "node " << node() << " (" << node_name() << ") [\n";
@@ -2510,8 +2815,8 @@
}
ss << " channel " << channel_index << " [\n";
- for (const Message &m : channel_data.messages) {
- ss << " " << m << "\n";
+ for (const Message &msg : channel_data.messages) {
+ ss << " " << msg << "\n";
}
ss << " ]\n";
++channel_index;
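One detail worth calling out from the Queue() hunk above: buffered peer messages on lossy channels are pruned once they can no longer match any future timestamp, using send time plus TTL plus a network-delay fudge factor (FLAGS_max_network_delay). A toy sketch of that rule over plain nanosecond counts; the size() > 1 guard mirrors the real loop, which always keeps at least one message.

    #include <cstdint>
    #include <deque>
    #include <iostream>

    int main() {
      const int64_t ttl_ns = 100;               // channel time_to_live
      const int64_t max_network_delay_ns = 50;  // stand-in for the flag
      const int64_t last_popped_ns = 500;       // newest returned message

      std::deque<int64_t> send_times = {100, 300, 400, 480};
      while (send_times.size() > 1 &&
             send_times.front() + ttl_ns + max_network_delay_ns <
                 last_popped_ns) {
        send_times.pop_front();  // too old to match any timestamp still coming
      }
      for (int64_t t : send_times) std::cout << t << "\n";  // 400, 480 remain
    }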
diff --git a/aos/events/logging/logfile_utils.h b/aos/events/logging/logfile_utils.h
index 18902d3..fb3fcbf 100644
--- a/aos/events/logging/logfile_utils.h
+++ b/aos/events/logging/logfile_utils.h
@@ -18,6 +18,7 @@
#include "absl/types/span.h"
#include "flatbuffers/flatbuffers.h"
+#include "aos/configuration.h"
#include "aos/containers/resizeable_buffer.h"
#include "aos/events/event_loop.h"
#include "aos/events/logging/boot_timestamp.h"
@@ -288,8 +289,8 @@
return raw_log_file_header_;
}
- // Returns the minimum maount of data needed to queue up for sorting before
- // ware guarenteed to not see data out of order.
+ // Returns the minimum amount of data needed to queue up for sorting before
+ // we're guaranteed to not see data out of order.
std::chrono::nanoseconds max_out_of_order_duration() const {
return max_out_of_order_duration_;
}
@@ -376,7 +377,7 @@
// Returns the minimum amount of data needed to queue up for sorting before
 // we are guaranteed to not see data out of order.
std::chrono::nanoseconds max_out_of_order_duration() const {
- return message_reader_.max_out_of_order_duration();
+ return max_out_of_order_duration_;
}
// Returns the newest timestamp read out of the log file.
@@ -432,6 +433,8 @@
// Per node boot counts.
std::vector<std::optional<size_t>> boot_counts_;
+
+ const std::chrono::nanoseconds max_out_of_order_duration_;
};
// Stores MessageHeader as a flat header and inline, aligned block of data.
@@ -526,6 +529,7 @@
std::shared_ptr<UnpackedMessageHeader> data;
bool operator<(const Message &m2) const;
+ bool operator<=(const Message &m2) const;
bool operator>=(const Message &m2) const;
bool operator==(const Message &m2) const;
};
@@ -608,8 +612,7 @@
// boot.
class PartsMerger {
public:
- PartsMerger(std::string_view node_name, size_t boot_count,
- const LogFilesContainer &log_files);
+ PartsMerger(SelectedLogParts &&selected_parts);
// Copying and moving will mess up the internal raw pointers. Just don't do
// it.
@@ -621,11 +624,15 @@
// Node index in the configuration of this node.
int node() const { return node_; }
+ std::string_view node_name() const {
+ return configuration::NodeName(configuration().get(), node());
+ }
+
// List of parts being sorted together.
std::vector<const LogParts *> Parts() const;
- const Configuration *configuration() const {
- return message_sorters_[0].parts().config.get();
+ const std::shared_ptr<const Configuration> configuration() const {
+ return message_sorters_[0].parts().config;
}
monotonic_clock::time_point monotonic_start_time() const {
@@ -634,7 +641,15 @@
realtime_clock::time_point realtime_start_time() const {
return realtime_start_time_;
}
- monotonic_clock::time_point monotonic_oldest_time() const {
+
+ // Returns the oldest message observed in this set of parts. This could be
+ // before the start time if a message from long before was fetched at the
+ // start of logging.
+ monotonic_clock::time_point monotonic_oldest_time() {
+ if (monotonic_oldest_time_ == monotonic_clock::max_time) {
+ VLOG(1) << "No oldest message time, fetching " << node_name();
+ (void)Front();
+ }
return monotonic_oldest_time_;
}
@@ -675,7 +690,8 @@
// stream.
class BootMerger {
public:
- BootMerger(std::string_view node_name, const LogFilesContainer &log_files);
+ BootMerger(std::string_view node_name, const LogFilesContainer &log_files,
+ const std::vector<StoredDataType> &types);
// Copying and moving will mess up the internal raw pointers. Just don't do
// it.
@@ -685,33 +701,21 @@
void operator=(BootMerger &&) = delete;
// Node index in the configuration of this node.
- int node() const { return parts_mergers_[0]->node(); }
+ int node() const { return node_; }
+ std::string_view node_name() const;
// List of parts being sorted together.
std::vector<const LogParts *> Parts() const;
- const Configuration *configuration() const {
- return parts_mergers_[0]->configuration();
+ const std::shared_ptr<const Configuration> configuration() const {
+ return configuration_;
}
- monotonic_clock::time_point monotonic_start_time(size_t boot) const {
- CHECK_LT(boot, parts_mergers_.size());
- return parts_mergers_[boot]->monotonic_start_time();
- }
- realtime_clock::time_point realtime_start_time(size_t boot) const {
- CHECK_LT(boot, parts_mergers_.size());
- return parts_mergers_[boot]->realtime_start_time();
- }
- monotonic_clock::time_point monotonic_oldest_time(size_t boot) const {
- CHECK_LT(boot, parts_mergers_.size());
- return parts_mergers_[boot]->monotonic_oldest_time();
- }
+ monotonic_clock::time_point monotonic_start_time(size_t boot) const;
+ realtime_clock::time_point realtime_start_time(size_t boot) const;
+ monotonic_clock::time_point monotonic_oldest_time(size_t boot) const;
- bool started() const {
- return parts_mergers_[index_]->sorted_until() !=
- monotonic_clock::min_time ||
- index_ != 0;
- }
+ bool started() const;
// Returns the next sorted message from the set of log files. It is safe to
// call std::move() on the result to move the data flatbuffer from it.
@@ -725,7 +729,101 @@
// TODO(austin): Sanjay points out this is pretty inefficient. Don't keep so
// many things open.
+ // A list of all the parts mergers. Only the boots with something to sort are
+ // instantiated.
std::vector<std::unique_ptr<PartsMerger>> parts_mergers_;
+
+ std::shared_ptr<const Configuration> configuration_;
+ int node_;
+};
+
+enum class TimestampQueueStrategy {
+ // Read the timestamps at the same time as all the other data.
+ kQueueTogether,
+ // Read the timestamps first.
+ kQueueTimestampsAtStartup,
+};
+
+// Class to manage queueing up timestamps from BootMerger and notifying
+// TimestampMapper of them.
+class SplitTimestampBootMerger {
+ public:
+ SplitTimestampBootMerger(std::string_view node_name,
+ const LogFilesContainer &log_files,
+ TimestampQueueStrategy timestamp_queue_strategy);
+
+ // Copying and moving will mess up the internal raw pointers. Just don't do
+ // it.
+ SplitTimestampBootMerger(SplitTimestampBootMerger const &) = delete;
+ SplitTimestampBootMerger(SplitTimestampBootMerger &&) = delete;
+ void operator=(SplitTimestampBootMerger const &) = delete;
+ void operator=(SplitTimestampBootMerger &&) = delete;
+
+ // Reads all timestamps into a member variable queue and calls the provided
+ // function on each one. Only timestamps are saved: messages sent on this
+ // node but not originating from this node. To make that distinction,
+ // source_node maps each channel index to the node index of that channel's
+ // source node.
+ void QueueTimestamps(std::function<void(TimestampedMessage *)> fn,
+ const std::vector<size_t> &source_node);
+
+ // Node index in the configuration of this node.
+ int node() const { return boot_merger_.node(); }
+ // Returns the name of the node this class is sorting for.
+ std::string_view node_name() const;
+
+ std::shared_ptr<const Configuration> configuration() const {
+ return boot_merger_.configuration();
+ }
+
+ monotonic_clock::time_point monotonic_start_time(size_t boot) const;
+ realtime_clock::time_point realtime_start_time(size_t boot) const;
+ monotonic_clock::time_point monotonic_oldest_time(size_t boot) const;
+
+ // Returns true if the log has been started.
+ bool started() const {
+ // Timestamps don't count, so only track boot_merger_.
+ return boot_merger_.started();
+ }
+
+ // Returns the next sorted message from the set of log files. It is safe to
+ // call std::move() on the result to move the data flatbuffer from it.
+ Message *Front();
+
+ // Pops the front message. This should only be called after a call to
+ // Front().
+ void PopFront();
+
+ private:
+ enum class MessageSource {
+ kTimestampMessage,
+ kBootMerger,
+ };
+
+ MessageSource message_source_ = MessageSource::kBootMerger;
+
+ // Boot merger for data and potentially timestamps.
+ BootMerger boot_merger_;
+
+ // Boot merger for just timestamps. Any data read from here is to be ignored.
+ std::unique_ptr<BootMerger> timestamp_boot_merger_;
+
+ // The callback requires us to convert each message to a TimestampedMessage.
+ std::deque<TimestampedMessage> timestamp_messages_;
+
+ // Storage for the next timestamp message to return. This is separate so we
+ // can convert a TimestampedMessage back into a Message.
+ //
+ // TODO(austin): It would be nice to not have to convert...
+ std::optional<Message> next_timestamp_;
+
+ // Start times for each boot.
+ std::vector<monotonic_clock::time_point> monotonic_start_time_;
+ std::vector<realtime_clock::time_point> realtime_start_time_;
+
+ // Tracks if QueueTimestamps loaded any timestamps.
+ bool queue_timestamps_ran_ = false;
};
// Class to match timestamps with the corresponding data from other nodes.
@@ -735,7 +833,8 @@
class TimestampMapper {
public:
TimestampMapper(std::string_view node_name,
- const LogFilesContainer &log_files);
+ const LogFilesContainer &log_files,
+ TimestampQueueStrategy timestamp_queue_strategy);
// Copying and moving will mess up the internal raw pointers. Just don't do
// it.
@@ -782,6 +881,11 @@
// Returns debug information about this node.
std::string DebugString() const;
+ // Queues just the timestamps so that the timestamp callback gets called.
+ // Note: the timestamp callback also fires when the timestamps are returned
+ // later, so unset it if you don't want to be called twice.
+ void QueueTimestamps();
+
 // Queues data until the provided time.
void QueueUntil(BootTimestamp queue_time);
// Queues until we have time_estimation_buffer of data in the queue.
@@ -880,15 +984,11 @@
// Returns the name of the node this class is sorting for.
std::string_view node_name() const {
- return configuration_->has_nodes() ? configuration_->nodes()
- ->Get(boot_merger_.node())
- ->name()
- ->string_view()
- : "(single node)";
+ return configuration::NodeName(configuration(), node());
}
// The node merger to source messages from.
- BootMerger boot_merger_;
+ SplitTimestampBootMerger boot_merger_;
std::shared_ptr<const Configuration> configuration_;
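To tie the new pieces of this header together, here is a hypothetical wiring of TimestampMapper with the startup-queueing strategy. Every name used appears in this diff; building the LogFilesContainer is assumed to have happened elsewhere, and the final set_timestamp_callback call addresses the double-call caveat documented on QueueTimestamps().

    #include <cstddef>

    #include "aos/events/logging/logfile_utils.h"

    // Hypothetical: queue all timestamps up front, then unset the callback so
    // it doesn't fire a second time when the same timestamps come back out of
    // Front()/PopFront().
    void PresolveTimestamps(const aos::logger::LogFilesContainer &log_files) {
      aos::logger::TimestampMapper mapper(
          "pi1", log_files,
          aos::logger::TimestampQueueStrategy::kQueueTimestampsAtStartup);

      size_t count = 0;
      mapper.set_timestamp_callback(
          [&](aos::logger::TimestampedMessage *) { ++count; });
      mapper.QueueTimestamps();

      // Avoid double-counting: the callback would otherwise also run as the
      // queued timestamps are replayed through Front().
      mapper.set_timestamp_callback([](aos::logger::TimestampedMessage *) {});
    }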
diff --git a/aos/events/logging/logfile_utils_test.cc b/aos/events/logging/logfile_utils_test.cc
index f4071e4..ea56d8d 100644
--- a/aos/events/logging/logfile_utils_test.cc
+++ b/aos/events/logging/logfile_utils_test.cc
@@ -216,8 +216,13 @@
writer.QueueSpan(m2.span());
}
+ // When parts are sorted, we choose the highest max out of order duration for
+ // all parts with the same part uuid.
const std::vector<LogFile> parts = SortParts({logfile0, logfile1});
+ EXPECT_EQ(parts.size(), 1);
+ EXPECT_EQ(parts[0].parts.size(), 1);
+
PartsMessageReader reader(parts[0].parts[0]);
EXPECT_EQ(reader.filename(), logfile0);
@@ -225,16 +230,18 @@
// Confirm that the timestamps track, and the filename also updates.
// Read the first message.
EXPECT_EQ(reader.newest_timestamp(), monotonic_clock::min_time);
+ // Since config1 has a higher max out of order duration, that will be used to
+ // read part files with the same part uuid, i.e. logfile0 and logfile1.
EXPECT_EQ(
reader.max_out_of_order_duration(),
- std::chrono::nanoseconds(config0.message().max_out_of_order_duration()));
+ std::chrono::nanoseconds(config1.message().max_out_of_order_duration()));
EXPECT_TRUE(reader.ReadMessage());
EXPECT_EQ(reader.filename(), logfile0);
EXPECT_EQ(reader.newest_timestamp(),
monotonic_clock::time_point(chrono::nanoseconds(1)));
EXPECT_EQ(
reader.max_out_of_order_duration(),
- std::chrono::nanoseconds(config0.message().max_out_of_order_duration()));
+ std::chrono::nanoseconds(config1.message().max_out_of_order_duration()));
// Read the second message.
EXPECT_TRUE(reader.ReadMessage());
@@ -252,6 +259,11 @@
reader.max_out_of_order_duration(),
std::chrono::nanoseconds(config1.message().max_out_of_order_duration()));
EXPECT_EQ(reader.newest_timestamp(), monotonic_clock::max_time);
+
+ // Verify that the parts metadata has the correct max out of order duration.
+ EXPECT_EQ(
+ parts[0].parts[0].max_out_of_order_duration,
+ std::chrono::nanoseconds(config1.message().max_out_of_order_duration()));
}
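The rule this test exercises, sketched as a plain reduction: across part files sharing a part uuid, sorting advertises the largest max_out_of_order_duration, so a reader buffering that much is safe for every part in the set.

    #include <algorithm>
    #include <chrono>
    #include <iostream>
    #include <vector>

    // Sketch of the selection rule: the largest per-part value wins, so one
    // buffer size covers every part file with the same part uuid.
    int main() {
      using std::chrono::nanoseconds;
      const std::vector<nanoseconds> per_part = {
          nanoseconds(100000000),    // e.g. config0
          nanoseconds(2000000000)};  // e.g. config1
      nanoseconds merged = nanoseconds(0);
      for (nanoseconds d : per_part) merged = std::max(merged, d);
      std::cout << merged.count() << "ns\n";  // 2000000000ns: config1 wins
    }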
// Tests that Message's operator < works as expected.
@@ -767,7 +779,10 @@
LogFilesContainer log_files(parts);
ASSERT_EQ(parts.size(), 1u);
- PartsMerger merger("pi1", 0, log_files);
+ PartsMerger merger(
+ log_files.SelectParts("pi1", 0,
+ {StoredDataType::DATA, StoredDataType::TIMESTAMPS,
+ StoredDataType::REMOTE_TIMESTAMPS}));
EXPECT_EQ(merger.sorted_until(), monotonic_clock::min_time);
@@ -869,7 +884,10 @@
LogFilesContainer log_files(parts);
ASSERT_EQ(parts.size(), 1u);
- PartsMerger merger("pi1", 0, log_files);
+ PartsMerger merger(
+ log_files.SelectParts("pi1", 0,
+ {StoredDataType::DATA, StoredDataType::TIMESTAMPS,
+ StoredDataType::REMOTE_TIMESTAMPS}));
EXPECT_EQ(merger.sorted_until(), monotonic_clock::min_time);
@@ -937,11 +955,13 @@
size_t mapper0_count = 0;
- TimestampMapper mapper0("pi1", log_files);
+ TimestampMapper mapper0("pi1", log_files,
+ TimestampQueueStrategy::kQueueTogether);
mapper0.set_timestamp_callback(
[&](TimestampedMessage *) { ++mapper0_count; });
size_t mapper1_count = 0;
- TimestampMapper mapper1("pi2", log_files);
+ TimestampMapper mapper1("pi2", log_files,
+ TimestampQueueStrategy::kQueueTogether);
mapper1.set_timestamp_callback(
[&](TimestampedMessage *) { ++mapper1_count; });
@@ -1082,13 +1102,15 @@
// messages due to the channel filter callbacks used
size_t mapper0_count = 0;
- TimestampMapper mapper0("pi1", log_files);
+ TimestampMapper mapper0("pi1", log_files,
+ TimestampQueueStrategy::kQueueTogether);
mapper0.set_timestamp_callback(
[&](TimestampedMessage *) { ++mapper0_count; });
mapper0.set_replay_channels_callback(
[&](const TimestampedMessage &) -> bool { return mapper0_count != 2; });
size_t mapper1_count = 0;
- TimestampMapper mapper1("pi2", log_files);
+ TimestampMapper mapper1("pi2", log_files,
+ TimestampQueueStrategy::kQueueTogether);
mapper1.set_timestamp_callback(
[&](TimestampedMessage *) { ++mapper1_count; });
mapper1.set_replay_channels_callback(
@@ -1219,11 +1241,13 @@
ASSERT_EQ(parts.size(), 1u);
size_t mapper0_count = 0;
- TimestampMapper mapper0("pi1", log_files);
+ TimestampMapper mapper0("pi1", log_files,
+ TimestampQueueStrategy::kQueueTogether);
mapper0.set_timestamp_callback(
[&](TimestampedMessage *) { ++mapper0_count; });
size_t mapper1_count = 0;
- TimestampMapper mapper1("pi2", log_files);
+ TimestampMapper mapper1("pi2", log_files,
+ TimestampQueueStrategy::kQueueTogether);
mapper1.set_timestamp_callback(
[&](TimestampedMessage *) { ++mapper1_count; });
@@ -1341,11 +1365,13 @@
ASSERT_EQ(parts[1].logger_node, "pi2");
size_t mapper0_count = 0;
- TimestampMapper mapper0("pi1", log_files);
+ TimestampMapper mapper0("pi1", log_files,
+ TimestampQueueStrategy::kQueueTogether);
mapper0.set_timestamp_callback(
[&](TimestampedMessage *) { ++mapper0_count; });
size_t mapper1_count = 0;
- TimestampMapper mapper1("pi2", log_files);
+ TimestampMapper mapper1("pi2", log_files,
+ TimestampQueueStrategy::kQueueTogether);
mapper1.set_timestamp_callback(
[&](TimestampedMessage *) { ++mapper1_count; });
@@ -1461,11 +1487,13 @@
ASSERT_EQ(parts[1].logger_node, "pi2");
size_t mapper0_count = 0;
- TimestampMapper mapper0("pi1", log_files);
+ TimestampMapper mapper0("pi1", log_files,
+ TimestampQueueStrategy::kQueueTogether);
mapper0.set_timestamp_callback(
[&](TimestampedMessage *) { ++mapper0_count; });
size_t mapper1_count = 0;
- TimestampMapper mapper1("pi2", log_files);
+ TimestampMapper mapper1("pi2", log_files,
+ TimestampQueueStrategy::kQueueTogether);
mapper1.set_timestamp_callback(
[&](TimestampedMessage *) { ++mapper1_count; });
@@ -1545,11 +1573,13 @@
ASSERT_EQ(parts[1].logger_node, "pi2");
size_t mapper0_count = 0;
- TimestampMapper mapper0("pi1", log_files);
+ TimestampMapper mapper0("pi1", log_files,
+ TimestampQueueStrategy::kQueueTogether);
mapper0.set_timestamp_callback(
[&](TimestampedMessage *) { ++mapper0_count; });
size_t mapper1_count = 0;
- TimestampMapper mapper1("pi2", log_files);
+ TimestampMapper mapper1("pi2", log_files,
+ TimestampQueueStrategy::kQueueTogether);
mapper1.set_timestamp_callback(
[&](TimestampedMessage *) { ++mapper1_count; });
@@ -1630,11 +1660,13 @@
ASSERT_EQ(parts[1].logger_node, "pi2");
size_t mapper0_count = 0;
- TimestampMapper mapper0("pi1", log_files);
+ TimestampMapper mapper0("pi1", log_files,
+ TimestampQueueStrategy::kQueueTogether);
mapper0.set_timestamp_callback(
[&](TimestampedMessage *) { ++mapper0_count; });
size_t mapper1_count = 0;
- TimestampMapper mapper1("pi2", log_files);
+ TimestampMapper mapper1("pi2", log_files,
+ TimestampQueueStrategy::kQueueTogether);
mapper1.set_timestamp_callback(
[&](TimestampedMessage *) { ++mapper1_count; });
@@ -1705,11 +1737,13 @@
ASSERT_EQ(parts[1].logger_node, "pi2");
size_t mapper0_count = 0;
- TimestampMapper mapper0("pi1", log_files);
+ TimestampMapper mapper0("pi1", log_files,
+ TimestampQueueStrategy::kQueueTogether);
mapper0.set_timestamp_callback(
[&](TimestampedMessage *) { ++mapper0_count; });
size_t mapper1_count = 0;
- TimestampMapper mapper1("pi2", log_files);
+ TimestampMapper mapper1("pi2", log_files,
+ TimestampQueueStrategy::kQueueTogether);
mapper1.set_timestamp_callback(
[&](TimestampedMessage *) { ++mapper1_count; });
@@ -1769,7 +1803,8 @@
LogFilesContainer log_files(parts);
size_t mapper0_count = 0;
- TimestampMapper mapper0("pi1", log_files);
+ TimestampMapper mapper0("pi1", log_files,
+ TimestampQueueStrategy::kQueueTogether);
mapper0.set_timestamp_callback(
[&](TimestampedMessage *) { ++mapper0_count; });
@@ -1811,7 +1846,8 @@
ASSERT_EQ(parts[1].logger_node, "pi2");
size_t mapper1_count = 0;
- TimestampMapper mapper1("pi2", log_files);
+ TimestampMapper mapper1("pi2", log_files,
+ TimestampQueueStrategy::kQueueTogether);
mapper1.set_timestamp_callback(
[&](TimestampedMessage *) { ++mapper1_count; });
@@ -1885,11 +1921,13 @@
ASSERT_EQ(parts[1].logger_node, "pi2");
size_t mapper0_count = 0;
- TimestampMapper mapper0("pi1", log_files);
+ TimestampMapper mapper0("pi1", log_files,
+ TimestampQueueStrategy::kQueueTogether);
mapper0.set_timestamp_callback(
[&](TimestampedMessage *) { ++mapper0_count; });
size_t mapper1_count = 0;
- TimestampMapper mapper1("pi2", log_files);
+ TimestampMapper mapper1("pi2", log_files,
+ TimestampQueueStrategy::kQueueTogether);
mapper1.set_timestamp_callback(
[&](TimestampedMessage *) { ++mapper1_count; });
@@ -2142,7 +2180,9 @@
ASSERT_EQ(parts.size(), 1u);
ASSERT_EQ(parts[0].parts.size(), 2u);
- BootMerger merger("pi2", log_files);
+ BootMerger merger("pi2", log_files,
+ {StoredDataType::DATA, StoredDataType::TIMESTAMPS,
+ StoredDataType::REMOTE_TIMESTAMPS});
EXPECT_EQ(merger.node(), 1u);
@@ -2326,11 +2366,13 @@
ASSERT_EQ(parts[0].logger_node, "pi1");
size_t mapper0_count = 0;
- TimestampMapper mapper0("pi1", log_files);
+ TimestampMapper mapper0("pi1", log_files,
+ TimestampQueueStrategy::kQueueTogether);
mapper0.set_timestamp_callback(
[&](TimestampedMessage *) { ++mapper0_count; });
size_t mapper1_count = 0;
- TimestampMapper mapper1("pi2", log_files);
+ TimestampMapper mapper1("pi2", log_files,
+ TimestampQueueStrategy::kQueueTogether);
mapper1.set_timestamp_callback(
[&](TimestampedMessage *) { ++mapper1_count; });
@@ -2545,11 +2587,13 @@
ASSERT_EQ(parts[0].logger_node, "pi1");
size_t mapper0_count = 0;
- TimestampMapper mapper0("pi1", log_files);
+ TimestampMapper mapper0("pi1", log_files,
+ TimestampQueueStrategy::kQueueTogether);
mapper0.set_timestamp_callback(
[&](TimestampedMessage *) { ++mapper0_count; });
size_t mapper1_count = 0;
- TimestampMapper mapper1("pi2", log_files);
+ TimestampMapper mapper1("pi2", log_files,
+ TimestampQueueStrategy::kQueueTogether);
mapper1.set_timestamp_callback(
[&](TimestampedMessage *) { ++mapper1_count; });
diff --git a/aos/events/logging/logfile_validator.cc b/aos/events/logging/logfile_validator.cc
new file mode 100644
index 0000000..19d11c8
--- /dev/null
+++ b/aos/events/logging/logfile_validator.cc
@@ -0,0 +1,162 @@
+#include "aos/events/logging/logfile_validator.h"
+
+#include "aos/events/logging/logfile_sorting.h"
+#include "aos/events/logging/logfile_utils.h"
+#include "aos/events/logging/logfile_validator.h"
+#include "aos/network/multinode_timestamp_filter.h"
+
+namespace aos::logger {
+bool MultiNodeLogIsReadable(const LogFilesContainer &log_files,
+ bool skip_order_validation) {
+ const Configuration *config = log_files.config().get();
+
+ CHECK(configuration::MultiNode(config))
+ << ": Timestamps only make sense in a multi-node world.";
+
+ // Now, build up all the TimestampMapper classes to read and sort the data.
+ std::vector<std::unique_ptr<TimestampMapper>> mappers;
+
+ for (const Node *node : configuration::GetNodes(config)) {
+ auto node_name = MaybeNodeName(node);
+ // Confirm that all the parts are from the same boot if there are enough
+ // parts to not be from the same boot.
+ if (log_files.ContainsPartsForNode(node_name)) {
+ // Filter the parts relevant to each node when building the mapper.
+ mappers.emplace_back(std::make_unique<TimestampMapper>(
+ node_name, log_files, TimestampQueueStrategy::kQueueTogether));
+ } else {
+ mappers.emplace_back(nullptr);
+ }
+ }
+
+ // Now, build up the estimator used to solve for time.
+ message_bridge::MultiNodeNoncausalOffsetEstimator multinode_estimator(
+ config, config, log_files.boots(), skip_order_validation,
+ std::chrono::seconds(0));
+ multinode_estimator.set_reboot_found(
+ [config](distributed_clock::time_point reboot_time,
+ const std::vector<logger::BootTimestamp> &node_times) {
+ LOG(INFO) << "Rebooted at distributed " << reboot_time;
+ size_t node_index = 0;
+ for (const logger::BootTimestamp &time : node_times) {
+ LOG(INFO) << " "
+ << config->nodes()->Get(node_index)->name()->string_view()
+ << " " << time;
+ ++node_index;
+ }
+ });
+
+ // Because RAII doesn't let us do non-fatal/non-exception things, use this
+ // when returning to handle certain cleanup-related checks that would normally
+ // happen fatally in the estimator destructor.
+ auto preempt_destructor = [&multinode_estimator](bool success) {
+ if (!multinode_estimator.RunDestructorChecks()) {
+ return false;
+ }
+ return success;
+ };
+
+ {
+ std::vector<TimestampMapper *> timestamp_mappers;
+ for (std::unique_ptr<TimestampMapper> &mapper : mappers) {
+ timestamp_mappers.emplace_back(mapper.get());
+ }
+ multinode_estimator.SetTimestampMappers(std::move(timestamp_mappers));
+ }
+
+ // To make things more like the logger and faster, cache the node + channel ->
+ // filter mapping in a set of vectors.
+ std::vector<std::vector<message_bridge::NoncausalOffsetEstimator *>> filters;
+ filters.resize(configuration::NodesCount(config));
+
+ for (const Node *node : configuration::GetNodes(config)) {
+ const size_t node_index = configuration::GetNodeIndex(config, node);
+ filters[node_index].resize(config->channels()->size(), nullptr);
+ for (size_t channel_index = 0; channel_index < config->channels()->size();
+ ++channel_index) {
+ const Channel *channel = config->channels()->Get(channel_index);
+
+ if (!configuration::ChannelIsSendableOnNode(channel, node) &&
+ configuration::ChannelIsReadableOnNode(channel, node)) {
+ // We've got a message which is being forwarded to this node.
+ const Node *source_node = configuration::GetNode(
+ config, channel->source_node()->string_view());
+ filters[node_index][channel_index] =
+ multinode_estimator.GetFilter(node, source_node);
+ }
+ }
+ }
+
+ multinode_estimator.CheckGraph();
+
+ // Now, read all the timestamps for each node. This is deliberately simpler
+ // than the logger. It loads *all* the timestamps in one go per node,
+ // ignoring memory usage.
+ for (const Node *node : configuration::GetNodes(config)) {
+ LOG(INFO) << "Reading all data for " << node->name()->string_view();
+ const size_t node_index = configuration::GetNodeIndex(config, node);
+ TimestampMapper *timestamp_mapper = mappers[node_index].get();
+ if (timestamp_mapper == nullptr) {
+ continue;
+ }
+ while (true) {
+ TimestampedMessage *m = timestamp_mapper->Front();
+ if (m == nullptr) {
+ break;
+ }
+ timestamp_mapper->PopFront();
+ }
+ }
+
+ // Don't get clever. Use the first time as the start time. Note: this is
+ // different than how log_cat and others work.
+ std::optional<std::optional<const std::tuple<distributed_clock::time_point,
+ std::vector<BootTimestamp>> *>>
+ next_timestamp = multinode_estimator.QueueNextTimestamp();
+ if (!next_timestamp.has_value() || !next_timestamp.value().has_value()) {
+ return preempt_destructor(false);
+ }
+ LOG(INFO) << "Starting at:";
+ for (const Node *node : configuration::GetNodes(config)) {
+ const size_t node_index = configuration::GetNodeIndex(config, node);
+ LOG(INFO) << " " << node->name()->string_view() << " -> "
+ << std::get<1>(*next_timestamp.value().value())[node_index].time;
+ }
+
+ std::vector<monotonic_clock::time_point> just_monotonic(
+ std::get<1>(*next_timestamp.value().value()).size());
+ for (size_t i = 0; i < just_monotonic.size(); ++i) {
+ CHECK_EQ(std::get<1>(*next_timestamp.value().value())[i].boot, 0u);
+ just_monotonic[i] = std::get<1>(*next_timestamp.value().value())[i].time;
+ }
+ multinode_estimator.Start(just_monotonic);
+
+ // As we pull off all the timestamps, the time problem is continually solved,
+ // filling in the CSV files.
+ while (true) {
+ std::optional<std::optional<const std::tuple<distributed_clock::time_point,
+ std::vector<BootTimestamp>> *>>
+ next_timestamp = multinode_estimator.QueueNextTimestamp();
+ if (!next_timestamp.has_value()) {
+ return preempt_destructor(false);
+ }
+ if (!next_timestamp.value().has_value()) {
+ break;
+ }
+ multinode_estimator.ObserveTimePassed(
+ std::get<0>(*next_timestamp.value().value()));
+ }
+
+ LOG(INFO) << "Done";
+
+ return preempt_destructor(true);
+}
+
+bool LogIsReadableIfMultiNode(const LogFilesContainer &log_files) {
+ if (aos::configuration::NodesCount(log_files.config().get()) == 1u) {
+ return true;
+ }
+ return MultiNodeLogIsReadable(log_files);
+}
+} // namespace aos::logger
diff --git a/aos/events/logging/logfile_validator.h b/aos/events/logging/logfile_validator.h
new file mode 100644
index 0000000..814b89e
--- /dev/null
+++ b/aos/events/logging/logfile_validator.h
@@ -0,0 +1,16 @@
+#ifndef AOS_EVENTS_LOGGING_LOGFILE_VALIDATOR_H_
+#define AOS_EVENTS_LOGGING_LOGFILE_VALIDATOR_H_
+#include "aos/events/logging/logfile_sorting.h"
+namespace aos::logger {
+// Attempts to validate that a log is readable without actually running the full
+// LogReader. This aims to allow the user to preempt fatal crashes that can
+// occur when trying to replay a log.
+// Returns true if successful.
+bool MultiNodeLogIsReadable(const LogFilesContainer &log_files,
+ bool skip_order_validation = false);
+
+// Returns true if the requested log is a single-node log, or if
+// MultiNodeLogIsReadable() returns true.
+bool LogIsReadableIfMultiNode(const LogFilesContainer &log_files);
+} // namespace aos::logger
+#endif // AOS_EVENTS_LOGGING_LOGFILE_VALIDATOR_H_
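A hypothetical pre-flight caller of this new API, assuming SortParts() and LogFilesContainer as they appear elsewhere in this change; flag parsing and error detail are elided.

    #include <iostream>
    #include <string>
    #include <vector>

    #include "aos/events/logging/logfile_sorting.h"
    #include "aos/events/logging/logfile_validator.h"

    // Hypothetical: check that a log can be replayed before handing it to
    // LogReader, so a bad log fails fast instead of crashing mid-replay.
    int main(int argc, char **argv) {
      const std::vector<std::string> files(argv + 1, argv + argc);
      const std::vector<aos::logger::LogFile> parts =
          aos::logger::SortParts(files);
      aos::logger::LogFilesContainer log_files(parts);

      if (!aos::logger::LogIsReadableIfMultiNode(log_files)) {
        std::cerr << "Log is not cleanly replayable.\n";
        return 1;
      }
      std::cout << "Log looks replayable.\n";
      return 0;
    }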
diff --git a/aos/events/logging/logger.fbs b/aos/events/logging/logger.fbs
index ec1a450..5733fa0 100644
--- a/aos/events/logging/logger.fbs
+++ b/aos/events/logging/logger.fbs
@@ -2,6 +2,15 @@
namespace aos.logger;
+enum StoredDataType : uint8 {
+ // We have message data in this file.
+ DATA = 0,
+ // We have timestamps of delivered data in this file.
+ TIMESTAMPS = 1,
+ // We have remote timestamps in this file.
+ REMOTE_TIMESTAMPS = 2,
+}
+
// A log file is a sequence of size prefixed flatbuffers.
// The first flatbuffer will be the LogFileHeader, followed by an arbitrary
// number of MessageHeaders.
@@ -32,6 +41,10 @@
// at most this duration (in nanoseconds). If the log reader buffers until
// it finds messages this much newer than it's simulation time, it will never
// find a message out of order.
+ //
+ // By definition, this is the time for the log reader to buffer messages after
+ // startup, i.e. messages sent before startup are not taken into account when
+ // checking for max out of order duration.
max_out_of_order_duration:long (id: 2);
// The configuration of the channels. It is valid to have a log file with
@@ -205,6 +218,10 @@
// Logger textual version. This is normally the release name stamped into
// the binary.
logger_version:string (id:33);
+
+ // The types of data stored in this log file. This can be used to find logs
+ // with only timestamps in them to pre-solve the time problem.
+ data_stored:[StoredDataType] (id:34);
}
// Table holding a message.
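The new data_stored field makes it possible to classify a part from its header alone. Below is a sketch of how a reader might use it, assuming the generated C++ accessor behaves the way the updated multinode_logger_test expectations later in this change suggest (data_stored() returning a flatbuffer vector of StoredDataType, null on logs written before this field existed); treat it as illustrative, not the exact API.

    #include "aos/events/logging/logger_generated.h"

    // Illustrative only: true if this part holds timestamps and nothing else,
    // i.e. it can be used to pre-solve the time problem without reading the
    // bulky data parts.
    bool OnlyTimestamps(const aos::logger::LogFileHeader &header) {
      if (header.data_stored() == nullptr) {
        return false;  // older log without the field; assume it has data
      }
      for (const auto type : *header.data_stored()) {
        if (type == aos::logger::StoredDataType::DATA) {
          return false;
        }
      }
      return true;
    }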
diff --git a/aos/events/logging/logger_main.cc b/aos/events/logging/logger_main.cc
index afc166f..f99fe4c 100644
--- a/aos/events/logging/logger_main.cc
+++ b/aos/events/logging/logger_main.cc
@@ -79,12 +79,11 @@
aos::logger::Logger logger(&event_loop);
if (FLAGS_rotate_every != 0.0) {
- logger.set_on_logged_period([&] {
- const auto now = event_loop.monotonic_now();
- if (now > last_rotation_time +
- std::chrono::duration<double>(FLAGS_rotate_every)) {
+ logger.set_on_logged_period([&](aos::monotonic_clock::time_point t) {
+ if (t > last_rotation_time +
+ std::chrono::duration<double>(FLAGS_rotate_every)) {
logger.Rotate();
- last_rotation_time = now;
+ last_rotation_time = t;
}
});
}
diff --git a/aos/events/logging/multinode_logger_test.cc b/aos/events/logging/multinode_logger_test.cc
index 677fc72..b8d07a2 100644
--- a/aos/events/logging/multinode_logger_test.cc
+++ b/aos/events/logging/multinode_logger_test.cc
@@ -26,9 +26,29 @@
::testing::Combine(
::testing::Values(
ConfigParams{"multinode_pingpong_combined_config.json", true,
- kCombinedConfigSha1(), kCombinedConfigSha1()},
+ kCombinedConfigSha1(), kCombinedConfigSha1(),
+ FileStrategy::kKeepSeparate,
+ ForceTimestampBuffering::kForceBufferTimestamps},
+ ConfigParams{"multinode_pingpong_combined_config.json", true,
+ kCombinedConfigSha1(), kCombinedConfigSha1(),
+ FileStrategy::kKeepSeparate,
+ ForceTimestampBuffering::kAutoBuffer},
ConfigParams{"multinode_pingpong_split_config.json", false,
- kSplitConfigSha1(), kReloggedSplitConfigSha1()}),
+ kSplitConfigSha1(), kReloggedSplitConfigSha1(),
+ FileStrategy::kKeepSeparate,
+ ForceTimestampBuffering::kForceBufferTimestamps},
+ ConfigParams{"multinode_pingpong_split_config.json", false,
+ kSplitConfigSha1(), kReloggedSplitConfigSha1(),
+ FileStrategy::kKeepSeparate,
+ ForceTimestampBuffering::kAutoBuffer},
+ ConfigParams{"multinode_pingpong_split_config.json", false,
+ kSplitConfigSha1(), kReloggedSplitConfigSha1(),
+ FileStrategy::kCombine,
+ ForceTimestampBuffering::kForceBufferTimestamps},
+ ConfigParams{"multinode_pingpong_split_config.json", false,
+ kSplitConfigSha1(), kReloggedSplitConfigSha1(),
+ FileStrategy::kCombine,
+ ForceTimestampBuffering::kAutoBuffer}),
::testing::ValuesIn(SupportedCompressionAlgorithms())));
INSTANTIATE_TEST_SUITE_P(
@@ -36,13 +56,37 @@
::testing::Combine(
::testing::Values(
ConfigParams{"multinode_pingpong_combined_config.json", true,
- kCombinedConfigSha1(), kCombinedConfigSha1()},
+ kCombinedConfigSha1(), kCombinedConfigSha1(),
+ FileStrategy::kKeepSeparate,
+ ForceTimestampBuffering::kForceBufferTimestamps},
+ ConfigParams{"multinode_pingpong_combined_config.json", true,
+ kCombinedConfigSha1(), kCombinedConfigSha1(),
+ FileStrategy::kKeepSeparate,
+ ForceTimestampBuffering::kAutoBuffer},
ConfigParams{"multinode_pingpong_split_config.json", false,
- kSplitConfigSha1(), kReloggedSplitConfigSha1()}),
+ kSplitConfigSha1(), kReloggedSplitConfigSha1(),
+ FileStrategy::kKeepSeparate,
+ ForceTimestampBuffering::kForceBufferTimestamps},
+ ConfigParams{"multinode_pingpong_split_config.json", false,
+ kSplitConfigSha1(), kReloggedSplitConfigSha1(),
+ FileStrategy::kKeepSeparate,
+ ForceTimestampBuffering::kAutoBuffer},
+ ConfigParams{"multinode_pingpong_split_config.json", false,
+ kSplitConfigSha1(), kReloggedSplitConfigSha1(),
+ FileStrategy::kCombine,
+ ForceTimestampBuffering::kForceBufferTimestamps},
+ ConfigParams{"multinode_pingpong_split_config.json", false,
+ kSplitConfigSha1(), kReloggedSplitConfigSha1(),
+ FileStrategy::kCombine,
+ ForceTimestampBuffering::kAutoBuffer}),
::testing::ValuesIn(SupportedCompressionAlgorithms())));
// Tests that we can write and read simple multi-node log files.
TEST_P(MultinodeLoggerTest, SimpleMultiNode) {
+ if (file_strategy() == FileStrategy::kCombine) {
+ GTEST_SKIP() << "We don't need to test the combined file writer this deep.";
+ }
+
std::vector<std::string> actual_filenames;
time_converter_.StartEqual();
@@ -79,11 +123,7 @@
}
EXPECT_EQ(logfile_uuids.size(), 2u);
- if (shared()) {
- EXPECT_EQ(parts_uuids.size(), 7u);
- } else {
- EXPECT_EQ(parts_uuids.size(), 8u);
- }
+ EXPECT_EQ(parts_uuids.size(), 8u);
// And confirm everything is on the correct node.
EXPECT_EQ(log_header[2].message().node()->name()->string_view(), "pi1");
@@ -92,122 +132,128 @@
EXPECT_EQ(log_header[5].message().node()->name()->string_view(), "pi2");
EXPECT_EQ(log_header[6].message().node()->name()->string_view(), "pi2");
-
EXPECT_EQ(log_header[7].message().node()->name()->string_view(), "pi2");
- EXPECT_EQ(log_header[8].message().node()->name()->string_view(), "pi2");
- EXPECT_EQ(log_header[9].message().node()->name()->string_view(), "pi2");
- EXPECT_EQ(log_header[10].message().node()->name()->string_view(), "pi1");
- EXPECT_EQ(log_header[11].message().node()->name()->string_view(), "pi1");
+ EXPECT_EQ(log_header[8].message().node()->name()->string_view(), "pi1");
+ EXPECT_EQ(log_header[9].message().node()->name()->string_view(), "pi1");
+
+ EXPECT_EQ(log_header[10].message().node()->name()->string_view(), "pi2");
+ EXPECT_EQ(log_header[11].message().node()->name()->string_view(), "pi2");
EXPECT_EQ(log_header[12].message().node()->name()->string_view(), "pi2");
EXPECT_EQ(log_header[13].message().node()->name()->string_view(), "pi2");
+ EXPECT_EQ(log_header[14].message().node()->name()->string_view(), "pi2");
- if (shared()) {
- EXPECT_EQ(log_header[14].message().node()->name()->string_view(), "pi2");
- EXPECT_EQ(log_header[15].message().node()->name()->string_view(), "pi2");
- EXPECT_EQ(log_header[16].message().node()->name()->string_view(), "pi2");
-
- EXPECT_EQ(log_header[17].message().node()->name()->string_view(), "pi1");
- EXPECT_EQ(log_header[18].message().node()->name()->string_view(), "pi1");
- } else {
- EXPECT_EQ(log_header[14].message().node()->name()->string_view(), "pi2");
- EXPECT_EQ(log_header[15].message().node()->name()->string_view(), "pi2");
-
- EXPECT_EQ(log_header[16].message().node()->name()->string_view(), "pi1");
- EXPECT_EQ(log_header[17].message().node()->name()->string_view(), "pi1");
-
- EXPECT_EQ(log_header[18].message().node()->name()->string_view(), "pi2");
- EXPECT_EQ(log_header[19].message().node()->name()->string_view(), "pi2");
- }
+ EXPECT_EQ(log_header[15].message().node()->name()->string_view(), "pi1");
+ EXPECT_EQ(log_header[16].message().node()->name()->string_view(), "pi1");
// And the parts index matches.
EXPECT_EQ(log_header[2].message().parts_index(), 0);
- EXPECT_EQ(log_header[3].message().parts_index(), 1);
- EXPECT_EQ(log_header[4].message().parts_index(), 2);
+
+ EXPECT_EQ(log_header[3].message().parts_index(), 0);
+ EXPECT_EQ(log_header[4].message().parts_index(), 1);
EXPECT_EQ(log_header[5].message().parts_index(), 0);
- EXPECT_EQ(log_header[6].message().parts_index(), 1);
- EXPECT_EQ(log_header[7].message().parts_index(), 0);
- EXPECT_EQ(log_header[8].message().parts_index(), 1);
- EXPECT_EQ(log_header[9].message().parts_index(), 2);
+ EXPECT_EQ(log_header[6].message().parts_index(), 0);
+ EXPECT_EQ(log_header[7].message().parts_index(), 1);
+
+ EXPECT_EQ(log_header[8].message().parts_index(), 0);
+ EXPECT_EQ(log_header[9].message().parts_index(), 1);
EXPECT_EQ(log_header[10].message().parts_index(), 0);
EXPECT_EQ(log_header[11].message().parts_index(), 1);
EXPECT_EQ(log_header[12].message().parts_index(), 0);
EXPECT_EQ(log_header[13].message().parts_index(), 1);
+ EXPECT_EQ(log_header[14].message().parts_index(), 2);
- if (shared()) {
- EXPECT_EQ(log_header[14].message().parts_index(), 0);
- EXPECT_EQ(log_header[15].message().parts_index(), 1);
- EXPECT_EQ(log_header[16].message().parts_index(), 2);
+ EXPECT_EQ(log_header[15].message().parts_index(), 0);
+ EXPECT_EQ(log_header[16].message().parts_index(), 1);
- EXPECT_EQ(log_header[17].message().parts_index(), 0);
- EXPECT_EQ(log_header[18].message().parts_index(), 1);
- } else {
- EXPECT_EQ(log_header[14].message().parts_index(), 0);
- EXPECT_EQ(log_header[15].message().parts_index(), 1);
+ // And that the data_stored field is right.
+ EXPECT_THAT(*log_header[2].message().data_stored(),
+ ::testing::ElementsAre(StoredDataType::DATA));
+ EXPECT_THAT(*log_header[3].message().data_stored(),
+ ::testing::ElementsAre(StoredDataType::TIMESTAMPS));
+ EXPECT_THAT(*log_header[4].message().data_stored(),
+ ::testing::ElementsAre(StoredDataType::TIMESTAMPS));
- EXPECT_EQ(log_header[16].message().parts_index(), 0);
- EXPECT_EQ(log_header[17].message().parts_index(), 1);
+ EXPECT_THAT(*log_header[5].message().data_stored(),
+ ::testing::ElementsAre(StoredDataType::DATA));
+ EXPECT_THAT(*log_header[6].message().data_stored(),
+ ::testing::ElementsAre(StoredDataType::TIMESTAMPS));
+ EXPECT_THAT(*log_header[7].message().data_stored(),
+ ::testing::ElementsAre(StoredDataType::TIMESTAMPS));
- EXPECT_EQ(log_header[18].message().parts_index(), 0);
- EXPECT_EQ(log_header[19].message().parts_index(), 1);
- }
+ EXPECT_THAT(*log_header[8].message().data_stored(),
+ ::testing::ElementsAre(StoredDataType::DATA));
+ EXPECT_THAT(*log_header[9].message().data_stored(),
+ ::testing::ElementsAre(StoredDataType::DATA));
+
+ EXPECT_THAT(*log_header[10].message().data_stored(),
+ ::testing::ElementsAre(StoredDataType::DATA));
+ EXPECT_THAT(*log_header[11].message().data_stored(),
+ ::testing::ElementsAre(StoredDataType::DATA));
+
+ EXPECT_THAT(*log_header[12].message().data_stored(),
+ ::testing::ElementsAre(StoredDataType::REMOTE_TIMESTAMPS));
+ EXPECT_THAT(*log_header[13].message().data_stored(),
+ ::testing::ElementsAre(StoredDataType::REMOTE_TIMESTAMPS));
+ EXPECT_THAT(*log_header[14].message().data_stored(),
+ ::testing::ElementsAre(StoredDataType::REMOTE_TIMESTAMPS));
+
+ EXPECT_THAT(*log_header[15].message().data_stored(),
+ ::testing::ElementsAre(StoredDataType::REMOTE_TIMESTAMPS));
+ EXPECT_THAT(*log_header[16].message().data_stored(),
+ ::testing::ElementsAre(StoredDataType::REMOTE_TIMESTAMPS));
}
- const std::vector<LogFile> sorted_log_files = SortParts(logfiles_);
+ const std::vector<LogFile> sorted_parts = SortParts(logfiles_);
+ EXPECT_TRUE(AllPartsMatchOutOfOrderDuration(sorted_parts));
{
using ::testing::UnorderedElementsAre;
- std::shared_ptr<const aos::Configuration> config =
- sorted_log_files[0].config;
+ std::shared_ptr<const aos::Configuration> config = sorted_parts[0].config;
// Timing reports, pings
- EXPECT_THAT(CountChannelsData(config, logfiles_[2]),
- UnorderedElementsAre(
- std::make_tuple("/pi1/aos",
- "aos.message_bridge.ServerStatistics", 1),
- std::make_tuple("/test", "aos.examples.Ping", 1),
- std::make_tuple("/pi1/aos", "aos.examples.Ping", 1)))
- << " : " << logfiles_[2];
- {
- std::vector<std::tuple<std::string, std::string, int>> channel_counts = {
- std::make_tuple("/pi1/aos", "aos.examples.Ping", 10),
- std::make_tuple("/pi1/aos", "aos.message_bridge.Timestamp", 1),
- std::make_tuple("/pi1/aos", "aos.message_bridge.ClientStatistics",
- 1)};
- if (!std::get<0>(GetParam()).shared) {
- channel_counts.push_back(
- std::make_tuple("/pi1/aos/remote_timestamps/pi2/pi1/aos/"
- "aos-message_bridge-Timestamp",
- "aos.message_bridge.RemoteMessage", 1));
- }
- EXPECT_THAT(CountChannelsData(config, logfiles_[3]),
- ::testing::UnorderedElementsAreArray(channel_counts))
- << " : " << logfiles_[3];
+ if (shared()) {
+ EXPECT_THAT(
+ CountChannelsData(config, logfiles_[2]),
+ UnorderedElementsAre(
+ std::make_tuple("/pi1/aos", "aos.examples.Ping", 2001),
+ std::make_tuple("/pi1/aos", "aos.message_bridge.ClientStatistics",
+ 200),
+ std::make_tuple("/pi1/aos", "aos.message_bridge.ServerStatistics",
+ 21),
+ std::make_tuple("/pi1/aos", "aos.message_bridge.Timestamp", 200),
+ std::make_tuple("/pi1/aos", "aos.timing.Report", 60),
+ std::make_tuple("/test", "aos.examples.Ping", 2001)))
+ << " : " << logfiles_[2];
+ } else {
+ EXPECT_THAT(
+ CountChannelsData(config, logfiles_[2]),
+ UnorderedElementsAre(
+ std::make_tuple("/pi1/aos", "aos.examples.Ping", 2001),
+ std::make_tuple("/pi1/aos", "aos.message_bridge.ClientStatistics",
+ 200),
+ std::make_tuple("/pi1/aos", "aos.message_bridge.ServerStatistics",
+ 21),
+ std::make_tuple("/pi1/aos", "aos.message_bridge.Timestamp", 200),
+ std::make_tuple("/pi1/aos", "aos.timing.Report", 60),
+ std::make_tuple("/test", "aos.examples.Ping", 2001),
+ std::make_tuple("/pi1/aos/remote_timestamps/pi2/pi1/aos/"
+ "aos-message_bridge-Timestamp",
+ "aos.message_bridge.RemoteMessage", 200)))
+ << " : " << logfiles_[2];
}
- {
- std::vector<std::tuple<std::string, std::string, int>> channel_counts = {
- std::make_tuple("/pi1/aos", "aos.examples.Ping", 1990),
- std::make_tuple("/pi1/aos", "aos.message_bridge.Timestamp", 199),
- std::make_tuple("/pi1/aos", "aos.message_bridge.ServerStatistics",
- 20),
- std::make_tuple("/pi1/aos", "aos.message_bridge.ClientStatistics",
- 199),
- std::make_tuple("/pi1/aos", "aos.timing.Report", 60),
- std::make_tuple("/test", "aos.examples.Ping", 2000)};
- if (!std::get<0>(GetParam()).shared) {
- channel_counts.push_back(
- std::make_tuple("/pi1/aos/remote_timestamps/pi2/pi1/aos/"
- "aos-message_bridge-Timestamp",
- "aos.message_bridge.RemoteMessage", 199));
- }
- EXPECT_THAT(CountChannelsData(config, logfiles_[4]),
- ::testing::UnorderedElementsAreArray(channel_counts))
- << " : " << logfiles_[4];
- }
+
+ EXPECT_THAT(CountChannelsData(config, logfiles_[3]),
+ ::testing::UnorderedElementsAre())
+ << " : " << logfiles_[3];
+ EXPECT_THAT(CountChannelsData(config, logfiles_[4]),
+ ::testing::UnorderedElementsAre())
+ << " : " << logfiles_[4];
+
// Timestamps for pong
EXPECT_THAT(CountChannelsTimestamp(config, logfiles_[2]),
UnorderedElementsAre())
@@ -223,185 +269,121 @@
std::make_tuple("/pi2/aos", "aos.message_bridge.Timestamp", 200)))
<< " : " << logfiles_[4];
- // Pong data.
+ // Timing reports and pongs.
EXPECT_THAT(
CountChannelsData(config, logfiles_[5]),
- UnorderedElementsAre(std::make_tuple("/test", "aos.examples.Pong", 91)))
+ UnorderedElementsAre(
+ std::make_tuple("/pi2/aos", "aos.examples.Ping", 2001),
+ std::make_tuple("/pi2/aos", "aos.message_bridge.ClientStatistics",
+ 200),
+ std::make_tuple("/pi2/aos", "aos.message_bridge.ServerStatistics",
+ 21),
+ std::make_tuple("/pi2/aos", "aos.message_bridge.Timestamp", 200),
+ std::make_tuple("/pi2/aos", "aos.timing.Report", 60),
+ std::make_tuple("/test", "aos.examples.Pong", 2001)))
<< " : " << logfiles_[5];
- EXPECT_THAT(CountChannelsData(config, logfiles_[6]),
- UnorderedElementsAre(
- std::make_tuple("/test", "aos.examples.Pong", 1910)))
+ EXPECT_THAT(CountChannelsData(config, logfiles_[6]), UnorderedElementsAre())
<< " : " << logfiles_[6];
-
- // No timestamps
+ EXPECT_THAT(CountChannelsData(config, logfiles_[7]), UnorderedElementsAre())
+ << " : " << logfiles_[7];
+ // And ping timestamps.
EXPECT_THAT(CountChannelsTimestamp(config, logfiles_[5]),
UnorderedElementsAre())
<< " : " << logfiles_[5];
- EXPECT_THAT(CountChannelsTimestamp(config, logfiles_[6]),
- UnorderedElementsAre())
- << " : " << logfiles_[6];
-
- // Timing reports and pongs.
- EXPECT_THAT(CountChannelsData(config, logfiles_[7]),
- UnorderedElementsAre(
- std::make_tuple("/pi2/aos", "aos.examples.Ping", 1),
- std::make_tuple("/pi2/aos",
- "aos.message_bridge.ServerStatistics", 1)))
- << " : " << logfiles_[7];
EXPECT_THAT(
- CountChannelsData(config, logfiles_[8]),
- UnorderedElementsAre(std::make_tuple("/test", "aos.examples.Pong", 1)))
- << " : " << logfiles_[8];
- EXPECT_THAT(
- CountChannelsData(config, logfiles_[9]),
- UnorderedElementsAre(
- std::make_tuple("/pi2/aos", "aos.examples.Ping", 2000),
- std::make_tuple("/pi2/aos", "aos.message_bridge.Timestamp", 200),
- std::make_tuple("/pi2/aos", "aos.message_bridge.ServerStatistics",
- 20),
- std::make_tuple("/pi2/aos", "aos.message_bridge.ClientStatistics",
- 200),
- std::make_tuple("/pi2/aos", "aos.timing.Report", 60),
- std::make_tuple("/test", "aos.examples.Pong", 2000)))
- << " : " << logfiles_[9];
- // And ping timestamps.
- EXPECT_THAT(CountChannelsTimestamp(config, logfiles_[7]),
- UnorderedElementsAre())
- << " : " << logfiles_[7];
- EXPECT_THAT(
- CountChannelsTimestamp(config, logfiles_[8]),
+ CountChannelsTimestamp(config, logfiles_[6]),
UnorderedElementsAre(std::make_tuple("/test", "aos.examples.Ping", 1)))
- << " : " << logfiles_[8];
+ << " : " << logfiles_[6];
EXPECT_THAT(
- CountChannelsTimestamp(config, logfiles_[9]),
+ CountChannelsTimestamp(config, logfiles_[7]),
UnorderedElementsAre(
std::make_tuple("/test", "aos.examples.Ping", 2000),
std::make_tuple("/pi1/aos", "aos.message_bridge.Timestamp", 200)))
- << " : " << logfiles_[9];
+ << " : " << logfiles_[7];
// And then test that the remotely logged timestamp data files only have
// timestamps in them.
+ EXPECT_THAT(CountChannelsTimestamp(config, logfiles_[8]),
+ UnorderedElementsAre())
+ << " : " << logfiles_[8];
+ EXPECT_THAT(CountChannelsTimestamp(config, logfiles_[9]),
+ UnorderedElementsAre())
+ << " : " << logfiles_[9];
EXPECT_THAT(CountChannelsTimestamp(config, logfiles_[10]),
UnorderedElementsAre())
<< " : " << logfiles_[10];
EXPECT_THAT(CountChannelsTimestamp(config, logfiles_[11]),
UnorderedElementsAre())
<< " : " << logfiles_[11];
- EXPECT_THAT(CountChannelsTimestamp(config, logfiles_[12]),
- UnorderedElementsAre())
- << " : " << logfiles_[12];
- EXPECT_THAT(CountChannelsTimestamp(config, logfiles_[13]),
- UnorderedElementsAre())
- << " : " << logfiles_[13];
- EXPECT_THAT(CountChannelsData(config, logfiles_[10]),
+ EXPECT_THAT(CountChannelsData(config, logfiles_[8]),
UnorderedElementsAre(std::make_tuple(
"/pi1/aos", "aos.message_bridge.Timestamp", 9)))
- << " : " << logfiles_[10];
- EXPECT_THAT(CountChannelsData(config, logfiles_[11]),
+ << " : " << logfiles_[8];
+ EXPECT_THAT(CountChannelsData(config, logfiles_[9]),
UnorderedElementsAre(std::make_tuple(
"/pi1/aos", "aos.message_bridge.Timestamp", 191)))
+ << " : " << logfiles_[9];
+
+ // Pong and timestamp data.
+ EXPECT_THAT(
+ CountChannelsData(config, logfiles_[10]),
+ UnorderedElementsAre(
+ std::make_tuple("/pi2/aos", "aos.message_bridge.Timestamp", 9),
+ std::make_tuple("/test", "aos.examples.Pong", 91)))
+ << " : " << logfiles_[10];
+ EXPECT_THAT(
+ CountChannelsData(config, logfiles_[11]),
+ UnorderedElementsAre(
+ std::make_tuple("/pi2/aos", "aos.message_bridge.Timestamp", 191),
+ std::make_tuple("/test", "aos.examples.Pong", 1910)))
<< " : " << logfiles_[11];
+ // Timestamps from pi2 on pi1, and the other way.
EXPECT_THAT(CountChannelsData(config, logfiles_[12]),
- UnorderedElementsAre(std::make_tuple(
- "/pi2/aos", "aos.message_bridge.Timestamp", 9)))
+ UnorderedElementsAre())
<< " : " << logfiles_[12];
EXPECT_THAT(CountChannelsData(config, logfiles_[13]),
+ UnorderedElementsAre())
+ << " : " << logfiles_[13];
+ EXPECT_THAT(CountChannelsData(config, logfiles_[14]),
+ UnorderedElementsAre())
+ << " : " << logfiles_[14];
+ EXPECT_THAT(CountChannelsData(config, logfiles_[15]),
+ UnorderedElementsAre())
+ << " : " << logfiles_[15];
+ EXPECT_THAT(CountChannelsData(config, logfiles_[16]),
+ UnorderedElementsAre())
+ << " : " << logfiles_[16];
+
+ EXPECT_THAT(
+ CountChannelsTimestamp(config, logfiles_[12]),
+ UnorderedElementsAre(std::make_tuple("/test", "aos.examples.Ping", 1)))
+ << " : " << logfiles_[12];
+ EXPECT_THAT(
+ CountChannelsTimestamp(config, logfiles_[13]),
+ UnorderedElementsAre(
+ std::make_tuple("/pi1/aos", "aos.message_bridge.Timestamp", 9),
+ std::make_tuple("/test", "aos.examples.Ping", 90)))
+ << " : " << logfiles_[13];
+ EXPECT_THAT(
+ CountChannelsTimestamp(config, logfiles_[14]),
+ UnorderedElementsAre(
+ std::make_tuple("/pi1/aos", "aos.message_bridge.Timestamp", 191),
+ std::make_tuple("/test", "aos.examples.Ping", 1910)))
+ << " : " << logfiles_[14];
+ EXPECT_THAT(CountChannelsTimestamp(config, logfiles_[15]),
+ UnorderedElementsAre(std::make_tuple(
+ "/pi2/aos", "aos.message_bridge.Timestamp", 9)))
+ << " : " << logfiles_[15];
+ EXPECT_THAT(CountChannelsTimestamp(config, logfiles_[16]),
UnorderedElementsAre(std::make_tuple(
"/pi2/aos", "aos.message_bridge.Timestamp", 191)))
- << " : " << logfiles_[13];
-
- // Timestamps from pi2 on pi1, and the other way.
- if (shared()) {
- EXPECT_THAT(CountChannelsData(config, logfiles_[14]),
- UnorderedElementsAre())
- << " : " << logfiles_[14];
- EXPECT_THAT(CountChannelsData(config, logfiles_[15]),
- UnorderedElementsAre())
- << " : " << logfiles_[15];
- EXPECT_THAT(CountChannelsData(config, logfiles_[16]),
- UnorderedElementsAre())
- << " : " << logfiles_[16];
- EXPECT_THAT(CountChannelsData(config, logfiles_[17]),
- UnorderedElementsAre())
- << " : " << logfiles_[17];
- EXPECT_THAT(CountChannelsData(config, logfiles_[18]),
- UnorderedElementsAre())
- << " : " << logfiles_[18];
-
- EXPECT_THAT(CountChannelsTimestamp(config, logfiles_[14]),
- UnorderedElementsAre(
- std::make_tuple("/test", "aos.examples.Ping", 1)))
- << " : " << logfiles_[14];
- EXPECT_THAT(
- CountChannelsTimestamp(config, logfiles_[15]),
- UnorderedElementsAre(
- std::make_tuple("/pi1/aos", "aos.message_bridge.Timestamp", 9),
- std::make_tuple("/test", "aos.examples.Ping", 90)))
- << " : " << logfiles_[15];
- EXPECT_THAT(
- CountChannelsTimestamp(config, logfiles_[16]),
- UnorderedElementsAre(
- std::make_tuple("/pi1/aos", "aos.message_bridge.Timestamp", 191),
- std::make_tuple("/test", "aos.examples.Ping", 1910)))
- << " : " << logfiles_[16];
- EXPECT_THAT(CountChannelsTimestamp(config, logfiles_[17]),
- UnorderedElementsAre(std::make_tuple(
- "/pi2/aos", "aos.message_bridge.Timestamp", 9)))
- << " : " << logfiles_[17];
- EXPECT_THAT(CountChannelsTimestamp(config, logfiles_[18]),
- UnorderedElementsAre(std::make_tuple(
- "/pi2/aos", "aos.message_bridge.Timestamp", 191)))
- << " : " << logfiles_[18];
- } else {
- EXPECT_THAT(CountChannelsData(config, logfiles_[14]),
- UnorderedElementsAre())
- << " : " << logfiles_[14];
- EXPECT_THAT(CountChannelsData(config, logfiles_[15]),
- UnorderedElementsAre())
- << " : " << logfiles_[15];
- EXPECT_THAT(CountChannelsData(config, logfiles_[16]),
- UnorderedElementsAre())
- << " : " << logfiles_[16];
- EXPECT_THAT(CountChannelsData(config, logfiles_[17]),
- UnorderedElementsAre())
- << " : " << logfiles_[17];
- EXPECT_THAT(CountChannelsData(config, logfiles_[18]),
- UnorderedElementsAre())
- << " : " << logfiles_[18];
- EXPECT_THAT(CountChannelsData(config, logfiles_[19]),
- UnorderedElementsAre())
- << " : " << logfiles_[19];
-
- EXPECT_THAT(CountChannelsTimestamp(config, logfiles_[14]),
- UnorderedElementsAre(std::make_tuple(
- "/pi1/aos", "aos.message_bridge.Timestamp", 9)))
- << " : " << logfiles_[14];
- EXPECT_THAT(CountChannelsTimestamp(config, logfiles_[15]),
- UnorderedElementsAre(std::make_tuple(
- "/pi1/aos", "aos.message_bridge.Timestamp", 191)))
- << " : " << logfiles_[15];
- EXPECT_THAT(CountChannelsTimestamp(config, logfiles_[16]),
- UnorderedElementsAre(std::make_tuple(
- "/pi2/aos", "aos.message_bridge.Timestamp", 9)))
- << " : " << logfiles_[16];
- EXPECT_THAT(CountChannelsTimestamp(config, logfiles_[17]),
- UnorderedElementsAre(std::make_tuple(
- "/pi2/aos", "aos.message_bridge.Timestamp", 191)))
- << " : " << logfiles_[17];
- EXPECT_THAT(CountChannelsTimestamp(config, logfiles_[18]),
- UnorderedElementsAre(
- std::make_tuple("/test", "aos.examples.Ping", 91)))
- << " : " << logfiles_[18];
- EXPECT_THAT(CountChannelsTimestamp(config, logfiles_[19]),
- UnorderedElementsAre(
- std::make_tuple("/test", "aos.examples.Ping", 1910)))
- << " : " << logfiles_[19];
- }
+ << " : " << logfiles_[16];
}
- LogReader reader(sorted_log_files);
+ LogReader reader(sorted_parts);
SimulatedEventLoopFactory log_reader_factory(reader.configuration());
log_reader_factory.set_send_delay(chrono::microseconds(0));
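For orientation, the matchers above compare against the full multiset of (channel, type, count) tuples found in one log part. A hypothetical shape for the helpers they rely on, inferred from usage only (the real ones live in the logger test utilities):

    // Assumed signatures, sketched from how the matchers consume them:
    std::vector<std::tuple<std::string, std::string, int>> CountChannelsData(
        const Configuration *config, std::string_view filename);
    std::vector<std::tuple<std::string, std::string, int>>
    CountChannelsTimestamp(const Configuration *config,
                           std::string_view filename);

An empty UnorderedElementsAre() therefore asserts that the part contains no data (or no timestamps) at all.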
@@ -571,7 +553,8 @@
pi2_logger.AppendAllFilenames(&actual_filenames);
}
- const std::vector<LogFile> sorted_parts = SortParts(logfiles_);
+ const std::vector<LogFile> sorted_parts = SortParts(actual_filenames);
+ EXPECT_TRUE(AllPartsMatchOutOfOrderDuration(sorted_parts));
LogReader reader(sorted_parts, &config_.message());
// Remap just on pi1.
@@ -644,7 +627,8 @@
pi2_logger.AppendAllFilenames(&actual_filenames);
}
- const std::vector<LogFile> sorted_parts = SortParts(logfiles_);
+ const std::vector<LogFile> sorted_parts = SortParts(actual_filenames);
+ EXPECT_TRUE(AllPartsMatchOutOfOrderDuration(sorted_parts));
LogReader reader(sorted_parts, &config_.message());
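This is the pattern the change applies throughout: read back the filenames the loggers actually produced instead of the precomputed logfiles_ list, validate the sorted parts, and only then build the reader. A minimal sketch under those assumptions:

    std::vector<std::string> filenames;
    pi1_logger.AppendAllFilenames(&filenames);
    pi2_logger.AppendAllFilenames(&filenames);
    const std::vector<LogFile> sorted_parts = SortParts(filenames);
    // New invariant checked everywhere in this change: every sorted part
    // agrees on its max out-of-order duration.
    EXPECT_TRUE(AllPartsMatchOutOfOrderDuration(sorted_parts));
    LogReader reader(sorted_parts, &config_.message());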
@@ -698,6 +682,8 @@
// is forwarded
TEST_P(MultinodeLoggerTest, OnlyDoBeforeSendCallbackOnSenderNode) {
time_converter_.StartEqual();
+
+ std::vector<std::string> filenames;
{
LoggerState pi1_logger = MakeLogger(pi1_);
LoggerState pi2_logger = MakeLogger(pi2_);
@@ -708,9 +694,14 @@
StartLogger(&pi2_logger);
event_loop_factory_.RunFor(chrono::milliseconds(20000));
+
+ pi1_logger.AppendAllFilenames(&filenames);
+ pi2_logger.AppendAllFilenames(&filenames);
}
- LogReader reader(SortParts(logfiles_));
+ const std::vector<LogFile> sorted_parts = SortParts(filenames);
+ EXPECT_TRUE(AllPartsMatchOutOfOrderDuration(sorted_parts));
+ LogReader reader(sorted_parts);
int ping_count = 0;
// Adds a callback which mutates the value of the pong message before the
@@ -783,7 +774,8 @@
pi2_logger.AppendAllFilenames(&actual_filenames);
}
- const std::vector<LogFile> sorted_parts = SortParts(logfiles_);
+ const std::vector<LogFile> sorted_parts = SortParts(actual_filenames);
+ EXPECT_TRUE(AllPartsMatchOutOfOrderDuration(sorted_parts));
LogReader reader(sorted_parts, &config_.message());
SimulatedEventLoopFactory log_reader_factory(reader.configuration());
@@ -802,6 +794,8 @@
// the LogReader constructor.
TEST_P(MultinodeLoggerDeathTest, MultiNodeBadReplayConfig) {
time_converter_.StartEqual();
+
+ std::vector<std::string> filenames;
{
LoggerState pi1_logger = MakeLogger(pi1_);
LoggerState pi2_logger = MakeLogger(pi2_);
@@ -812,6 +806,9 @@
StartLogger(&pi2_logger);
event_loop_factory_.RunFor(chrono::milliseconds(20000));
+
+ pi1_logger.AppendAllFilenames(&filenames);
+ pi2_logger.AppendAllFilenames(&filenames);
}
// Test that, if we add an additional node to the replay config that the
@@ -826,7 +823,8 @@
}
)");
- const std::vector<LogFile> sorted_parts = SortParts(logfiles_);
+ const std::vector<LogFile> sorted_parts = SortParts(filenames);
+ EXPECT_TRUE(AllPartsMatchOutOfOrderDuration(sorted_parts));
EXPECT_DEATH(LogReader(sorted_parts, &extra_nodes_config.message()),
"Log file and replay config need to have matching nodes lists.");
}
@@ -856,7 +854,9 @@
// Since we delay starting pi2, it already knows about all the timestamps so
// we don't end up with extra parts.
- LogReader reader(SortParts(actual_filenames));
+ const std::vector<LogFile> sorted_parts = SortParts(actual_filenames);
+ EXPECT_TRUE(AllPartsMatchOutOfOrderDuration(sorted_parts));
+ LogReader reader(sorted_parts);
SimulatedEventLoopFactory log_reader_factory(reader.configuration());
log_reader_factory.set_send_delay(chrono::microseconds(0));
@@ -969,6 +969,7 @@
const chrono::nanoseconds logger_run3 = time_converter_.AddMonotonic(
{chrono::milliseconds(400), chrono::milliseconds(400)});
+ std::vector<std::string> actual_filenames;
{
LoggerState pi2_logger = MakeLogger(pi2_);
@@ -1009,14 +1010,19 @@
(pi2_->monotonic_now() - pi1_->monotonic_now()) - initial_pi2_offset,
event_loop_factory_.send_delay() +
event_loop_factory_.network_delay());
+
+ pi1_logger.AppendAllFilenames(&actual_filenames);
}
// And log a bit more on pi2.
event_loop_factory_.RunFor(logger_run3);
+
+ pi2_logger.AppendAllFilenames(&actual_filenames);
}
- LogReader reader(
- SortParts(MakeLogFiles(logfile_base1_, logfile_base2_, 3, 3)));
+ const std::vector<LogFile> sorted_parts = SortParts(actual_filenames);
+ EXPECT_TRUE(AllPartsMatchOutOfOrderDuration(sorted_parts));
+ LogReader reader(sorted_parts);
SimulatedEventLoopFactory log_reader_factory(reader.configuration());
log_reader_factory.set_send_delay(chrono::microseconds(0));
@@ -1140,6 +1146,8 @@
// Tests that we can sort a bunch of parts with an empty part. We should ignore
// it and remove it from the sorted list.
TEST_P(MultinodeLoggerTest, SortEmptyParts) {
+ std::vector<std::string> actual_filenames;
+
time_converter_.StartEqual();
// Make a bunch of parts.
{
@@ -1152,21 +1160,27 @@
StartLogger(&pi2_logger);
event_loop_factory_.RunFor(chrono::milliseconds(2000));
+ pi1_logger.AppendAllFilenames(&actual_filenames);
+ pi2_logger.AppendAllFilenames(&actual_filenames);
}
// TODO(austin): Should we flip out if the file can't open?
const std::string kEmptyFile("foobarinvalidfiledoesnotexist" + Extension());
aos::util::WriteStringToFileOrDie(kEmptyFile, "");
- logfiles_.emplace_back(kEmptyFile);
+ actual_filenames.emplace_back(kEmptyFile);
- const std::vector<LogFile> sorted_parts = SortParts(logfiles_);
+ const std::vector<LogFile> sorted_parts = SortParts(actual_filenames);
VerifyParts(sorted_parts, {kEmptyFile});
}
// Tests that we can sort a bunch of parts with the end missing off a
// file. We should use the part we can read.
TEST_P(MultinodeLoggerTest, SortTruncatedParts) {
+ if (file_strategy() == FileStrategy::kCombine) {
+ GTEST_SKIP() << "We don't need to test the combined file writer this deep.";
+ }
+
std::vector<std::string> actual_filenames;
time_converter_.StartEqual();
// Make a bunch of parts.
@@ -1192,10 +1206,10 @@
// For snappy, needs to have enough data to be >1 chunk of compressed data so
// that we don't corrupt the entire log part.
::std::string compressed_contents =
- aos::util::ReadFileToStringOrDie(logfiles_[4]);
+ aos::util::ReadFileToStringOrDie(logfiles_[2]);
aos::util::WriteStringToFileOrDie(
- logfiles_[4],
+ logfiles_[2],
compressed_contents.substr(0, compressed_contents.size() - 100));
const std::vector<LogFile> sorted_parts = SortParts(logfiles_);
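The truncation simulates a part whose tail was lost mid-write; for snappy the cut has to land inside the final compressed chunk so only that chunk is dropped rather than corrupting the whole part. A condensed sketch using the same aos::util file helpers:

    // Chop 100 bytes off the end of the chosen part, then re-sort; the
    // readable prefix should still be used.
    std::string contents = aos::util::ReadFileToStringOrDie(logfiles_[2]);
    aos::util::WriteStringToFileOrDie(
        logfiles_[2], contents.substr(0, contents.size() - 100));
    const std::vector<LogFile> sorted_parts = SortParts(logfiles_);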
@@ -1205,6 +1219,8 @@
// Tests that if we remap a logged channel, it shows up correctly.
TEST_P(MultinodeLoggerTest, RemapLoggedChannel) {
time_converter_.StartEqual();
+
+ std::vector<std::string> filenames;
{
LoggerState pi1_logger = MakeLogger(pi1_);
LoggerState pi2_logger = MakeLogger(pi2_);
@@ -1215,9 +1231,14 @@
StartLogger(&pi2_logger);
event_loop_factory_.RunFor(chrono::milliseconds(20000));
+
+ pi1_logger.AppendAllFilenames(&filenames);
+ pi2_logger.AppendAllFilenames(&filenames);
}
- LogReader reader(SortParts(logfiles_));
+ const std::vector<LogFile> sorted_parts = SortParts(filenames);
+ EXPECT_TRUE(AllPartsMatchOutOfOrderDuration(sorted_parts));
+ LogReader reader(sorted_parts);
// Remap just on pi1.
reader.RemapLoggedChannel<aos::timing::Report>(
@@ -1300,7 +1321,9 @@
pi2_logger.AppendAllFilenames(&actual_filenames);
}
- LogReader reader(SortParts(actual_filenames));
+ const std::vector<LogFile> sorted_parts = SortParts(actual_filenames);
+ EXPECT_TRUE(AllPartsMatchOutOfOrderDuration(sorted_parts));
+ LogReader reader(sorted_parts);
// Rename just on pi2. Add some global maps just to verify they get added in
// the config and used correctly.
@@ -1407,7 +1430,9 @@
event_loop_factory_.RunFor(chrono::milliseconds(20000));
}
- LogReader reader(SortParts(logfiles_));
+ const std::vector<LogFile> sorted_parts = SortParts(logfiles_);
+ EXPECT_TRUE(AllPartsMatchOutOfOrderDuration(sorted_parts));
+ LogReader reader(sorted_parts);
reader.RemapLoggedChannel<examples::Ping>("/test");
@@ -1488,7 +1513,9 @@
pi2_logger.AppendAllFilenames(&actual_filenames);
}
- LogReader reader(SortParts(actual_filenames));
+ const std::vector<LogFile> sorted_parts = SortParts(actual_filenames);
+ EXPECT_TRUE(AllPartsMatchOutOfOrderDuration(sorted_parts));
+ LogReader reader(sorted_parts);
std::vector<MapT> maps;
{
@@ -1578,6 +1605,8 @@
TEST_P(MultinodeLoggerTest, SingleNodeReplay) {
time_converter_.StartEqual();
constexpr chrono::milliseconds kStartupDelay(95);
+ std::vector<std::string> filenames;
+
{
LoggerState pi1_logger = MakeLogger(pi1_);
LoggerState pi2_logger = MakeLogger(pi2_);
@@ -1588,10 +1617,13 @@
StartLogger(&pi2_logger);
event_loop_factory_.RunFor(chrono::milliseconds(20000));
+
+ pi1_logger.AppendAllFilenames(&filenames);
+ pi2_logger.AppendAllFilenames(&filenames);
}
- LogReader full_reader(SortParts(logfiles_));
- LogReader single_node_reader(SortParts(logfiles_));
+ LogReader full_reader(SortParts(filenames));
+ LogReader single_node_reader(SortParts(filenames));
SimulatedEventLoopFactory full_factory(full_reader.configuration());
SimulatedEventLoopFactory single_node_factory(
@@ -1971,8 +2003,8 @@
LoggerState pi2_logger = MakeLogger(
log_reader_factory.GetNodeEventLoopFactory("pi2"), &log_reader_factory);
- StartLogger(&pi1_logger, tmp_dir_ + "/relogged1");
- StartLogger(&pi2_logger, tmp_dir_ + "/relogged2");
+ StartLogger(&pi1_logger, tmp_dir_ + "/logs/relogged1");
+ StartLogger(&pi2_logger, tmp_dir_ + "/logs/relogged2");
log_reader_factory.Run();
}
@@ -1982,8 +2014,9 @@
// And verify that we can run the LogReader over the relogged files without
// hitting any fatal errors.
{
- LogReader relogged_reader(SortParts(MakeLogFiles(
- tmp_dir_ + "/relogged1", tmp_dir_ + "/relogged2", 3, 3, true)));
+ LogReader relogged_reader(SortParts(
+ MakeLogFiles(tmp_dir_ + "/logs/relogged1", tmp_dir_ + "/logs/relogged2",
+ 1, 1, 2, 2, true)));
relogged_reader.Register();
relogged_reader.event_loop_factory()->Run();
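Note the MakeLogFiles arity change from (base1, base2, 3, 3, true) to (base1, base2, 1, 1, 2, 2, true): with data and timestamps now written to separate files, part counts are specified per file kind. One assumed reading of the new signature (a sketch only; the fixture defines the real one):

    std::vector<std::string> MakeLogFiles(
        std::string logfile_base1, std::string logfile_base2,
        size_t pi1_data_count, size_t pi2_data_count,
        size_t pi1_timestamps_count, size_t pi2_timestamps_count,
        bool relogged_config = false);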
@@ -1992,8 +2025,8 @@
// configuration.
{
LogReader relogged_reader(
- SortParts(MakeLogFiles(tmp_dir_ + "/relogged1", tmp_dir_ + "/relogged2",
- 3, 3, true)),
+ SortParts(MakeLogFiles(tmp_dir_ + "/logs/relogged1",
+ tmp_dir_ + "/logs/relogged2", 1, 1, 2, 2, true)),
reader.configuration());
relogged_reader.Register();
@@ -2023,7 +2056,7 @@
ASSERT_THAT(actual_filenames,
::testing::UnorderedElementsAreArray(logfiles_));
- for (const LogFile &log_file : SortParts(logfiles_)) {
+ for (const LogFile &log_file : SortParts(actual_filenames)) {
for (const LogParts &log_part : log_file.parts) {
if (log_part.node == log_file.logger_node) {
EXPECT_EQ(log_part.logger_monotonic_start_time,
@@ -2047,13 +2080,11 @@
// Test that renaming the base, renames the folder.
TEST_P(MultinodeLoggerTest, LoggerRenameFolder) {
- util::UnlinkRecursive(tmp_dir_ + "/renamefolder");
- util::UnlinkRecursive(tmp_dir_ + "/new-good");
time_converter_.AddMonotonic(
{BootTimestamp::epoch(), BootTimestamp::epoch() + chrono::seconds(1000)});
- logfile_base1_ = tmp_dir_ + "/renamefolder/multi_logfile1";
- logfile_base2_ = tmp_dir_ + "/renamefolder/multi_logfile2";
- logfiles_ = MakeLogFiles(logfile_base1_, logfile_base2_);
+ logfile_base1_ = tmp_dir_ + "/logs/renamefolder/multi_logfile1";
+ logfile_base2_ = tmp_dir_ + "/logs/renamefolder/multi_logfile2";
+
LoggerState pi1_logger = MakeLogger(pi1_);
LoggerState pi2_logger = MakeLogger(pi2_);
@@ -2061,8 +2092,8 @@
StartLogger(&pi2_logger);
event_loop_factory_.RunFor(chrono::milliseconds(10000));
- logfile_base1_ = tmp_dir_ + "/new-good/multi_logfile1";
- logfile_base2_ = tmp_dir_ + "/new-good/multi_logfile2";
+ logfile_base1_ = tmp_dir_ + "/logs/new-good/multi_logfile1";
+ logfile_base2_ = tmp_dir_ + "/logs/new-good/multi_logfile2";
logfiles_ = MakeLogFiles(logfile_base1_, logfile_base2_);
// Sequence of set_base_name and Rotate simulates rename operation. Since
@@ -2084,14 +2115,13 @@
TEST_P(MultinodeLoggerDeathTest, LoggerRenameFile) {
time_converter_.AddMonotonic(
{BootTimestamp::epoch(), BootTimestamp::epoch() + chrono::seconds(1000)});
- util::UnlinkRecursive(tmp_dir_ + "/renamefile");
- logfile_base1_ = tmp_dir_ + "/renamefile/multi_logfile1";
- logfile_base2_ = tmp_dir_ + "/renamefile/multi_logfile2";
- logfiles_ = MakeLogFiles(logfile_base1_, logfile_base2_);
+ logfile_base1_ = tmp_dir_ + "/logs/renamefile/multi_logfile1";
+ logfile_base2_ = tmp_dir_ + "/logs/renamefile/multi_logfile2";
+
LoggerState pi1_logger = MakeLogger(pi1_);
StartLogger(&pi1_logger);
event_loop_factory_.RunFor(chrono::milliseconds(10000));
- logfile_base1_ = tmp_dir_ + "/new-renamefile/new_multi_logfile1";
+ logfile_base1_ = tmp_dir_ + "/logs/new-renamefile/new_multi_logfile1";
EXPECT_DEATH({ pi1_logger.log_namer->set_base_name(logfile_base1_); },
"Rename of file base from");
}
@@ -2103,6 +2133,9 @@
// This should be enough that we can then re-run the logger and get a valid log
// back.
TEST_P(MultinodeLoggerTest, RemoteReboot) {
+ if (file_strategy() == FileStrategy::kCombine) {
+ GTEST_SKIP() << "We don't need to test the combined file writer this deep.";
+ }
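Several tests below gain the same guard, presumably because the combined file writer shares the sorting and replay machinery already covered elsewhere, so exercising it in these deep per-file header checks would only duplicate coverage. The guard, assuming file_strategy() decodes the test parameter:

    if (file_strategy() == FileStrategy::kCombine) {
      GTEST_SKIP() << "We don't need to test the combined file writer this deep.";
    }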
std::vector<std::string> actual_filenames;
const UUID pi1_boot0 = UUID::Random();
@@ -2179,8 +2212,7 @@
//
// TODO(austin): I'm not the most thrilled with this test pattern... It
// feels brittle in a different way.
- if (file.find("aos.message_bridge.RemoteMessage") == std::string::npos ||
- !shared()) {
+ if (file.find("timestamps/remote_pi2") == std::string::npos) {
switch (log_header->message().parts_index()) {
case 0:
EXPECT_EQ(source_node_boot_uuid, pi2_boot0);
@@ -2317,99 +2349,115 @@
monotonic_clock::max_time);
EXPECT_EQ(oldest_logger_local_unreliable_monotonic_timestamps,
monotonic_clock::max_time);
- switch (log_header->message().parts_index()) {
- case 0:
- EXPECT_EQ(oldest_remote_monotonic_timestamps,
- monotonic_clock::max_time);
- EXPECT_EQ(oldest_local_monotonic_timestamps, monotonic_clock::max_time);
- EXPECT_EQ(oldest_remote_unreliable_monotonic_timestamps,
- monotonic_clock::max_time);
- EXPECT_EQ(oldest_local_unreliable_monotonic_timestamps,
- monotonic_clock::max_time);
- EXPECT_EQ(oldest_remote_reliable_monotonic_timestamps,
- monotonic_clock::max_time);
- EXPECT_EQ(oldest_local_reliable_monotonic_timestamps,
- monotonic_clock::max_time);
- break;
- case 1:
- EXPECT_EQ(oldest_remote_monotonic_timestamps,
- monotonic_clock::time_point(chrono::microseconds(90200)));
- EXPECT_EQ(oldest_local_monotonic_timestamps,
- monotonic_clock::time_point(chrono::microseconds(90350)));
- EXPECT_EQ(oldest_remote_unreliable_monotonic_timestamps,
- monotonic_clock::time_point(chrono::microseconds(90200)));
- EXPECT_EQ(oldest_local_unreliable_monotonic_timestamps,
- monotonic_clock::time_point(chrono::microseconds(90350)));
- EXPECT_EQ(oldest_remote_reliable_monotonic_timestamps,
- monotonic_clock::max_time);
- EXPECT_EQ(oldest_local_reliable_monotonic_timestamps,
- monotonic_clock::max_time);
- break;
- case 2:
- EXPECT_EQ(oldest_remote_monotonic_timestamps,
- monotonic_clock::time_point(chrono::microseconds(90200)))
- << file;
- EXPECT_EQ(oldest_local_monotonic_timestamps,
- monotonic_clock::time_point(chrono::microseconds(90350)))
- << file;
- EXPECT_EQ(oldest_remote_unreliable_monotonic_timestamps,
- monotonic_clock::time_point(chrono::microseconds(90200)))
- << file;
- EXPECT_EQ(oldest_local_unreliable_monotonic_timestamps,
- monotonic_clock::time_point(chrono::microseconds(90350)))
- << file;
- EXPECT_EQ(oldest_remote_reliable_monotonic_timestamps,
- monotonic_clock::time_point(chrono::microseconds(100000)))
- << file;
- EXPECT_EQ(oldest_local_reliable_monotonic_timestamps,
- monotonic_clock::time_point(chrono::microseconds(100150)))
- << file;
- break;
- case 3:
- EXPECT_EQ(oldest_remote_monotonic_timestamps,
- monotonic_clock::time_point(chrono::milliseconds(1323) +
- chrono::microseconds(200)));
- EXPECT_EQ(oldest_local_monotonic_timestamps,
- monotonic_clock::time_point(chrono::microseconds(10100350)));
- EXPECT_EQ(oldest_remote_unreliable_monotonic_timestamps,
- monotonic_clock::time_point(chrono::milliseconds(1323) +
- chrono::microseconds(200)));
- EXPECT_EQ(oldest_local_unreliable_monotonic_timestamps,
- monotonic_clock::time_point(chrono::microseconds(10100350)));
- EXPECT_EQ(oldest_remote_reliable_monotonic_timestamps,
- monotonic_clock::max_time)
- << file;
- EXPECT_EQ(oldest_local_reliable_monotonic_timestamps,
- monotonic_clock::max_time)
- << file;
- break;
- case 4:
- EXPECT_EQ(oldest_remote_monotonic_timestamps,
- monotonic_clock::time_point(chrono::milliseconds(1323) +
- chrono::microseconds(200)));
- EXPECT_EQ(oldest_local_monotonic_timestamps,
- monotonic_clock::time_point(chrono::microseconds(10100350)));
- EXPECT_EQ(oldest_remote_unreliable_monotonic_timestamps,
- monotonic_clock::time_point(chrono::milliseconds(1323) +
- chrono::microseconds(200)));
- EXPECT_EQ(oldest_local_unreliable_monotonic_timestamps,
- monotonic_clock::time_point(chrono::microseconds(10100350)));
- EXPECT_EQ(oldest_remote_reliable_monotonic_timestamps,
- monotonic_clock::time_point(chrono::microseconds(1423000)))
- << file;
- EXPECT_EQ(oldest_local_reliable_monotonic_timestamps,
- monotonic_clock::time_point(chrono::microseconds(10200150)))
- << file;
- break;
- default:
- FAIL();
- break;
+ if (log_header->message().data_stored()->Get(0) == StoredDataType::DATA) {
+ switch (log_header->message().parts_index()) {
+ case 0:
+ ASSERT_EQ(oldest_remote_monotonic_timestamps,
+ monotonic_clock::max_time);
+ EXPECT_EQ(oldest_local_monotonic_timestamps,
+ monotonic_clock::max_time);
+ EXPECT_EQ(oldest_remote_unreliable_monotonic_timestamps,
+ monotonic_clock::max_time);
+ EXPECT_EQ(oldest_local_unreliable_monotonic_timestamps,
+ monotonic_clock::max_time);
+ EXPECT_EQ(oldest_remote_reliable_monotonic_timestamps,
+ monotonic_clock::max_time);
+ EXPECT_EQ(oldest_local_reliable_monotonic_timestamps,
+ monotonic_clock::max_time);
+ break;
+ default:
+ FAIL();
+ break;
+ }
+ } else if (log_header->message().data_stored()->Get(0) ==
+ StoredDataType::TIMESTAMPS) {
+ switch (log_header->message().parts_index()) {
+ case 0:
+ ASSERT_EQ(oldest_remote_monotonic_timestamps,
+ monotonic_clock::time_point(chrono::microseconds(90200)));
+ EXPECT_EQ(oldest_local_monotonic_timestamps,
+ monotonic_clock::time_point(chrono::microseconds(90350)));
+ EXPECT_EQ(oldest_remote_unreliable_monotonic_timestamps,
+ monotonic_clock::time_point(chrono::microseconds(90200)));
+ EXPECT_EQ(oldest_local_unreliable_monotonic_timestamps,
+ monotonic_clock::time_point(chrono::microseconds(90350)));
+ EXPECT_EQ(oldest_remote_reliable_monotonic_timestamps,
+ monotonic_clock::max_time);
+ EXPECT_EQ(oldest_local_reliable_monotonic_timestamps,
+ monotonic_clock::max_time);
+ break;
+ case 1:
+ ASSERT_EQ(oldest_remote_monotonic_timestamps,
+ monotonic_clock::time_point(chrono::microseconds(90200)))
+ << file;
+ EXPECT_EQ(oldest_local_monotonic_timestamps,
+ monotonic_clock::time_point(chrono::microseconds(90350)))
+ << file;
+ EXPECT_EQ(oldest_remote_unreliable_monotonic_timestamps,
+ monotonic_clock::time_point(chrono::microseconds(90200)))
+ << file;
+ EXPECT_EQ(oldest_local_unreliable_monotonic_timestamps,
+ monotonic_clock::time_point(chrono::microseconds(90350)))
+ << file;
+ EXPECT_EQ(oldest_remote_reliable_monotonic_timestamps,
+ monotonic_clock::time_point(chrono::microseconds(100000)))
+ << file;
+ EXPECT_EQ(oldest_local_reliable_monotonic_timestamps,
+ monotonic_clock::time_point(chrono::microseconds(100150)))
+ << file;
+ break;
+ case 2:
+ ASSERT_EQ(oldest_remote_monotonic_timestamps,
+ monotonic_clock::time_point(chrono::milliseconds(1323) +
+ chrono::microseconds(200)));
+ EXPECT_EQ(
+ oldest_local_monotonic_timestamps,
+ monotonic_clock::time_point(chrono::microseconds(10100350)));
+ EXPECT_EQ(oldest_remote_unreliable_monotonic_timestamps,
+ monotonic_clock::time_point(chrono::milliseconds(1323) +
+ chrono::microseconds(200)));
+ EXPECT_EQ(
+ oldest_local_unreliable_monotonic_timestamps,
+ monotonic_clock::time_point(chrono::microseconds(10100350)));
+ EXPECT_EQ(oldest_remote_reliable_monotonic_timestamps,
+ monotonic_clock::max_time)
+ << file;
+ EXPECT_EQ(oldest_local_reliable_monotonic_timestamps,
+ monotonic_clock::max_time)
+ << file;
+ break;
+ case 3:
+ ASSERT_EQ(oldest_remote_monotonic_timestamps,
+ monotonic_clock::time_point(chrono::milliseconds(1323) +
+ chrono::microseconds(200)));
+ EXPECT_EQ(
+ oldest_local_monotonic_timestamps,
+ monotonic_clock::time_point(chrono::microseconds(10100350)));
+ EXPECT_EQ(oldest_remote_unreliable_monotonic_timestamps,
+ monotonic_clock::time_point(chrono::milliseconds(1323) +
+ chrono::microseconds(200)));
+ EXPECT_EQ(
+ oldest_local_unreliable_monotonic_timestamps,
+ monotonic_clock::time_point(chrono::microseconds(10100350)));
+ EXPECT_EQ(oldest_remote_reliable_monotonic_timestamps,
+ monotonic_clock::time_point(chrono::microseconds(1423000)))
+ << file;
+ EXPECT_EQ(oldest_local_reliable_monotonic_timestamps,
+ monotonic_clock::time_point(chrono::microseconds(10200150)))
+ << file;
+ break;
+ default:
+ FAIL();
+ break;
+ }
}
}
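The header expectations above are now keyed off what the part actually stores: the first data_stored entry distinguishes data-only parts, whose oldest-timestamp bookkeeping should stay untouched, from timestamp parts, which step through the reboot timeline. A condensed sketch of that dispatch:

    const StoredDataType type = log_header->message().data_stored()->Get(0);
    if (type == StoredDataType::DATA) {
      // Data parts: all oldest_*_monotonic_timestamps remain at max_time.
    } else if (type == StoredDataType::TIMESTAMPS) {
      // Timestamp parts: parts_index 0..3 walk through the expected
      // pre- and post-reboot oldest timestamps checked above.
    }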
// Confirm that we refuse to replay logs with missing boot uuids.
{
- LogReader reader(SortParts(pi1_reboot_logfiles_));
+ auto sorted_parts = SortParts(pi1_reboot_logfiles_);
+ EXPECT_TRUE(AllPartsMatchOutOfOrderDuration(sorted_parts));
+ LogReader reader(sorted_parts);
SimulatedEventLoopFactory log_reader_factory(reader.configuration());
log_reader_factory.set_send_delay(chrono::microseconds(0));
@@ -2427,6 +2475,10 @@
// Tests that we can sort a log which only has timestamps from the remote
// because the local message_bridge_client failed to connect.
TEST_P(MultinodeLoggerTest, RemoteRebootOnlyTimestamps) {
+ if (file_strategy() == FileStrategy::kCombine) {
+ GTEST_SKIP() << "We don't need to test the combined file writer this deep.";
+ }
+
const UUID pi1_boot0 = UUID::Random();
const UUID pi2_boot0 = UUID::Random();
const UUID pi2_boot1 = UUID::Random();
@@ -2541,8 +2593,7 @@
2u);
if (log_header->message().node()->name()->string_view() != "pi1") {
- ASSERT_TRUE(file.find("aos.message_bridge.RemoteMessage") !=
- std::string::npos);
+ ASSERT_TRUE(file.find("timestamps/remote_pi2") != std::string::npos);
const std::optional<SizePrefixedFlatbufferVector<MessageHeader>> msg =
ReadNthMessage(file, 0);
@@ -2629,234 +2680,150 @@
SCOPED_TRACE(aos::FlatbufferToJson(
*log_header, {.multi_line = true, .max_vector_size = 100}));
- if (shared()) {
- // Confirm that the oldest timestamps match what we expect. Based on
- // what we are doing, we know that the oldest time is the first
- // message's time.
- //
- // This makes the test robust to both the split and combined config
- // tests.
- switch (log_header->message().parts_index()) {
- case 0:
- EXPECT_EQ(oldest_remote_monotonic_timestamps,
+ // Confirm that the oldest timestamps match what we expect. Based on
+ // what we are doing, we know that the oldest time is the first
+ // message's time.
+ //
+ // This makes the test robust to both the split and combined config
+ // tests.
+ switch (log_header->message().parts_index()) {
+ case 0:
+ EXPECT_EQ(oldest_remote_monotonic_timestamps,
+ expected_oldest_remote_monotonic_timestamps);
+ EXPECT_EQ(oldest_local_monotonic_timestamps,
+ expected_oldest_local_monotonic_timestamps);
+ EXPECT_EQ(oldest_logger_remote_unreliable_monotonic_timestamps,
+ expected_oldest_local_monotonic_timestamps)
+ << file;
+ EXPECT_EQ(oldest_logger_local_unreliable_monotonic_timestamps,
+ expected_oldest_timestamp_monotonic_timestamps)
+ << file;
+
+ if (reliable) {
+ EXPECT_EQ(oldest_remote_reliable_monotonic_timestamps,
expected_oldest_remote_monotonic_timestamps);
- EXPECT_EQ(oldest_local_monotonic_timestamps,
+ EXPECT_EQ(oldest_local_reliable_monotonic_timestamps,
expected_oldest_local_monotonic_timestamps);
- EXPECT_EQ(oldest_logger_remote_unreliable_monotonic_timestamps,
- expected_oldest_local_monotonic_timestamps)
- << file;
- EXPECT_EQ(oldest_logger_local_unreliable_monotonic_timestamps,
- expected_oldest_timestamp_monotonic_timestamps)
- << file;
-
- if (reliable) {
- EXPECT_EQ(oldest_remote_reliable_monotonic_timestamps,
- expected_oldest_remote_monotonic_timestamps);
- EXPECT_EQ(oldest_local_reliable_monotonic_timestamps,
- expected_oldest_local_monotonic_timestamps);
- EXPECT_EQ(oldest_remote_unreliable_monotonic_timestamps,
- monotonic_clock::max_time);
- EXPECT_EQ(oldest_local_unreliable_monotonic_timestamps,
- monotonic_clock::max_time);
- } else {
- EXPECT_EQ(oldest_remote_reliable_monotonic_timestamps,
- monotonic_clock::max_time);
- EXPECT_EQ(oldest_local_reliable_monotonic_timestamps,
- monotonic_clock::max_time);
- EXPECT_EQ(oldest_remote_unreliable_monotonic_timestamps,
- expected_oldest_remote_monotonic_timestamps);
- EXPECT_EQ(oldest_local_unreliable_monotonic_timestamps,
- expected_oldest_local_monotonic_timestamps);
- }
- break;
- case 1:
- EXPECT_EQ(oldest_remote_monotonic_timestamps,
- monotonic_clock::epoch() + chrono::nanoseconds(90000000));
- EXPECT_EQ(oldest_local_monotonic_timestamps,
- monotonic_clock::epoch() + chrono::nanoseconds(90150000));
- EXPECT_EQ(oldest_logger_remote_unreliable_monotonic_timestamps,
- monotonic_clock::epoch() + chrono::nanoseconds(90150000));
- EXPECT_EQ(oldest_logger_local_unreliable_monotonic_timestamps,
- monotonic_clock::epoch() + chrono::nanoseconds(90250000));
- if (reliable) {
- EXPECT_EQ(oldest_remote_reliable_monotonic_timestamps,
- expected_oldest_remote_monotonic_timestamps);
- EXPECT_EQ(oldest_local_reliable_monotonic_timestamps,
- expected_oldest_local_monotonic_timestamps);
- EXPECT_EQ(
- oldest_remote_unreliable_monotonic_timestamps,
- monotonic_clock::epoch() + chrono::nanoseconds(90000000));
- EXPECT_EQ(
- oldest_local_unreliable_monotonic_timestamps,
- monotonic_clock::epoch() + chrono::nanoseconds(90150000));
- } else {
- EXPECT_EQ(oldest_remote_reliable_monotonic_timestamps,
- monotonic_clock::max_time);
- EXPECT_EQ(oldest_local_reliable_monotonic_timestamps,
- monotonic_clock::max_time);
- EXPECT_EQ(oldest_remote_unreliable_monotonic_timestamps,
- expected_oldest_remote_monotonic_timestamps);
- EXPECT_EQ(oldest_local_unreliable_monotonic_timestamps,
- expected_oldest_local_monotonic_timestamps);
- }
- break;
- case 2:
- EXPECT_EQ(
- oldest_remote_monotonic_timestamps,
- monotonic_clock::epoch() + chrono::nanoseconds(10000000000));
- EXPECT_EQ(
- oldest_local_monotonic_timestamps,
- monotonic_clock::epoch() + chrono::nanoseconds(1323100000));
- EXPECT_EQ(oldest_logger_remote_unreliable_monotonic_timestamps,
- expected_oldest_local_monotonic_timestamps)
- << file;
- EXPECT_EQ(oldest_logger_local_unreliable_monotonic_timestamps,
- expected_oldest_timestamp_monotonic_timestamps)
- << file;
- if (reliable) {
- EXPECT_EQ(oldest_remote_reliable_monotonic_timestamps,
- expected_oldest_remote_monotonic_timestamps);
- EXPECT_EQ(oldest_local_reliable_monotonic_timestamps,
- expected_oldest_local_monotonic_timestamps);
- EXPECT_EQ(oldest_remote_unreliable_monotonic_timestamps,
- monotonic_clock::max_time);
- EXPECT_EQ(oldest_local_unreliable_monotonic_timestamps,
- monotonic_clock::max_time);
- } else {
- EXPECT_EQ(oldest_remote_reliable_monotonic_timestamps,
- monotonic_clock::max_time);
- EXPECT_EQ(oldest_local_reliable_monotonic_timestamps,
- monotonic_clock::max_time);
- EXPECT_EQ(oldest_remote_unreliable_monotonic_timestamps,
- expected_oldest_remote_monotonic_timestamps);
- EXPECT_EQ(oldest_local_unreliable_monotonic_timestamps,
- expected_oldest_local_monotonic_timestamps);
- }
- break;
-
- case 3:
- EXPECT_EQ(
- oldest_remote_monotonic_timestamps,
- monotonic_clock::epoch() + chrono::nanoseconds(10000000000));
- EXPECT_EQ(
- oldest_local_monotonic_timestamps,
- monotonic_clock::epoch() + chrono::nanoseconds(1323100000));
+ EXPECT_EQ(oldest_remote_unreliable_monotonic_timestamps,
+ monotonic_clock::max_time);
+ EXPECT_EQ(oldest_local_unreliable_monotonic_timestamps,
+ monotonic_clock::max_time);
+ } else {
+ EXPECT_EQ(oldest_remote_reliable_monotonic_timestamps,
+ monotonic_clock::max_time);
+ EXPECT_EQ(oldest_local_reliable_monotonic_timestamps,
+ monotonic_clock::max_time);
EXPECT_EQ(oldest_remote_unreliable_monotonic_timestamps,
expected_oldest_remote_monotonic_timestamps);
EXPECT_EQ(oldest_local_unreliable_monotonic_timestamps,
expected_oldest_local_monotonic_timestamps);
- EXPECT_EQ(
- oldest_logger_remote_unreliable_monotonic_timestamps,
- monotonic_clock::epoch() + chrono::nanoseconds(1323100000));
- EXPECT_EQ(
- oldest_logger_local_unreliable_monotonic_timestamps,
- monotonic_clock::epoch() + chrono::nanoseconds(10100200000));
- break;
- default:
- FAIL();
- break;
- }
+ }
+ break;
+ case 1:
+ EXPECT_EQ(oldest_remote_monotonic_timestamps,
+ monotonic_clock::epoch() + chrono::nanoseconds(90000000));
+ EXPECT_EQ(oldest_local_monotonic_timestamps,
+ monotonic_clock::epoch() + chrono::nanoseconds(90150000));
+ EXPECT_EQ(oldest_logger_remote_unreliable_monotonic_timestamps,
+ monotonic_clock::epoch() + chrono::nanoseconds(90150000));
+ EXPECT_EQ(oldest_logger_local_unreliable_monotonic_timestamps,
+ monotonic_clock::epoch() + chrono::nanoseconds(90250000));
+ if (reliable) {
+ EXPECT_EQ(oldest_remote_reliable_monotonic_timestamps,
+ expected_oldest_remote_monotonic_timestamps);
+ EXPECT_EQ(oldest_local_reliable_monotonic_timestamps,
+ expected_oldest_local_monotonic_timestamps);
+ EXPECT_EQ(oldest_remote_unreliable_monotonic_timestamps,
+ monotonic_clock::epoch() + chrono::nanoseconds(90000000));
+ EXPECT_EQ(oldest_local_unreliable_monotonic_timestamps,
+ monotonic_clock::epoch() + chrono::nanoseconds(90150000));
+ } else {
+ EXPECT_EQ(oldest_remote_reliable_monotonic_timestamps,
+ monotonic_clock::max_time);
+ EXPECT_EQ(oldest_local_reliable_monotonic_timestamps,
+ monotonic_clock::max_time);
+ EXPECT_EQ(oldest_remote_unreliable_monotonic_timestamps,
+ expected_oldest_remote_monotonic_timestamps);
+ EXPECT_EQ(oldest_local_unreliable_monotonic_timestamps,
+ expected_oldest_local_monotonic_timestamps);
+ }
+ break;
+ case 2:
+ EXPECT_EQ(
+ oldest_remote_monotonic_timestamps,
+ monotonic_clock::epoch() + chrono::nanoseconds(10000000000));
+ EXPECT_EQ(oldest_local_monotonic_timestamps,
+ monotonic_clock::epoch() + chrono::nanoseconds(1323100000));
+ EXPECT_EQ(oldest_logger_remote_unreliable_monotonic_timestamps,
+ expected_oldest_local_monotonic_timestamps)
+ << file;
+ EXPECT_EQ(oldest_logger_local_unreliable_monotonic_timestamps,
+ expected_oldest_timestamp_monotonic_timestamps)
+ << file;
+ if (reliable) {
+ EXPECT_EQ(oldest_remote_reliable_monotonic_timestamps,
+ expected_oldest_remote_monotonic_timestamps);
+ EXPECT_EQ(oldest_local_reliable_monotonic_timestamps,
+ expected_oldest_local_monotonic_timestamps);
+ EXPECT_EQ(oldest_remote_unreliable_monotonic_timestamps,
+ monotonic_clock::max_time);
+ EXPECT_EQ(oldest_local_unreliable_monotonic_timestamps,
+ monotonic_clock::max_time);
+ } else {
+ EXPECT_EQ(oldest_remote_reliable_monotonic_timestamps,
+ monotonic_clock::max_time);
+ EXPECT_EQ(oldest_local_reliable_monotonic_timestamps,
+ monotonic_clock::max_time);
+ EXPECT_EQ(oldest_remote_unreliable_monotonic_timestamps,
+ expected_oldest_remote_monotonic_timestamps);
+ EXPECT_EQ(oldest_local_unreliable_monotonic_timestamps,
+ expected_oldest_local_monotonic_timestamps);
+ }
+ break;
- switch (log_header->message().parts_index()) {
- case 0:
- EXPECT_EQ(source_node_boot_uuid, pi2_boot0);
- EXPECT_EQ(monotonic_start_time, monotonic_clock::min_time);
- break;
- case 1:
- EXPECT_EQ(source_node_boot_uuid, pi2_boot0);
- EXPECT_EQ(monotonic_start_time, monotonic_clock::min_time);
- break;
- case 2:
- EXPECT_EQ(source_node_boot_uuid, pi2_boot1);
- EXPECT_EQ(monotonic_start_time, monotonic_clock::min_time);
- break;
- case 3:
- if (shared()) {
- EXPECT_EQ(source_node_boot_uuid, pi2_boot1);
- EXPECT_EQ(monotonic_start_time, monotonic_clock::min_time);
- break;
- }
- [[fallthrough]];
- default:
- FAIL();
- break;
- }
- } else {
- switch (log_header->message().parts_index()) {
- case 0:
- if (reliable) {
- EXPECT_EQ(oldest_remote_unreliable_monotonic_timestamps,
- monotonic_clock::max_time);
- EXPECT_EQ(oldest_local_unreliable_monotonic_timestamps,
- monotonic_clock::max_time);
- EXPECT_EQ(
- oldest_logger_remote_unreliable_monotonic_timestamps,
- monotonic_clock::epoch() + chrono::nanoseconds(100150000))
- << file;
- EXPECT_EQ(
- oldest_logger_local_unreliable_monotonic_timestamps,
- monotonic_clock::epoch() + chrono::nanoseconds(100250000))
- << file;
- } else {
- EXPECT_EQ(oldest_remote_unreliable_monotonic_timestamps,
- expected_oldest_remote_monotonic_timestamps);
- EXPECT_EQ(oldest_local_unreliable_monotonic_timestamps,
- expected_oldest_local_monotonic_timestamps);
- EXPECT_EQ(
- oldest_logger_remote_unreliable_monotonic_timestamps,
- monotonic_clock::epoch() + chrono::nanoseconds(90150000))
- << file;
- EXPECT_EQ(
- oldest_logger_local_unreliable_monotonic_timestamps,
- monotonic_clock::epoch() + chrono::nanoseconds(90250000))
- << file;
- }
- break;
- case 1:
- if (reliable) {
- EXPECT_EQ(oldest_remote_unreliable_monotonic_timestamps,
- monotonic_clock::max_time);
- EXPECT_EQ(oldest_local_unreliable_monotonic_timestamps,
- monotonic_clock::max_time);
- EXPECT_EQ(
- oldest_logger_remote_unreliable_monotonic_timestamps,
- monotonic_clock::epoch() + chrono::nanoseconds(1323100000));
- EXPECT_EQ(
- oldest_logger_local_unreliable_monotonic_timestamps,
- monotonic_clock::epoch() + chrono::nanoseconds(10100200000));
- } else {
- EXPECT_EQ(oldest_remote_unreliable_monotonic_timestamps,
- expected_oldest_remote_monotonic_timestamps);
- EXPECT_EQ(oldest_local_unreliable_monotonic_timestamps,
- expected_oldest_local_monotonic_timestamps);
- EXPECT_EQ(
- oldest_logger_remote_unreliable_monotonic_timestamps,
- monotonic_clock::epoch() + chrono::nanoseconds(1323150000));
- EXPECT_EQ(
- oldest_logger_local_unreliable_monotonic_timestamps,
- monotonic_clock::epoch() + chrono::nanoseconds(10100250000));
- }
- break;
- default:
- FAIL();
- break;
- }
-
- switch (log_header->message().parts_index()) {
- case 0:
- EXPECT_EQ(source_node_boot_uuid, pi2_boot0);
- EXPECT_EQ(monotonic_start_time, monotonic_clock::min_time);
- break;
- case 1:
- EXPECT_EQ(source_node_boot_uuid, pi2_boot1);
- EXPECT_EQ(monotonic_start_time, monotonic_clock::min_time);
- break;
- default:
- FAIL();
- break;
- }
+ case 3:
+ EXPECT_EQ(
+ oldest_remote_monotonic_timestamps,
+ monotonic_clock::epoch() + chrono::nanoseconds(10000000000));
+ EXPECT_EQ(oldest_local_monotonic_timestamps,
+ monotonic_clock::epoch() + chrono::nanoseconds(1323100000));
+ EXPECT_EQ(oldest_remote_unreliable_monotonic_timestamps,
+ expected_oldest_remote_monotonic_timestamps);
+ EXPECT_EQ(oldest_local_unreliable_monotonic_timestamps,
+ expected_oldest_local_monotonic_timestamps);
+ EXPECT_EQ(oldest_logger_remote_unreliable_monotonic_timestamps,
+ monotonic_clock::epoch() + chrono::nanoseconds(1323100000));
+ EXPECT_EQ(
+ oldest_logger_local_unreliable_monotonic_timestamps,
+ monotonic_clock::epoch() + chrono::nanoseconds(10100200000));
+ break;
+ default:
+ FAIL();
+ break;
}
+ switch (log_header->message().parts_index()) {
+ case 0:
+ EXPECT_EQ(source_node_boot_uuid, pi2_boot0);
+ EXPECT_EQ(monotonic_start_time, monotonic_clock::min_time);
+ break;
+ case 1:
+ EXPECT_EQ(source_node_boot_uuid, pi2_boot0);
+ EXPECT_EQ(monotonic_start_time, monotonic_clock::min_time);
+ break;
+ case 2:
+ EXPECT_EQ(source_node_boot_uuid, pi2_boot1);
+ EXPECT_EQ(monotonic_start_time, monotonic_clock::min_time);
+ break;
+ case 3:
+ EXPECT_EQ(source_node_boot_uuid, pi2_boot1);
+ EXPECT_EQ(monotonic_start_time, monotonic_clock::min_time);
+ break;
+ default:
+ FAIL();
+ break;
+ }
continue;
}
EXPECT_EQ(
@@ -2908,15 +2875,13 @@
}
}
- if (shared()) {
- EXPECT_EQ(timestamp_file_count, 4u);
- } else {
- EXPECT_EQ(timestamp_file_count, 4u);
- }
+ EXPECT_EQ(timestamp_file_count, 4u);
// Confirm that we can actually sort the resulting log and read it.
{
- LogReader reader(SortParts(filenames));
+ auto sorted_parts = SortParts(filenames);
+ EXPECT_TRUE(AllPartsMatchOutOfOrderDuration(sorted_parts));
+ LogReader reader(sorted_parts);
SimulatedEventLoopFactory log_reader_factory(reader.configuration());
log_reader_factory.set_send_delay(chrono::microseconds(0));
@@ -2934,6 +2899,8 @@
// Tests that we properly handle one direction of message_bridge being
// unavailable.
TEST_P(MultinodeLoggerTest, OneDirectionWithNegativeSlope) {
+ std::vector<std::string> actual_filenames;
+
pi1_->Disconnect(pi2_->node());
time_converter_.AddMonotonic(
{BootTimestamp::epoch(), BootTimestamp::epoch() + chrono::seconds(1000)});
@@ -2949,11 +2916,12 @@
StartLogger(&pi1_logger);
event_loop_factory_.RunFor(chrono::milliseconds(10000));
+ pi1_logger.AppendAllFilenames(&actual_filenames);
}
- // Confirm that we can parse the result. LogReader has enough internal CHECKs
- // to confirm the right thing happened.
- ConfirmReadable(pi1_single_direction_logfiles_);
+ // Confirm that we can parse the result. LogReader has enough internal
+ // CHECKs to confirm the right thing happened.
+ ConfirmReadable(actual_filenames);
}
// Tests that we properly handle one direction of message_bridge being
@@ -2966,6 +2934,8 @@
time_converter_.AddMonotonic(
{chrono::milliseconds(10000),
chrono::milliseconds(10000) + chrono::milliseconds(1)});
+
+ std::vector<std::string> filenames;
{
LoggerState pi1_logger = MakeLogger(pi1_);
@@ -2974,11 +2944,12 @@
StartLogger(&pi1_logger);
event_loop_factory_.RunFor(chrono::milliseconds(10000));
+ pi1_logger.AppendAllFilenames(&filenames);
}
- // Confirm that we can parse the result. LogReader has enough internal CHECKs
- // to confirm the right thing happened.
- ConfirmReadable(pi1_single_direction_logfiles_);
+ // Confirm that we can parse the result. LogReader has enough internal
+ // CHECKs to confirm the right thing happened.
+ ConfirmReadable(filenames);
}
// Tests that we explode if someone passes in a part file twice with a better
@@ -2986,6 +2957,8 @@
TEST_P(MultinodeLoggerTest, DuplicateLogFiles) {
time_converter_.AddMonotonic(
{BootTimestamp::epoch(), BootTimestamp::epoch() + chrono::seconds(1000)});
+
+ std::vector<std::string> filenames;
{
LoggerState pi1_logger = MakeLogger(pi1_);
@@ -2994,10 +2967,12 @@
StartLogger(&pi1_logger);
event_loop_factory_.RunFor(chrono::milliseconds(10000));
+
+ pi1_logger.AppendAllFilenames(&filenames);
}
std::vector<std::string> duplicates;
- for (const std::string &f : pi1_single_direction_logfiles_) {
+ for (const std::string &f : filenames) {
duplicates.emplace_back(f);
duplicates.emplace_back(f);
}
@@ -3006,6 +2981,9 @@
// Tests that we explode if someone loses a part out of the middle of a log.
TEST_P(MultinodeLoggerTest, MissingPartsFromMiddle) {
+ if (file_strategy() == FileStrategy::kCombine) {
+ GTEST_SKIP() << "We don't need to test the combined file writer this deep.";
+ }
time_converter_.AddMonotonic(
{BootTimestamp::epoch(), BootTimestamp::epoch() + chrono::seconds(1000)});
{
@@ -3016,21 +2994,24 @@
StartLogger(&pi1_logger);
aos::monotonic_clock::time_point last_rotation_time =
pi1_logger.event_loop->monotonic_now();
- pi1_logger.logger->set_on_logged_period([&] {
- const auto now = pi1_logger.event_loop->monotonic_now();
- if (now > last_rotation_time + std::chrono::seconds(5)) {
- pi1_logger.logger->Rotate();
- last_rotation_time = now;
- }
- });
+ pi1_logger.logger->set_on_logged_period(
+ [&](aos::monotonic_clock::time_point) {
+ const auto now = pi1_logger.event_loop->monotonic_now();
+ if (now > last_rotation_time + std::chrono::seconds(5)) {
+ pi1_logger.logger->Rotate();
+ last_rotation_time = now;
+ }
+ });
event_loop_factory_.RunFor(chrono::milliseconds(10000));
}
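The on_logged_period callback now receives the monotonic time of the period being logged. This test ignores the argument and re-reads the event-loop clock, but the parameter could drive the rotation directly; a sketch of that variant, assuming the same Logger API:

    pi1_logger.logger->set_on_logged_period(
        [&](aos::monotonic_clock::time_point t) {
          // Rotate at most once every 5 seconds of logged time.
          if (t > last_rotation_time + std::chrono::seconds(5)) {
            pi1_logger.logger->Rotate();
            last_rotation_time = t;
          }
        });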
std::vector<std::string> missing_parts;
- missing_parts.emplace_back(logfile_base1_ + "_pi1_data.part0" + Extension());
- missing_parts.emplace_back(logfile_base1_ + "_pi1_data.part2" + Extension());
+ missing_parts.emplace_back(logfile_base1_ + "_pi1_timestamps.part0" +
+ Extension());
+ missing_parts.emplace_back(logfile_base1_ + "_pi1_timestamps.part2" +
+ Extension());
missing_parts.emplace_back(absl::StrCat(
logfile_base1_, "_", std::get<0>(GetParam()).sha256, Extension()));
@@ -3038,8 +3019,8 @@
"Broken log, missing part files between");
}
-// Tests that we properly handle a dead node. Do this by just disconnecting it
-// and only using one nodes of logs.
+// Tests that we properly handle a dead node. Do this by just disconnecting
+// it and only using one node's logs.
TEST_P(MultinodeLoggerTest, DeadNode) {
pi1_->Disconnect(pi2_->node());
pi2_->Disconnect(pi1_->node());
@@ -3055,14 +3036,14 @@
event_loop_factory_.RunFor(chrono::milliseconds(10000));
}
- // Confirm that we can parse the result. LogReader has enough internal CHECKs
- // to confirm the right thing happened.
+ // Confirm that we can parse the result. LogReader has enough internal
+ // CHECKs to confirm the right thing happened.
ConfirmReadable(MakePi1DeadNodeLogfiles());
}
-// Tests that we can relog with a different config. This makes most sense when
-// you are trying to edit a log and want to use channel renaming + the original
-// config in the new log.
+// Tests that we can relog with a different config. This makes most sense
+// when you are trying to edit a log and want to use channel renaming + the
+// original config in the new log.
TEST_P(MultinodeLoggerTest, LogDifferentConfig) {
time_converter_.StartEqual();
{
@@ -3077,7 +3058,9 @@
event_loop_factory_.RunFor(chrono::milliseconds(20000));
}
- LogReader reader(SortParts(logfiles_));
+ auto sorted_parts = SortParts(logfiles_);
+ EXPECT_TRUE(AllPartsMatchOutOfOrderDuration(sorted_parts));
+ LogReader reader(sorted_parts);
reader.RemapLoggedChannel<aos::examples::Ping>("/test", "/original");
SimulatedEventLoopFactory log_reader_factory(reader.configuration());
@@ -3116,16 +3099,16 @@
MakeLogger(log_reader_factory.GetNodeEventLoopFactory("pi2"),
&log_reader_factory, reader.logged_configuration());
- pi1_logger.StartLogger(tmp_dir_ + "/relogged1");
- pi2_logger.StartLogger(tmp_dir_ + "/relogged2");
+ pi1_logger.StartLogger(tmp_dir_ + "/logs/relogged1");
+ pi2_logger.StartLogger(tmp_dir_ + "/logs/relogged2");
log_reader_factory.Run();
for (auto &x : pi1_logger.log_namer->all_filenames()) {
- log_files.emplace_back(absl::StrCat(tmp_dir_, "/relogged1_", x));
+ log_files.emplace_back(absl::StrCat(tmp_dir_, "/logs/relogged1_", x));
}
for (auto &x : pi2_logger.log_namer->all_filenames()) {
- log_files.emplace_back(absl::StrCat(tmp_dir_, "/relogged2_", x));
+ log_files.emplace_back(absl::StrCat(tmp_dir_, "/logs/relogged2_", x));
}
}
@@ -3134,18 +3117,23 @@
// And verify that we can run the LogReader over the relogged files without
// hitting any fatal errors.
{
- LogReader relogged_reader(SortParts(log_files));
+ auto sorted_parts = SortParts(log_files);
+ EXPECT_TRUE(AllPartsMatchOutOfOrderDuration(sorted_parts));
+ LogReader relogged_reader(sorted_parts);
relogged_reader.Register();
relogged_reader.event_loop_factory()->Run();
}
}
-// Tests that we properly replay a log where the start time for a node is before
-// any data on the node. This can happen if the logger starts before data is
-// published. While the scenario below is a bit convoluted, we have seen logs
-// like this generated out in the wild.
+// Tests that we properly replay a log where the start time for a node is
+// before any data on the node. This can happen if the logger starts before
+// data is published. While the scenario below is a bit convoluted, we have
+// seen logs like this generated out in the wild.
TEST(MultinodeRebootLoggerTest, StartTimeBeforeData) {
+ util::UnlinkRecursive(aos::testing::TestTmpDir() + "/logs");
+ std::filesystem::create_directory(aos::testing::TestTmpDir() + "/logs");
+
aos::FlatbufferDetachedBuffer<aos::Configuration> config =
aos::configuration::ReadConfig(ArtifactPath(
"aos/events/logging/multinode_pingpong_split3_config.json"));
@@ -3167,17 +3155,14 @@
event_loop_factory.configuration(), pi3->node());
const std::string kLogfile1_1 =
- aos::testing::TestTmpDir() + "/multi_logfile1/";
+ aos::testing::TestTmpDir() + "/logs/multi_logfile1/";
const std::string kLogfile2_1 =
- aos::testing::TestTmpDir() + "/multi_logfile2.1/";
+ aos::testing::TestTmpDir() + "/logs/multi_logfile2.1/";
const std::string kLogfile2_2 =
- aos::testing::TestTmpDir() + "/multi_logfile2.2/";
+ aos::testing::TestTmpDir() + "/logs/multi_logfile2.2/";
const std::string kLogfile3_1 =
- aos::testing::TestTmpDir() + "/multi_logfile3/";
- util::UnlinkRecursive(kLogfile1_1);
- util::UnlinkRecursive(kLogfile2_1);
- util::UnlinkRecursive(kLogfile2_2);
- util::UnlinkRecursive(kLogfile3_1);
+ aos::testing::TestTmpDir() + "/logs/multi_logfile3/";
+
const UUID pi1_boot0 = UUID::Random();
const UUID pi2_boot0 = UUID::Random();
const UUID pi2_boot1 = UUID::Random();
@@ -3213,13 +3198,16 @@
std::vector<std::string> filenames;
{
LoggerState pi1_logger = MakeLoggerState(
- pi1, &event_loop_factory, SupportedCompressionAlgorithms()[0]);
+ pi1, &event_loop_factory, SupportedCompressionAlgorithms()[0],
+ FileStrategy::kKeepSeparate);
LoggerState pi3_logger = MakeLoggerState(
- pi3, &event_loop_factory, SupportedCompressionAlgorithms()[0]);
+ pi3, &event_loop_factory, SupportedCompressionAlgorithms()[0],
+ FileStrategy::kKeepSeparate);
{
// And now start the logger.
LoggerState pi2_logger = MakeLoggerState(
- pi2, &event_loop_factory, SupportedCompressionAlgorithms()[0]);
+ pi2, &event_loop_factory, SupportedCompressionAlgorithms()[0],
+ FileStrategy::kKeepSeparate);
event_loop_factory.RunFor(chrono::milliseconds(1000));
@@ -3254,7 +3242,8 @@
// Start logging again on pi2 after it is up.
LoggerState pi2_logger = MakeLoggerState(
- pi2, &event_loop_factory, SupportedCompressionAlgorithms()[0]);
+ pi2, &event_loop_factory, SupportedCompressionAlgorithms()[0],
+ FileStrategy::kKeepSeparate);
pi2_logger.StartLogger(kLogfile2_2);
event_loop_factory.RunFor(chrono::milliseconds(10000));
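MakeLoggerState gains an explicit FileStrategy argument; these reboot tests pin FileStrategy::kKeepSeparate so data and timestamp parts stay in separate files, which the per-file expectations in these tests rely on. Usage as in this change:

    LoggerState pi2_logger = MakeLoggerState(
        pi2, &event_loop_factory, SupportedCompressionAlgorithms()[0],
        FileStrategy::kKeepSeparate);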
@@ -3277,9 +3266,10 @@
pi3_logger.AppendAllFilenames(&filenames);
}
- // Confirm that we can parse the result. LogReader has enough internal CHECKs
- // to confirm the right thing happened.
+ // Confirm that we can parse the result. LogReader has enough internal
+ // CHECKs to confirm the right thing happened.
const std::vector<LogFile> sorted_parts = SortParts(filenames);
+ EXPECT_TRUE(AllPartsMatchOutOfOrderDuration(sorted_parts));
auto result = ConfirmReadable(filenames);
EXPECT_THAT(result[0].first, ::testing::ElementsAre(realtime_clock::epoch() +
chrono::seconds(1)));
@@ -3304,16 +3294,20 @@
}
// Tests that local data before remote data after reboot is properly replayed.
-// We only trigger a reboot in the timestamp interpolation function when solving
-// the timestamp problem when we actually have a point in the function. This
-// originally only happened when a point passes the noncausal filter. At the
-// start of time for the second boot, if we aren't careful, we will have
-// messages which need to be published at times before the boot. This happens
-// when a local message is in the log before a forwarded message, so there is no
-// point in the interpolation function. This delays the reboot. So, we need to
-// recreate that situation and make sure it doesn't come back.
+// We only trigger a reboot in the timestamp interpolation function when
+// solving the timestamp problem when we actually have a point in the
+// function. This originally only happened when a point passes the noncausal
+// filter. At the start of time for the second boot, if we aren't careful, we
+// will have messages which need to be published at times before the boot.
+// This happens when a local message is in the log before a forwarded message,
+// so there is no point in the interpolation function. This delays the
+// reboot. So, we need to recreate that situation and make sure it doesn't
+// come back.
TEST(MultinodeRebootLoggerTest,
LocalMessageBeforeRemoteBeforeStartAfterReboot) {
+ util::UnlinkRecursive(aos::testing::TestTmpDir() + "/logs");
+ std::filesystem::create_directory(aos::testing::TestTmpDir() + "/logs");
+
aos::FlatbufferDetachedBuffer<aos::Configuration> config =
aos::configuration::ReadConfig(ArtifactPath(
"aos/events/logging/multinode_pingpong_split3_config.json"));
@@ -3335,17 +3329,13 @@
event_loop_factory.configuration(), pi3->node());
const std::string kLogfile1_1 =
- aos::testing::TestTmpDir() + "/multi_logfile1/";
+ aos::testing::TestTmpDir() + "/logs/multi_logfile1/";
const std::string kLogfile2_1 =
- aos::testing::TestTmpDir() + "/multi_logfile2.1/";
+ aos::testing::TestTmpDir() + "/logs/multi_logfile2.1/";
const std::string kLogfile2_2 =
- aos::testing::TestTmpDir() + "/multi_logfile2.2/";
+ aos::testing::TestTmpDir() + "/logs/multi_logfile2.2/";
const std::string kLogfile3_1 =
- aos::testing::TestTmpDir() + "/multi_logfile3/";
- util::UnlinkRecursive(kLogfile1_1);
- util::UnlinkRecursive(kLogfile2_1);
- util::UnlinkRecursive(kLogfile2_2);
- util::UnlinkRecursive(kLogfile3_1);
+ aos::testing::TestTmpDir() + "/logs/multi_logfile3/";
const UUID pi1_boot0 = UUID::Random();
const UUID pi2_boot0 = UUID::Random();
const UUID pi2_boot1 = UUID::Random();
@@ -3377,13 +3367,16 @@
std::vector<std::string> filenames;
{
LoggerState pi1_logger = MakeLoggerState(
- pi1, &event_loop_factory, SupportedCompressionAlgorithms()[0]);
+ pi1, &event_loop_factory, SupportedCompressionAlgorithms()[0],
+ FileStrategy::kKeepSeparate);
LoggerState pi3_logger = MakeLoggerState(
- pi3, &event_loop_factory, SupportedCompressionAlgorithms()[0]);
+ pi3, &event_loop_factory, SupportedCompressionAlgorithms()[0],
+ FileStrategy::kKeepSeparate);
{
// And now start the logger.
LoggerState pi2_logger = MakeLoggerState(
- pi2, &event_loop_factory, SupportedCompressionAlgorithms()[0]);
+ pi2, &event_loop_factory, SupportedCompressionAlgorithms()[0],
+ FileStrategy::kKeepSeparate);
pi1_logger.StartLogger(kLogfile1_1);
pi3_logger.StartLogger(kLogfile3_1);
@@ -3411,7 +3404,8 @@
{
event_loop_factory.RunFor(chrono::milliseconds(1000));
- // Make local stuff happen before we start logging and connect the remote.
+ // Make local stuff happen before we start logging and connect the
+ // remote.
pi2->AlwaysStart<Pong>("pong");
std::unique_ptr<aos::EventLoop> ping_event_loop =
pi1->MakeEventLoop("ping");
@@ -3420,7 +3414,8 @@
// Start logging again on pi2 after it is up.
LoggerState pi2_logger = MakeLoggerState(
- pi2, &event_loop_factory, SupportedCompressionAlgorithms()[0]);
+ pi2, &event_loop_factory, SupportedCompressionAlgorithms()[0],
+ FileStrategy::kKeepSeparate);
pi2_logger.StartLogger(kLogfile2_2);
// And allow remote messages now that we have some local ones.
@@ -3438,9 +3433,10 @@
pi3_logger.AppendAllFilenames(&filenames);
}
- // Confirm that we can parse the result. LogReader has enough internal CHECKs
- // to confirm the right thing happened.
+ // Confirm that we can parse the result. LogReader has enough internal
+ // CHECKs to confirm the right thing happened.
const std::vector<LogFile> sorted_parts = SortParts(filenames);
+ EXPECT_TRUE(AllPartsMatchOutOfOrderDuration(sorted_parts));
auto result = ConfirmReadable(filenames);
EXPECT_THAT(result[0].first, ::testing::ElementsAre(realtime_clock::epoch()));
@@ -3489,6 +3485,9 @@
// Tests that setting the start and stop flags across a reboot works as
// expected.
TEST(MultinodeRebootLoggerTest, RebootStartStopTimes) {
+ util::UnlinkRecursive(aos::testing::TestTmpDir() + "/logs");
+ std::filesystem::create_directory(aos::testing::TestTmpDir() + "/logs");
+
aos::FlatbufferDetachedBuffer<aos::Configuration> config =
aos::configuration::ReadConfig(ArtifactPath(
"aos/events/logging/multinode_pingpong_split3_config.json"));
@@ -3510,17 +3509,13 @@
event_loop_factory.configuration(), pi3->node());
const std::string kLogfile1_1 =
- aos::testing::TestTmpDir() + "/multi_logfile1/";
+ aos::testing::TestTmpDir() + "/logs/multi_logfile1/";
const std::string kLogfile2_1 =
- aos::testing::TestTmpDir() + "/multi_logfile2.1/";
+ aos::testing::TestTmpDir() + "/logs/multi_logfile2.1/";
const std::string kLogfile2_2 =
- aos::testing::TestTmpDir() + "/multi_logfile2.2/";
+ aos::testing::TestTmpDir() + "/logs/multi_logfile2.2/";
const std::string kLogfile3_1 =
- aos::testing::TestTmpDir() + "/multi_logfile3/";
- util::UnlinkRecursive(kLogfile1_1);
- util::UnlinkRecursive(kLogfile2_1);
- util::UnlinkRecursive(kLogfile2_2);
- util::UnlinkRecursive(kLogfile3_1);
+ aos::testing::TestTmpDir() + "/logs/multi_logfile3/";
{
CHECK_EQ(pi1_index, 0u);
CHECK_EQ(pi2_index, 1u);
@@ -3542,13 +3537,16 @@
std::vector<std::string> filenames;
{
LoggerState pi1_logger = MakeLoggerState(
- pi1, &event_loop_factory, SupportedCompressionAlgorithms()[0]);
+ pi1, &event_loop_factory, SupportedCompressionAlgorithms()[0],
+ FileStrategy::kKeepSeparate);
LoggerState pi3_logger = MakeLoggerState(
- pi3, &event_loop_factory, SupportedCompressionAlgorithms()[0]);
+ pi3, &event_loop_factory, SupportedCompressionAlgorithms()[0],
+ FileStrategy::kKeepSeparate);
{
// And now start the logger.
LoggerState pi2_logger = MakeLoggerState(
- pi2, &event_loop_factory, SupportedCompressionAlgorithms()[0]);
+ pi2, &event_loop_factory, SupportedCompressionAlgorithms()[0],
+ FileStrategy::kKeepSeparate);
pi1_logger.StartLogger(kLogfile1_1);
pi3_logger.StartLogger(kLogfile3_1);
@@ -3572,7 +3570,8 @@
{
event_loop_factory.RunFor(chrono::milliseconds(1000));
- // Make local stuff happen before we start logging and connect the remote.
+ // Make local stuff happen before we start logging and connect the
+ // remote.
pi2->AlwaysStart<Pong>("pong");
std::unique_ptr<aos::EventLoop> ping_event_loop =
pi1->MakeEventLoop("ping");
@@ -3581,7 +3580,8 @@
// Start logging again on pi2 after it is up.
LoggerState pi2_logger = MakeLoggerState(
- pi2, &event_loop_factory, SupportedCompressionAlgorithms()[0]);
+ pi2, &event_loop_factory, SupportedCompressionAlgorithms()[0],
+ FileStrategy::kKeepSeparate);
pi2_logger.StartLogger(kLogfile2_2);
event_loop_factory.RunFor(chrono::milliseconds(5000));
@@ -3594,6 +3594,7 @@
}
const std::vector<LogFile> sorted_parts = SortParts(filenames);
+ EXPECT_TRUE(AllPartsMatchOutOfOrderDuration(sorted_parts));
auto result = ConfirmReadable(filenames);
EXPECT_THAT(result[0].first, ::testing::ElementsAre(realtime_clock::epoch()));
@@ -3645,6 +3646,9 @@
// Tests that we properly handle one direction being down.
TEST(MissingDirectionTest, OneDirection) {
+ util::UnlinkRecursive(aos::testing::TestTmpDir() + "/logs");
+ std::filesystem::create_directory(aos::testing::TestTmpDir() + "/logs");
+
aos::FlatbufferDetachedBuffer<aos::Configuration> config =
aos::configuration::ReadConfig(ArtifactPath(
"aos/events/logging/multinode_pingpong_split4_config.json"));
@@ -3679,11 +3683,9 @@
}
const std::string kLogfile2_1 =
- aos::testing::TestTmpDir() + "/multi_logfile2.1/";
+ aos::testing::TestTmpDir() + "/logs/multi_logfile2.1/";
const std::string kLogfile1_1 =
- aos::testing::TestTmpDir() + "/multi_logfile1.1/";
- util::UnlinkRecursive(kLogfile2_1);
- util::UnlinkRecursive(kLogfile1_1);
+ aos::testing::TestTmpDir() + "/logs/multi_logfile1.1/";
pi2->Disconnect(pi1->node());
@@ -3692,7 +3694,8 @@
{
LoggerState pi2_logger = MakeLoggerState(
- pi2, &event_loop_factory, SupportedCompressionAlgorithms()[0]);
+ pi2, &event_loop_factory, SupportedCompressionAlgorithms()[0],
+ FileStrategy::kKeepSeparate);
event_loop_factory.RunFor(chrono::milliseconds(95));
@@ -3703,7 +3706,8 @@
pi2->Connect(pi1->node());
LoggerState pi1_logger = MakeLoggerState(
- pi1, &event_loop_factory, SupportedCompressionAlgorithms()[0]);
+ pi1, &event_loop_factory, SupportedCompressionAlgorithms()[0],
+ FileStrategy::kKeepSeparate);
pi1_logger.StartLogger(kLogfile1_1);
event_loop_factory.RunFor(chrono::milliseconds(5000));
@@ -3712,12 +3716,16 @@
}
const std::vector<LogFile> sorted_parts = SortParts(filenames);
+ EXPECT_TRUE(AllPartsMatchOutOfOrderDuration(sorted_parts));
ConfirmReadable(filenames);
}
// Tests that we properly handle only one direction ever existing after a
// reboot.
TEST(MissingDirectionTest, OneDirectionAfterReboot) {
+ util::UnlinkRecursive(aos::testing::TestTmpDir() + "/logs");
+ std::filesystem::create_directory(aos::testing::TestTmpDir() + "/logs");
+
aos::FlatbufferDetachedBuffer<aos::Configuration> config =
aos::configuration::ReadConfig(ArtifactPath(
"aos/events/logging/multinode_pingpong_split4_config.json"));
@@ -3752,8 +3760,7 @@
}
const std::string kLogfile2_1 =
- aos::testing::TestTmpDir() + "/multi_logfile2.1/";
- util::UnlinkRecursive(kLogfile2_1);
+ aos::testing::TestTmpDir() + "/logs/multi_logfile2.1/";
pi1->AlwaysStart<Ping>("ping");
@@ -3762,7 +3769,8 @@
// second boot.
{
LoggerState pi2_logger = MakeLoggerState(
- pi2, &event_loop_factory, SupportedCompressionAlgorithms()[0]);
+ pi2, &event_loop_factory, SupportedCompressionAlgorithms()[0],
+ FileStrategy::kKeepSeparate);
event_loop_factory.RunFor(chrono::milliseconds(95));
@@ -3780,15 +3788,20 @@
}
const std::vector<LogFile> sorted_parts = SortParts(filenames);
+ EXPECT_TRUE(AllPartsMatchOutOfOrderDuration(sorted_parts));
ConfirmReadable(filenames);
}
-// Tests that we properly handle only one direction ever existing after a reboot
-// with only reliable data.
+// Tests that we properly handle only one direction ever existing after a
+// reboot with only reliable data.
TEST(MissingDirectionTest, OneDirectionAfterRebootReliable) {
+ util::UnlinkRecursive(aos::testing::TestTmpDir() + "/logs");
+ std::filesystem::create_directory(aos::testing::TestTmpDir() + "/logs");
+
aos::FlatbufferDetachedBuffer<aos::Configuration> config =
- aos::configuration::ReadConfig(ArtifactPath(
- "aos/events/logging/multinode_pingpong_split4_reliable_config.json"));
+ aos::configuration::ReadConfig(
+ ArtifactPath("aos/events/logging/"
+ "multinode_pingpong_split4_reliable_config.json"));
message_bridge::TestingTimeConverter time_converter(
configuration::NodesCount(&config.message()));
SimulatedEventLoopFactory event_loop_factory(&config.message());
@@ -3820,8 +3833,7 @@
}
const std::string kLogfile2_1 =
- aos::testing::TestTmpDir() + "/multi_logfile2.1/";
- util::UnlinkRecursive(kLogfile2_1);
+ aos::testing::TestTmpDir() + "/logs/multi_logfile2.1/";
pi1->AlwaysStart<Ping>("ping");
@@ -3830,7 +3842,8 @@
// second boot.
{
LoggerState pi2_logger = MakeLoggerState(
- pi2, &event_loop_factory, SupportedCompressionAlgorithms()[0]);
+ pi2, &event_loop_factory, SupportedCompressionAlgorithms()[0],
+ FileStrategy::kKeepSeparate);
event_loop_factory.RunFor(chrono::milliseconds(95));
@@ -3848,13 +3861,17 @@
}
const std::vector<LogFile> sorted_parts = SortParts(filenames);
+ EXPECT_TRUE(AllPartsMatchOutOfOrderDuration(sorted_parts));
ConfirmReadable(filenames);
}
-// Tests that we properly handle only one direction ever existing after a reboot
-// with mixed unreliable vs reliable, where reliable has an earlier timestamp
-// than unreliable.
+// Tests that we properly handle only one direction ever existing after a
+// reboot with mixed unreliable vs reliable, where reliable has an earlier
+// timestamp than unreliable.
TEST(MissingDirectionTest, OneDirectionAfterRebootMixedCase1) {
+ util::UnlinkRecursive(aos::testing::TestTmpDir() + "/logs");
+ std::filesystem::create_directory(aos::testing::TestTmpDir() + "/logs");
+
aos::FlatbufferDetachedBuffer<aos::Configuration> config =
aos::configuration::ReadConfig(ArtifactPath(
"aos/events/logging/multinode_pingpong_split4_mixed1_config.json"));
@@ -3889,8 +3906,7 @@
}
const std::string kLogfile2_1 =
- aos::testing::TestTmpDir() + "/multi_logfile2.1/";
- util::UnlinkRecursive(kLogfile2_1);
+ aos::testing::TestTmpDir() + "/logs/multi_logfile2.1/";
// The following sequence using the above reference config creates
// a reliable message timestamp < unreliable message timestamp.
@@ -3909,7 +3925,8 @@
event_loop_factory.RunFor(chrono::milliseconds(1000));
LoggerState pi2_logger = MakeLoggerState(
- pi2, &event_loop_factory, SupportedCompressionAlgorithms()[0]);
+ pi2, &event_loop_factory, SupportedCompressionAlgorithms()[0],
+ FileStrategy::kKeepSeparate);
pi2_logger.StartLogger(kLogfile2_1);
@@ -3918,13 +3935,17 @@
}
const std::vector<LogFile> sorted_parts = SortParts(filenames);
+ EXPECT_TRUE(AllPartsMatchOutOfOrderDuration(sorted_parts));
ConfirmReadable(filenames);
}
-// Tests that we properly handle only one direction ever existing after a reboot
-// with mixed unreliable vs reliable, where unreliable has an earlier timestamp
-// than reliable.
+// Tests that we properly handle only one direction ever existing after a
+// reboot with mixed unreliable vs reliable, where unreliable has an earlier
+// timestamp than reliable.
TEST(MissingDirectionTest, OneDirectionAfterRebootMixedCase2) {
+ util::UnlinkRecursive(aos::testing::TestTmpDir() + "/logs");
+ std::filesystem::create_directory(aos::testing::TestTmpDir() + "/logs");
+
aos::FlatbufferDetachedBuffer<aos::Configuration> config =
aos::configuration::ReadConfig(ArtifactPath(
"aos/events/logging/multinode_pingpong_split4_mixed2_config.json"));
@@ -3959,8 +3980,7 @@
}
const std::string kLogfile2_1 =
- aos::testing::TestTmpDir() + "/multi_logfile2.1/";
- util::UnlinkRecursive(kLogfile2_1);
+ aos::testing::TestTmpDir() + "/logs/multi_logfile2.1/";
// The following sequence using the above reference config creates
// an unreliable message timestamp < reliable message timestamp.
@@ -3979,7 +3999,8 @@
event_loop_factory.RunFor(chrono::milliseconds(1000));
LoggerState pi2_logger = MakeLoggerState(
- pi2, &event_loop_factory, SupportedCompressionAlgorithms()[0]);
+ pi2, &event_loop_factory, SupportedCompressionAlgorithms()[0],
+ FileStrategy::kKeepSeparate);
pi2_logger.StartLogger(kLogfile2_1);
@@ -3988,15 +4009,17 @@
}
const std::vector<LogFile> sorted_parts = SortParts(filenames);
+ EXPECT_TRUE(AllPartsMatchOutOfOrderDuration(sorted_parts));
ConfirmReadable(filenames);
}
// Tests that we properly handle what used to be a time violation in one
// direction. This can occur when one direction goes down after sending some
-// data, but the other keeps working. The down direction ends up resolving to a
-// straight line in the noncausal filter, where the direction which is still up
-// can cross that line. Really, time progressed along just fine but we assumed
-// that the offset was a line when it could have deviated by up to 1ms/second.
+// data, but the other keeps working. The down direction ends up resolving to
+// a straight line in the noncausal filter, where the direction which is still
+// up can cross that line. Really, time progressed along just fine but we
+// assumed that the offset was a line when it could have deviated by up to
+// 1ms/second.
TEST_P(MultinodeLoggerTest, OneDirectionTimeDrift) {
std::vector<std::string> filenames;
@@ -4022,8 +4045,7 @@
chrono::milliseconds(10000) + chrono::milliseconds(5)});
const std::string kLogfile =
- aos::testing::TestTmpDir() + "/multi_logfile2.1/";
- util::UnlinkRecursive(kLogfile);
+ aos::testing::TestTmpDir() + "/logs/multi_logfile2.1/";
{
LoggerState pi2_logger = MakeLogger(pi2_);
@@ -4040,12 +4062,13 @@
}
const std::vector<LogFile> sorted_parts = SortParts(filenames);
+ EXPECT_TRUE(AllPartsMatchOutOfOrderDuration(sorted_parts));
ConfirmReadable(filenames);
}
-// Tests that we can replay a logfile that has timestamps such that at least one
-// node's epoch is at a positive distributed_clock (and thus will have to be
-// booted after the other node(s)).
+// Tests that we can replay a logfile that has timestamps such that at least
+// one node's epoch is at a positive distributed_clock (and thus will have to
+// be booted after the other node(s)).
TEST_P(MultinodeLoggerTest, StartOneNodeBeforeOther) {
std::vector<std::string> filenames;
@@ -4064,8 +4087,7 @@
{chrono::milliseconds(10000), chrono::milliseconds(10000)});
const std::string kLogfile =
- aos::testing::TestTmpDir() + "/multi_logfile2.1/";
- util::UnlinkRecursive(kLogfile);
+ aos::testing::TestTmpDir() + "/logs/multi_logfile2.1/";
pi2_->Disconnect(pi1_->node());
pi1_->Disconnect(pi2_->node());
@@ -4085,6 +4107,7 @@
}
const std::vector<LogFile> sorted_parts = SortParts(filenames);
+ EXPECT_TRUE(AllPartsMatchOutOfOrderDuration(sorted_parts));
ConfirmReadable(filenames);
{
@@ -4110,12 +4133,16 @@
}
}
-// Tests that when we have a loop without all the logs at all points in time, we
-// can sort it properly.
+// Tests that when we have a loop without all the logs at all points in time,
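+// Like AllPartsMatchOutOfOrderDuration above, but parts from boot 0 of
+// 'node' are exempt from the check.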
+// we can sort it properly.
TEST(MultinodeLoggerLoopTest, Loop) {
+ util::UnlinkRecursive(aos::testing::TestTmpDir() + "/logs");
+ std::filesystem::create_directory(aos::testing::TestTmpDir() + "/logs");
+
aos::FlatbufferDetachedBuffer<aos::Configuration> config =
- aos::configuration::ReadConfig(ArtifactPath(
- "aos/events/logging/multinode_pingpong_triangle_split_config.json"));
+ aos::configuration::ReadConfig(
+ ArtifactPath("aos/events/logging/"
+ "multinode_pingpong_triangle_split_config.json"));
message_bridge::TestingTimeConverter time_converter(
configuration::NodesCount(&config.message()));
SimulatedEventLoopFactory event_loop_factory(&config.message());
@@ -4129,14 +4156,11 @@
event_loop_factory.GetNodeEventLoopFactory("pi3");
const std::string kLogfile1_1 =
- aos::testing::TestTmpDir() + "/multi_logfile1/";
+ aos::testing::TestTmpDir() + "/logs/multi_logfile1/";
const std::string kLogfile2_1 =
- aos::testing::TestTmpDir() + "/multi_logfile2/";
+ aos::testing::TestTmpDir() + "/logs/multi_logfile2/";
const std::string kLogfile3_1 =
- aos::testing::TestTmpDir() + "/multi_logfile3/";
- util::UnlinkRecursive(kLogfile1_1);
- util::UnlinkRecursive(kLogfile2_1);
- util::UnlinkRecursive(kLogfile3_1);
+ aos::testing::TestTmpDir() + "/logs/multi_logfile3/";
{
// Make pi1 boot before everything else.
@@ -4147,9 +4171,9 @@
BootTimestamp::epoch() - chrono::milliseconds(300)});
}
- // We want to setup a situation such that 2 of the 3 legs of the loop are very
- // confident about time being X, and the third leg is pulling the average off
- // to one side.
+ // We want to set up a situation such that 2 of the 3 legs of the loop are
+ // very confident about time being X, and the third leg is pulling the
+ // average off to one side.
//
// It's easiest to visualize this in timestamp_plotter.
@@ -4168,24 +4192,28 @@
CHECK_EQ(builder.Send(ping_builder.Finish()), RawSender::Error::kOk);
}
- // Wait a while so there's enough data to let the worst case be rather off.
+ // Wait a while so there's enough data to let the worst case be rather
+ // off.
event_loop_factory.RunFor(chrono::seconds(1000));
- // Now start a receiving node first. This sets up 2 tight bounds between 2
- // of the nodes.
+ // Now start a receiving node first. This sets up 2 tight bounds between
+ // 2 of the nodes.
LoggerState pi2_logger = MakeLoggerState(
- pi2, &event_loop_factory, SupportedCompressionAlgorithms()[0]);
+ pi2, &event_loop_factory, SupportedCompressionAlgorithms()[0],
+ FileStrategy::kKeepSeparate);
pi2_logger.StartLogger(kLogfile2_1);
event_loop_factory.RunFor(chrono::seconds(100));
// And now start the third leg.
LoggerState pi3_logger = MakeLoggerState(
- pi3, &event_loop_factory, SupportedCompressionAlgorithms()[0]);
+ pi3, &event_loop_factory, SupportedCompressionAlgorithms()[0],
+ FileStrategy::kKeepSeparate);
pi3_logger.StartLogger(kLogfile3_1);
LoggerState pi1_logger = MakeLoggerState(
- pi1, &event_loop_factory, SupportedCompressionAlgorithms()[0]);
+ pi1, &event_loop_factory, SupportedCompressionAlgorithms()[0],
+ FileStrategy::kKeepSeparate);
pi1_logger.StartLogger(kLogfile1_1);
event_loop_factory.RunFor(chrono::seconds(100));
@@ -4197,13 +4225,14 @@
// Make sure we can read this.
const std::vector<LogFile> sorted_parts = SortParts(filenames);
+ EXPECT_TRUE(AllPartsMatchOutOfOrderDuration(sorted_parts));
auto result = ConfirmReadable(filenames);
}
// Tests that RestartLogging works in the simple case. Unfortunately, the
-// failure cases involve simulating time elapsing in callbacks, which is really
-// hard. The best we can reasonably do is make sure 2 back to back logs are
-// parseable together.
+// failure cases involve simulating time elapsing in callbacks, which is
+// really hard. The best we can reasonably do is make sure 2 back-to-back
+// logs are parseable together.
TEST_P(MultinodeLoggerTest, RestartLogging) {
time_converter_.AddMonotonic(
{BootTimestamp::epoch(), BootTimestamp::epoch() + chrono::seconds(1000)});
@@ -4216,18 +4245,19 @@
StartLogger(&pi1_logger, logfile_base1_);
aos::monotonic_clock::time_point last_rotation_time =
pi1_logger.event_loop->monotonic_now();
- pi1_logger.logger->set_on_logged_period([&] {
- const auto now = pi1_logger.event_loop->monotonic_now();
- if (now > last_rotation_time + std::chrono::seconds(5)) {
- pi1_logger.AppendAllFilenames(&filenames);
- std::unique_ptr<MultiNodeFilesLogNamer> namer =
- pi1_logger.MakeLogNamer(logfile_base2_);
- pi1_logger.log_namer = namer.get();
+ pi1_logger.logger->set_on_logged_period(
+ [&](aos::monotonic_clock::time_point) {
+ const auto now = pi1_logger.event_loop->monotonic_now();
+ if (now > last_rotation_time + std::chrono::seconds(5)) {
+ pi1_logger.AppendAllFilenames(&filenames);
+ std::unique_ptr<MultiNodeFilesLogNamer> namer =
+ pi1_logger.MakeLogNamer(logfile_base2_);
+ pi1_logger.log_namer = namer.get();
- pi1_logger.logger->RestartLogging(std::move(namer));
- last_rotation_time = now;
- }
- });
+ pi1_logger.logger->RestartLogging(std::move(namer));
+ last_rotation_time = now;
+ }
+ });
event_loop_factory_.RunFor(chrono::milliseconds(7000));
@@ -4242,8 +4272,414 @@
ConfirmReadable(filenames);
- // TODO(austin): It would be good to confirm that any one time messages end up
- // in both logs correctly.
+ // TODO(austin): It would be good to confirm that any one-time messages end
+ // up in both logs correctly.
+}
+
+// Tests that we call OnEnd without --skip_missing_forwarding_entries.
+TEST_P(MultinodeLoggerTest, SkipMissingForwardingEntries) {
+ if (file_strategy() == FileStrategy::kCombine) {
+ GTEST_SKIP() << "We don't need to test the combined file writer this deep.";
+ }
+ time_converter_.AddMonotonic(
+ {BootTimestamp::epoch(), BootTimestamp::epoch() + chrono::seconds(1000)});
+
+ std::vector<std::string> filenames;
+ {
+ LoggerState pi1_logger = MakeLogger(pi1_);
+
+ event_loop_factory_.RunFor(chrono::milliseconds(95));
+
+ StartLogger(&pi1_logger);
+ aos::monotonic_clock::time_point last_rotation_time =
+ pi1_logger.event_loop->monotonic_now();
+ pi1_logger.logger->set_on_logged_period(
+ [&](aos::monotonic_clock::time_point) {
+ const auto now = pi1_logger.event_loop->monotonic_now();
+ if (now > last_rotation_time + std::chrono::seconds(5)) {
+ pi1_logger.logger->Rotate();
+ last_rotation_time = now;
+ }
+ });
+
+ event_loop_factory_.RunFor(chrono::milliseconds(15000));
+ pi1_logger.AppendAllFilenames(&filenames);
+ }
+
+ // If we remove the last remote data part, we'll trigger missing data for
+ // timestamps.
+ filenames.erase(std::remove_if(filenames.begin(), filenames.end(),
+ [](const std::string &s) {
+ return s.find("data/pi2_data.part3.bfbs") !=
+ std::string::npos;
+ }),
+ filenames.end());
+
+ auto result = ConfirmReadable(filenames);
+}
+
+// Tests that logging and reading back work with a multi-node style config
+// that contains only a single node.
+TEST(MultinodeLoggerConfigTest, SingleNode) {
+ util::UnlinkRecursive(aos::testing::TestTmpDir() + "/logs");
+ std::filesystem::create_directory(aos::testing::TestTmpDir() + "/logs");
+
+ aos::FlatbufferDetachedBuffer<aos::Configuration> config =
+ aos::configuration::ReadConfig(
+ ArtifactPath("aos/events/logging/multinode_single_node_config.json"));
+ message_bridge::TestingTimeConverter time_converter(
+ configuration::NodesCount(&config.message()));
+ SimulatedEventLoopFactory event_loop_factory(&config.message());
+ event_loop_factory.SetTimeConverter(&time_converter);
+
+ time_converter.StartEqual();
+
+ const std::string kLogfile1_1 =
+ aos::testing::TestTmpDir() + "/logs/multi_logfile1/";
+
+ NodeEventLoopFactory *const pi1 =
+ event_loop_factory.GetNodeEventLoopFactory("pi1");
+
+ std::vector<std::string> filenames;
+
+ {
+ // Start logging on pi1, the only node in this config.
+ LoggerState pi1_logger = MakeLoggerState(
+ pi1, &event_loop_factory, SupportedCompressionAlgorithms()[0],
+ FileStrategy::kKeepSeparate);
+ pi1_logger.StartLogger(kLogfile1_1);
+
+ event_loop_factory.RunFor(chrono::seconds(10));
+
+ pi1_logger.AppendAllFilenames(&filenames);
+ }
+
+ // Make sure we can read this.
+ const std::vector<LogFile> sorted_parts = SortParts(filenames);
+ EXPECT_TRUE(AllPartsMatchOutOfOrderDuration(sorted_parts));
+ auto result = ConfirmReadable(filenames);
+
+ // TODO(austin): Probably want to stop caring about ServerStatistics,
+ // ClientStatistics, and Timestamp since they don't really make sense.
+}
+
+// Tests that when we have evidence of 2 boots, and then start logging, the
+// max_out_of_order_duration ends up reasonable on the boot with the start time.
+TEST(MultinodeLoggerLoopTest, PreviousBootData) {
+ util::UnlinkRecursive(aos::testing::TestTmpDir() + "/logs");
+ std::filesystem::create_directory(aos::testing::TestTmpDir() + "/logs");
+
+ aos::FlatbufferDetachedBuffer<aos::Configuration> config =
+ aos::configuration::ReadConfig(ArtifactPath(
+ "aos/events/logging/multinode_pingpong_reboot_ooo_config.json"));
+ message_bridge::TestingTimeConverter time_converter(
+ configuration::NodesCount(&config.message()));
+ SimulatedEventLoopFactory event_loop_factory(&config.message());
+ event_loop_factory.SetTimeConverter(&time_converter);
+
+ const UUID pi1_boot0 = UUID::Random();
+ const UUID pi2_boot0 = UUID::Random();
+ const UUID pi2_boot1 = UUID::Random();
+
+ const std::string kLogfile1_1 =
+ aos::testing::TestTmpDir() + "/logs/multi_logfile1/";
+
+ {
+ constexpr size_t kPi1Index = 0;
+ constexpr size_t kPi2Index = 1;
+ time_converter.set_boot_uuid(kPi1Index, 0, pi1_boot0);
+ time_converter.set_boot_uuid(kPi2Index, 0, pi2_boot0);
+ time_converter.set_boot_uuid(kPi2Index, 1, pi2_boot1);
+
+ // Make pi1 boot before everything else.
+ time_converter.AddNextTimestamp(
+ distributed_clock::epoch(),
+ {BootTimestamp::epoch(),
+ BootTimestamp::epoch() - chrono::milliseconds(100)});
+
+ const chrono::nanoseconds reboot_time = chrono::seconds(1005);
+ time_converter.AddNextTimestamp(
+ distributed_clock::epoch() + reboot_time,
+ {BootTimestamp::epoch() + reboot_time,
+ BootTimestamp{.boot = 1, .time = monotonic_clock::epoch()}});
+ }
+
+ NodeEventLoopFactory *const pi1 =
+ event_loop_factory.GetNodeEventLoopFactory("pi1");
+ NodeEventLoopFactory *const pi2 =
+ event_loop_factory.GetNodeEventLoopFactory("pi2");
+
+ // What we want is for pi2 to send a message at t=1000 on the first channel
+ // (/atest1 pong), and t=1 on the second channel (/atest3 pong). That'll
+ // make the max out of order duration large.
+ //
+ // Then, we reboot, and only send messages on a third channel (/atest2
+ // pong). The order is key: they need to sort in this order in the config.
+
+ std::vector<std::string> filenames;
+ {
+ {
+ std::unique_ptr<EventLoop> pi2_event_loop = pi2->MakeEventLoop("pong");
+ aos::Sender<examples::Pong> pong_sender =
+ pi2_event_loop->MakeSender<examples::Pong>("/atest3");
+
+ pi2_event_loop->OnRun([&]() {
+ aos::Sender<examples::Pong>::Builder builder =
+ pong_sender.MakeBuilder();
+ examples::Pong::Builder pong_builder =
+ builder.MakeBuilder<examples::Pong>();
+ CHECK_EQ(builder.Send(pong_builder.Finish()), RawSender::Error::kOk);
+ });
+
+ event_loop_factory.RunFor(chrono::seconds(1000));
+ }
+
+ {
+ std::unique_ptr<EventLoop> pi2_event_loop = pi2->MakeEventLoop("pong");
+ aos::Sender<examples::Pong> pong_sender =
+ pi2_event_loop->MakeSender<examples::Pong>("/atest1");
+
+ aos::Sender<examples::Pong>::Builder builder = pong_sender.MakeBuilder();
+ examples::Pong::Builder pong_builder =
+ builder.MakeBuilder<examples::Pong>();
+ CHECK_EQ(builder.Send(pong_builder.Finish()), RawSender::Error::kOk);
+ }
+
+ event_loop_factory.RunFor(chrono::seconds(10));
+
+ // Now start logging on pi1, after the reboot and the existing traffic.
+ LoggerState pi1_logger = MakeLoggerState(
+ pi1, &event_loop_factory, SupportedCompressionAlgorithms()[0],
+ FileStrategy::kKeepSeparate);
+ pi1_logger.StartLogger(kLogfile1_1);
+
+ std::unique_ptr<EventLoop> pi2_event_loop = pi2->MakeEventLoop("pong");
+ aos::Sender<examples::Pong> pong_sender =
+ pi2_event_loop->MakeSender<examples::Pong>("/atest2");
+
+ pi2_event_loop->AddPhasedLoop(
+ [&pong_sender](int) {
+ aos::Sender<examples::Pong>::Builder builder =
+ pong_sender.MakeBuilder();
+ examples::Pong::Builder pong_builder =
+ builder.MakeBuilder<examples::Pong>();
+ CHECK_EQ(builder.Send(pong_builder.Finish()), RawSender::Error::kOk);
+ },
+ chrono::milliseconds(10));
+
+ event_loop_factory.RunFor(chrono::seconds(100));
+
+ pi1_logger.AppendAllFilenames(&filenames);
+ }
+
+ // Make sure we can read this.
+ const std::vector<LogFile> sorted_parts = SortParts(filenames);
+ EXPECT_TRUE(AllRebootPartsMatchOutOfOrderDuration(sorted_parts, "pi2"));
+ auto result = ConfirmReadable(filenames);
+}
+
+// Tests that when we start without a connection, and then start logging, the
+// max_out_of_order_duration ends up reasonable.
+TEST(MultinodeLoggerLoopTest, StartDisconnected) {
+ util::UnlinkRecursive(aos::testing::TestTmpDir() + "/logs");
+ std::filesystem::create_directory(aos::testing::TestTmpDir() + "/logs");
+
+ aos::FlatbufferDetachedBuffer<aos::Configuration> config =
+ aos::configuration::ReadConfig(ArtifactPath(
+ "aos/events/logging/multinode_pingpong_reboot_ooo_config.json"));
+ message_bridge::TestingTimeConverter time_converter(
+ configuration::NodesCount(&config.message()));
+ SimulatedEventLoopFactory event_loop_factory(&config.message());
+ event_loop_factory.SetTimeConverter(&time_converter);
+
+ time_converter.StartEqual();
+
+ const std::string kLogfile1_1 =
+ aos::testing::TestTmpDir() + "/logs/multi_logfile1/";
+
+ NodeEventLoopFactory *const pi1 =
+ event_loop_factory.GetNodeEventLoopFactory("pi1");
+ NodeEventLoopFactory *const pi2 =
+ event_loop_factory.GetNodeEventLoopFactory("pi2");
+
+ // What we want is for pi2 to send a message at t=1000 on the first channel
+ // (/atest1 pong), and t=1 on the second channel (/atest3 pong). That'll
+ // make the max out of order duration large.
+ //
+ // Then, we disconnect, and only send messages on a third channel
+ // (/atest2 pong). The order is key: they need to sort in this order in the
+ // config so we observe them in the order which grows the
+ // max_out_of_order_duration.
+
+ std::vector<std::string> filenames;
+ {
+ {
+ std::unique_ptr<EventLoop> pi2_event_loop = pi2->MakeEventLoop("pong");
+ aos::Sender<examples::Pong> pong_sender =
+ pi2_event_loop->MakeSender<examples::Pong>("/atest3");
+
+ pi2_event_loop->OnRun([&]() {
+ aos::Sender<examples::Pong>::Builder builder =
+ pong_sender.MakeBuilder();
+ examples::Pong::Builder pong_builder =
+ builder.MakeBuilder<examples::Pong>();
+ CHECK_EQ(builder.Send(pong_builder.Finish()), RawSender::Error::kOk);
+ });
+
+ event_loop_factory.RunFor(chrono::seconds(1000));
+ }
+
+ {
+ std::unique_ptr<EventLoop> pi2_event_loop = pi2->MakeEventLoop("pong");
+ aos::Sender<examples::Pong> pong_sender =
+ pi2_event_loop->MakeSender<examples::Pong>("/atest1");
+
+ aos::Sender<examples::Pong>::Builder builder = pong_sender.MakeBuilder();
+ examples::Pong::Builder pong_builder =
+ builder.MakeBuilder<examples::Pong>();
+ CHECK_EQ(builder.Send(pong_builder.Finish()), RawSender::Error::kOk);
+ }
+
+ event_loop_factory.RunFor(chrono::seconds(10));
+
+ pi1->Disconnect(pi2->node());
+ pi2->Disconnect(pi1->node());
+
+ // Make data flow.
+ std::unique_ptr<EventLoop> pi2_event_loop = pi2->MakeEventLoop("pong");
+ aos::Sender<examples::Pong> pong_sender =
+ pi2_event_loop->MakeSender<examples::Pong>("/atest2");
+
+ pi2_event_loop->AddPhasedLoop(
+ [&pong_sender](int) {
+ aos::Sender<examples::Pong>::Builder builder =
+ pong_sender.MakeBuilder();
+ examples::Pong::Builder pong_builder =
+ builder.MakeBuilder<examples::Pong>();
+ CHECK_EQ(builder.Send(pong_builder.Finish()), RawSender::Error::kOk);
+ },
+ chrono::milliseconds(10));
+
+ event_loop_factory.RunFor(chrono::seconds(10));
+
+ // Now start logging on pi1, while the nodes are still disconnected.
+ LoggerState pi1_logger = MakeLoggerState(
+ pi1, &event_loop_factory, SupportedCompressionAlgorithms()[0],
+ FileStrategy::kKeepSeparate);
+ pi1_logger.StartLogger(kLogfile1_1);
+
+ event_loop_factory.RunFor(chrono::seconds(10));
+
+ // Now, reconnect, and everything should recover.
+ pi1->Connect(pi2->node());
+ pi2->Connect(pi1->node());
+
+ event_loop_factory.RunFor(chrono::seconds(10));
+
+ pi1_logger.AppendAllFilenames(&filenames);
+ }
+
+ // Make sure we can read this.
+ const std::vector<LogFile> sorted_parts = SortParts(filenames);
+ EXPECT_TRUE(AllPartsMatchOutOfOrderDuration(sorted_parts));
+ auto result = ConfirmReadable(filenames);
+}
+
+// Class to spam Pong messages blindly.
+class PongSender {
+ public:
+ PongSender(EventLoop *loop, std::string_view channel_name)
+ : sender_(loop->MakeSender<examples::Pong>(channel_name)) {
+ loop->AddPhasedLoop(
+ [this](int) {
+ aos::Sender<examples::Pong>::Builder builder = sender_.MakeBuilder();
+ examples::Pong::Builder pong_builder =
+ builder.MakeBuilder<examples::Pong>();
+ CHECK_EQ(builder.Send(pong_builder.Finish()), RawSender::Error::kOk);
+ },
+ chrono::milliseconds(10));
+ }
+
+ private:
+ aos::Sender<examples::Pong> sender_;
+};
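+// PongSender is handed to NodeEventLoopFactory::AlwaysStart below, so a
+// fresh instance comes up (and keeps sending) on each boot of the node.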
+
+// Tests that we log correctly as nodes connect slowly.
+TEST(MultinodeLoggerLoopTest, StaggeredConnect) {
+ util::UnlinkRecursive(aos::testing::TestTmpDir() + "/logs");
+ std::filesystem::create_directory(aos::testing::TestTmpDir() + "/logs");
+
+ aos::FlatbufferDetachedBuffer<aos::Configuration> config =
+ aos::configuration::ReadConfig(ArtifactPath(
+ "aos/events/logging/multinode_pingpong_pi3_pingpong_config.json"));
+ message_bridge::TestingTimeConverter time_converter(
+ configuration::NodesCount(&config.message()));
+ SimulatedEventLoopFactory event_loop_factory(&config.message());
+ event_loop_factory.SetTimeConverter(&time_converter);
+
+ time_converter.StartEqual();
+
+ const std::string kLogfile1_1 =
+ aos::testing::TestTmpDir() + "/logs/multi_logfile1/";
+
+ NodeEventLoopFactory *const pi1 =
+ event_loop_factory.GetNodeEventLoopFactory("pi1");
+ NodeEventLoopFactory *const pi2 =
+ event_loop_factory.GetNodeEventLoopFactory("pi2");
+ NodeEventLoopFactory *const pi3 =
+ event_loop_factory.GetNodeEventLoopFactory("pi3");
+
+ // Start with pi2 and pi3 disconnected from pi1 while PongSenders spam
+ // /test2 and /test3. Then start logging on pi1 and reconnect the nodes one
+ // at a time, confirming the log stays readable with connections arriving
+ // staggered.
+
+ pi1->Disconnect(pi2->node());
+ pi2->Disconnect(pi1->node());
+
+ pi1->Disconnect(pi3->node());
+ pi3->Disconnect(pi1->node());
+
+ std::vector<std::string> filenames;
+ pi2->AlwaysStart<PongSender>("pongsender", "/test2");
+ pi3->AlwaysStart<PongSender>("pongsender", "/test3");
+
+ event_loop_factory.RunFor(chrono::seconds(10));
+
+ {
+ // Now start logging on pi1, while pi2 and pi3 are still disconnected.
+ LoggerState pi1_logger = MakeLoggerState(
+ pi1, &event_loop_factory, SupportedCompressionAlgorithms()[0],
+ FileStrategy::kKeepSeparate);
+ pi1_logger.StartLogger(kLogfile1_1);
+
+ event_loop_factory.RunFor(chrono::seconds(10));
+
+ // Now, reconnect, and everything should recover.
+ pi1->Connect(pi2->node());
+ pi2->Connect(pi1->node());
+
+ event_loop_factory.RunFor(chrono::seconds(10));
+
+ pi1->Connect(pi3->node());
+ pi3->Connect(pi1->node());
+
+ event_loop_factory.RunFor(chrono::seconds(10));
+
+ pi1_logger.AppendAllFilenames(&filenames);
+ }
+
+ // Make sure we can read this.
+ const std::vector<LogFile> sorted_parts = SortParts(filenames);
+ EXPECT_TRUE(AllPartsMatchOutOfOrderDuration(sorted_parts));
+ auto result = ConfirmReadable(filenames);
}
} // namespace testing
diff --git a/aos/events/logging/multinode_logger_test_lib.cc b/aos/events/logging/multinode_logger_test_lib.cc
index fdee4d8..64dd1cd 100644
--- a/aos/events/logging/multinode_logger_test_lib.cc
+++ b/aos/events/logging/multinode_logger_test_lib.cc
@@ -3,11 +3,14 @@
#include "aos/events/event_loop.h"
#include "aos/events/logging/log_reader.h"
#include "aos/events/logging/logfile_utils.h"
+#include "aos/events/logging/logfile_validator.h"
#include "aos/events/ping_lib.h"
#include "aos/events/pong_lib.h"
#include "aos/events/simulated_event_loop.h"
#include "aos/testing/tmpdir.h"
+DECLARE_bool(force_timestamp_loading);
+
namespace aos {
namespace logger {
namespace testing {
@@ -17,6 +20,7 @@
LoggerState MakeLoggerState(NodeEventLoopFactory *node,
SimulatedEventLoopFactory *factory,
CompressionParams params,
+ FileStrategy file_strategy,
const Configuration *configuration) {
if (configuration == nullptr) {
configuration = factory->configuration();
@@ -26,14 +30,18 @@
configuration,
configuration::GetNode(configuration, node->node()),
nullptr,
- params};
+ params,
+ file_strategy};
}
std::unique_ptr<MultiNodeFilesLogNamer> LoggerState::MakeLogNamer(
std::string logfile_base) {
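+ // kCombine exercises MinimalFileMultiNodeLogNamer, which folds data and
+ // timestamps into fewer files per node, while kKeepSeparate uses the
+ // per-stream MultiNodeFilesLogNamer layout.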
std::unique_ptr<MultiNodeFilesLogNamer> namer =
- std::make_unique<MultiNodeFilesLogNamer>(logfile_base, configuration,
- event_loop.get(), node);
+ file_strategy == FileStrategy::kCombine
+ ? std::make_unique<MinimalFileMultiNodeLogNamer>(
+ logfile_base, configuration, event_loop.get(), node)
+ : std::make_unique<MultiNodeFilesLogNamer>(
+ logfile_base, configuration, event_loop.get(), node);
namer->set_extension(params.extension);
namer->set_encoder_factory(params.encoder_factory);
return namer;
@@ -94,32 +102,24 @@
pi2_index_(configuration::GetNodeIndex(
event_loop_factory_.configuration(), pi2_->node())),
tmp_dir_(aos::testing::TestTmpDir()),
- logfile_base1_(tmp_dir_ + "/multi_logfile1"),
- logfile_base2_(tmp_dir_ + "/multi_logfile2"),
+ logfile_base1_(tmp_dir_ + "/logs/multi_logfile1"),
+ logfile_base2_(tmp_dir_ + "/logs/multi_logfile2"),
pi1_reboot_logfiles_(MakePi1RebootLogfiles()),
logfiles_(MakeLogFiles(logfile_base1_, logfile_base2_)),
- pi1_single_direction_logfiles_(MakePi1SingleDirectionLogfiles()),
structured_logfiles_(StructureLogFiles()) {
+ FLAGS_force_timestamp_loading =
+ std::get<0>(GetParam()).timestamp_buffering ==
+ ForceTimestampBuffering::kForceBufferTimestamps;
+
+ util::UnlinkRecursive(tmp_dir_ + "/logs");
+ std::filesystem::create_directory(tmp_dir_ + "/logs");
+
LOG(INFO) << "Config " << std::get<0>(GetParam()).config;
event_loop_factory_.SetTimeConverter(&time_converter_);
- // Go through and remove the logfiles if they already exist.
- for (const auto &file : logfiles_) {
- unlink(file.c_str());
- unlink((file + ".xz").c_str());
- }
-
- for (const auto &file : MakeLogFiles(tmp_dir_ + "/relogged1",
- tmp_dir_ + "/relogged2", 3, 3, true)) {
- unlink(file.c_str());
- }
-
- for (const auto &file : pi1_reboot_logfiles_) {
- unlink(file.c_str());
- }
-
LOG(INFO) << "Logging data to " << logfiles_[0] << ", " << logfiles_[1]
- << " and " << logfiles_[2];
+ << " and " << logfiles_[2] << " shared? " << shared()
+ << " combine? " << (file_strategy() == FileStrategy::kCombine);
pi1_->OnStartup([this]() {
pi1_->AlwaysStart<Ping>("ping");
@@ -135,90 +135,72 @@
return std::get<0>(GetParam()).shared;
}
+FileStrategy MultinodeLoggerTest::file_strategy() const {
+ return std::get<0>(GetParam()).file_strategy;
+}
+
std::vector<std::string> MultinodeLoggerTest::MakeLogFiles(
std::string logfile_base1, std::string logfile_base2, size_t pi1_data_count,
- size_t pi2_data_count, bool relogged_config) {
+ size_t pi2_data_count, size_t pi1_timestamps_count,
+ size_t pi2_timestamps_count, bool relogged_config) {
std::string_view sha256 = relogged_config
? std::get<0>(GetParam()).relogged_sha256
: std::get<0>(GetParam()).sha256;
std::vector<std::string> result;
result.emplace_back(absl::StrCat(logfile_base1, "_", sha256, Extension()));
result.emplace_back(absl::StrCat(logfile_base2, "_", sha256, Extension()));
- for (size_t i = 0; i < pi1_data_count; ++i) {
- result.emplace_back(
- absl::StrCat(logfile_base1, "_pi1_data.part", i, Extension()));
- }
- result.emplace_back(logfile_base1 + "_pi2_data/test/aos.examples.Pong.part0" +
- Extension());
- result.emplace_back(logfile_base1 + "_pi2_data/test/aos.examples.Pong.part1" +
- Extension());
- for (size_t i = 0; i < pi2_data_count; ++i) {
- result.emplace_back(
- absl::StrCat(logfile_base2, "_pi2_data.part", i, Extension()));
- }
- result.emplace_back(logfile_base2 +
- "_pi1_data/pi1/aos/aos.message_bridge.Timestamp.part0" +
- Extension());
- result.emplace_back(logfile_base2 +
- "_pi1_data/pi1/aos/aos.message_bridge.Timestamp.part1" +
- Extension());
- result.emplace_back(logfile_base1 +
- "_pi2_data/pi2/aos/aos.message_bridge.Timestamp.part0" +
- Extension());
- result.emplace_back(logfile_base1 +
- "_pi2_data/pi2/aos/aos.message_bridge.Timestamp.part1" +
- Extension());
- if (shared()) {
- result.emplace_back(logfile_base1 +
- "_timestamps/pi1/aos/remote_timestamps/pi2/"
- "aos.message_bridge.RemoteMessage.part0" +
- Extension());
- result.emplace_back(logfile_base1 +
- "_timestamps/pi1/aos/remote_timestamps/pi2/"
- "aos.message_bridge.RemoteMessage.part1" +
- Extension());
- result.emplace_back(logfile_base1 +
- "_timestamps/pi1/aos/remote_timestamps/pi2/"
- "aos.message_bridge.RemoteMessage.part2" +
- Extension());
- result.emplace_back(logfile_base2 +
- "_timestamps/pi2/aos/remote_timestamps/pi1/"
- "aos.message_bridge.RemoteMessage.part0" +
- Extension());
- result.emplace_back(logfile_base2 +
- "_timestamps/pi2/aos/remote_timestamps/pi1/"
- "aos.message_bridge.RemoteMessage.part1" +
- Extension());
+
+ if (file_strategy() == FileStrategy::kCombine) {
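+ // The combined writer emits one "<base>_<logger node>_<source node>_all"
+ // stream per source node, holding both data and timestamps.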
+ for (size_t i = 0; i < pi1_data_count + pi1_timestamps_count; ++i) {
+ result.emplace_back(
+ absl::StrCat(logfile_base1, "_pi1_pi1_all.part", i, Extension()));
+ }
+ for (size_t i = 0; i < 3; ++i) {
+ result.emplace_back(
+ absl::StrCat(logfile_base1, "_pi1_pi2_all.part", i, Extension()));
+ }
+
+ for (size_t i = 0; i < pi2_data_count + pi2_timestamps_count; ++i) {
+ result.emplace_back(
+ absl::StrCat(logfile_base2, "_pi2_pi2_all.part", i, Extension()));
+ }
+
+ for (size_t i = 0; i < 3; ++i) {
+ result.emplace_back(
+ absl::StrCat(logfile_base2, "_pi2_pi1_all.part", i, Extension()));
+ }
} else {
- result.emplace_back(logfile_base1 +
- "_timestamps/pi1/aos/remote_timestamps/pi2/pi1/aos/"
- "aos-message_bridge-Timestamp/"
- "aos.message_bridge.RemoteMessage.part0" +
+ for (size_t i = 0; i < pi1_data_count; ++i) {
+ result.emplace_back(
+ absl::StrCat(logfile_base1, "_pi1_data.part", i, Extension()));
+ }
+ for (size_t i = 0; i < pi1_timestamps_count; ++i) {
+ result.emplace_back(
+ absl::StrCat(logfile_base1, "_pi1_timestamps.part", i, Extension()));
+ }
+ for (size_t i = 0; i < pi2_data_count; ++i) {
+ result.emplace_back(
+ absl::StrCat(logfile_base2, "_pi2_data.part", i, Extension()));
+ }
+ for (size_t i = 0; i < pi2_timestamps_count; ++i) {
+ result.emplace_back(
+ absl::StrCat(logfile_base2, "_pi2_timestamps.part", i, Extension()));
+ }
+ result.emplace_back(logfile_base2 + "_data/pi1_data.part0" + Extension());
+ result.emplace_back(logfile_base2 + "_data/pi1_data.part1" + Extension());
+ result.emplace_back(logfile_base1 + "_data/pi2_data.part0" + Extension());
+ result.emplace_back(logfile_base1 + "_data/pi2_data.part1" + Extension());
+ // Shared and not-shared config types will have the same output since the
+ // data writers are consolidated per node instead of per channel.
+ result.emplace_back(logfile_base1 + "_timestamps/remote_pi2.part0" +
Extension());
- result.emplace_back(logfile_base1 +
- "_timestamps/pi1/aos/remote_timestamps/pi2/pi1/aos/"
- "aos-message_bridge-Timestamp/"
- "aos.message_bridge.RemoteMessage.part1" +
+ result.emplace_back(logfile_base1 + "_timestamps/remote_pi2.part1" +
Extension());
- result.emplace_back(logfile_base2 +
- "_timestamps/pi2/aos/remote_timestamps/pi1/pi2/aos/"
- "aos-message_bridge-Timestamp/"
- "aos.message_bridge.RemoteMessage.part0" +
+ result.emplace_back(logfile_base1 + "_timestamps/remote_pi2.part2" +
Extension());
- result.emplace_back(logfile_base2 +
- "_timestamps/pi2/aos/remote_timestamps/pi1/pi2/aos/"
- "aos-message_bridge-Timestamp/"
- "aos.message_bridge.RemoteMessage.part1" +
+ result.emplace_back(logfile_base2 + "_timestamps/remote_pi1.part0" +
Extension());
- result.emplace_back(logfile_base1 +
- "_timestamps/pi1/aos/remote_timestamps/pi2/test/"
- "aos-examples-Ping/"
- "aos.message_bridge.RemoteMessage.part0" +
- Extension());
- result.emplace_back(logfile_base1 +
- "_timestamps/pi1/aos/remote_timestamps/pi2/test/"
- "aos-examples-Ping/"
- "aos.message_bridge.RemoteMessage.part1" +
+ result.emplace_back(logfile_base2 + "_timestamps/remote_pi1.part1" +
Extension());
}
@@ -228,125 +210,56 @@
std::vector<std::string> MultinodeLoggerTest::MakePi1RebootLogfiles() {
std::vector<std::string> result;
result.emplace_back(logfile_base1_ + "_pi1_data.part0" + Extension());
- result.emplace_back(logfile_base1_ + "_pi1_data.part1" + Extension());
- result.emplace_back(logfile_base1_ + "_pi1_data.part2" + Extension());
- result.emplace_back(logfile_base1_ + "_pi1_data.part3" + Extension());
- result.emplace_back(logfile_base1_ + "_pi1_data.part4" + Extension());
- result.emplace_back(logfile_base1_ +
- "_pi2_data/test/aos.examples.Pong.part0" + Extension());
- result.emplace_back(logfile_base1_ +
- "_pi2_data/test/aos.examples.Pong.part1" + Extension());
- result.emplace_back(logfile_base1_ +
- "_pi2_data/test/aos.examples.Pong.part2" + Extension());
- result.emplace_back(logfile_base1_ +
- "_pi2_data/test/aos.examples.Pong.part3" + Extension());
- result.emplace_back(logfile_base1_ +
- "_pi2_data/pi2/aos/aos.message_bridge.Timestamp.part0" +
- Extension());
- result.emplace_back(logfile_base1_ +
- "_pi2_data/pi2/aos/aos.message_bridge.Timestamp.part1" +
- Extension());
- result.emplace_back(logfile_base1_ +
- "_pi2_data/pi2/aos/aos.message_bridge.Timestamp.part2" +
- Extension());
- result.emplace_back(logfile_base1_ +
- "_pi2_data/pi2/aos/aos.message_bridge.Timestamp.part3" +
- Extension());
+ result.emplace_back(logfile_base1_ + "_pi1_timestamps.part0" + Extension());
+ result.emplace_back(logfile_base1_ + "_pi1_timestamps.part1" + Extension());
+ result.emplace_back(logfile_base1_ + "_pi1_timestamps.part2" + Extension());
+ result.emplace_back(logfile_base1_ + "_pi1_timestamps.part3" + Extension());
+
+ result.emplace_back(logfile_base1_ + "_data/pi2_data.part0" + Extension());
+ result.emplace_back(logfile_base1_ + "_data/pi2_data.part1" + Extension());
+ result.emplace_back(logfile_base1_ + "_data/pi2_data.part2" + Extension());
+ result.emplace_back(logfile_base1_ + "_data/pi2_data.part3" + Extension());
result.emplace_back(absl::StrCat(
logfile_base1_, "_", std::get<0>(GetParam()).sha256, Extension()));
- if (shared()) {
- for (size_t i = 0; i < 6; ++i) {
- result.emplace_back(
- absl::StrCat(logfile_base1_,
- "_timestamps/pi1/aos/remote_timestamps/pi2/"
- "aos.message_bridge.RemoteMessage.part",
- i, Extension()));
- }
- } else {
- result.emplace_back(logfile_base1_ +
- "_timestamps/pi1/aos/remote_timestamps/pi2/pi1/aos/"
- "aos-message_bridge-Timestamp/"
- "aos.message_bridge.RemoteMessage.part0" +
- Extension());
- result.emplace_back(logfile_base1_ +
- "_timestamps/pi1/aos/remote_timestamps/pi2/pi1/aos/"
- "aos-message_bridge-Timestamp/"
- "aos.message_bridge.RemoteMessage.part1" +
- Extension());
- result.emplace_back(logfile_base1_ +
- "_timestamps/pi1/aos/remote_timestamps/pi2/pi1/aos/"
- "aos-message_bridge-Timestamp/"
- "aos.message_bridge.RemoteMessage.part2" +
- Extension());
- result.emplace_back(logfile_base1_ +
- "_timestamps/pi1/aos/remote_timestamps/pi2/pi1/aos/"
- "aos-message_bridge-Timestamp/"
- "aos.message_bridge.RemoteMessage.part3" +
- Extension());
-
- result.emplace_back(logfile_base1_ +
- "_timestamps/pi1/aos/remote_timestamps/pi2/test/"
- "aos-examples-Ping/"
- "aos.message_bridge.RemoteMessage.part0" +
- Extension());
- result.emplace_back(logfile_base1_ +
- "_timestamps/pi1/aos/remote_timestamps/pi2/test/"
- "aos-examples-Ping/"
- "aos.message_bridge.RemoteMessage.part1" +
- Extension());
- result.emplace_back(logfile_base1_ +
- "_timestamps/pi1/aos/remote_timestamps/pi2/test/"
- "aos-examples-Ping/"
- "aos.message_bridge.RemoteMessage.part2" +
- Extension());
- result.emplace_back(logfile_base1_ +
- "_timestamps/pi1/aos/remote_timestamps/pi2/test/"
- "aos-examples-Ping/"
- "aos.message_bridge.RemoteMessage.part3" +
- Extension());
+ for (size_t i = 0; i < 6; ++i) {
+ result.emplace_back(absl::StrCat(
+ logfile_base1_, "_timestamps/remote_pi2.part", i, Extension()));
}
return result;
}
-std::vector<std::string> MultinodeLoggerTest::MakePi1SingleDirectionLogfiles() {
- std::vector<std::string> result;
- result.emplace_back(logfile_base1_ + "_pi1_data.part0" + Extension());
- result.emplace_back(logfile_base1_ + "_pi1_data.part1" + Extension());
- result.emplace_back(logfile_base1_ +
- "_pi2_data/pi2/aos/aos.message_bridge.Timestamp.part0" +
- Extension());
- result.emplace_back(absl::StrCat(
- logfile_base1_, "_", std::get<0>(GetParam()).sha256, Extension()));
- return result;
-}
-
std::vector<std::string> MultinodeLoggerTest::MakePi1DeadNodeLogfiles() {
std::vector<std::string> result;
- result.emplace_back(logfile_base1_ + "_pi1_data.part0" + Extension());
- result.emplace_back(absl::StrCat(
- logfile_base1_, "_", std::get<0>(GetParam()).sha256, Extension()));
+ if (file_strategy() == FileStrategy::kCombine) {
+ result.emplace_back(logfile_base1_ + "_pi1_pi1_all.part0" + Extension());
+ result.emplace_back(absl::StrCat(
+ logfile_base1_, "_", std::get<0>(GetParam()).sha256, Extension()));
+ } else {
+ result.emplace_back(logfile_base1_ + "_pi1_data.part0" + Extension());
+ result.emplace_back(absl::StrCat(
+ logfile_base1_, "_", std::get<0>(GetParam()).sha256, Extension()));
+ }
return result;
}
std::vector<std::vector<std::string>> MultinodeLoggerTest::StructureLogFiles() {
- std::vector<std::vector<std::string>> result{
- std::vector<std::string>{logfiles_[2], logfiles_[3], logfiles_[4]},
- std::vector<std::string>{logfiles_[5], logfiles_[6]},
- std::vector<std::string>{logfiles_[7], logfiles_[8], logfiles_[9]},
- std::vector<std::string>{logfiles_[10], logfiles_[11]},
- std::vector<std::string>{logfiles_[12], logfiles_[13]}};
-
- if (shared()) {
- result.emplace_back(
- std::vector<std::string>{logfiles_[14], logfiles_[15], logfiles_[16]});
- result.emplace_back(std::vector<std::string>{logfiles_[17], logfiles_[18]});
+ if (file_strategy() == FileStrategy::kCombine) {
+ return std::vector<std::vector<std::string>>{
+ std::vector<std::string>{logfiles_[2], logfiles_[3], logfiles_[4]},
+ std::vector<std::string>{logfiles_[5], logfiles_[6], logfiles_[7]},
+ std::vector<std::string>{logfiles_[8], logfiles_[9], logfiles_[10]},
+ std::vector<std::string>{logfiles_[11], logfiles_[12], logfiles_[13]}};
} else {
- result.emplace_back(std::vector<std::string>{logfiles_[14], logfiles_[15]});
- result.emplace_back(std::vector<std::string>{logfiles_[16], logfiles_[17]});
- result.emplace_back(std::vector<std::string>{logfiles_[18], logfiles_[19]});
+ return std::vector<std::vector<std::string>>{
+ std::vector<std::string>{logfiles_[2]},
+ std::vector<std::string>{logfiles_[3], logfiles_[4]},
+ std::vector<std::string>{logfiles_[5]},
+ std::vector<std::string>{logfiles_[6], logfiles_[7]},
+ std::vector<std::string>{logfiles_[8], logfiles_[9]},
+ std::vector<std::string>{logfiles_[10], logfiles_[11]},
+ std::vector<std::string>{logfiles_[12], logfiles_[13], logfiles_[14]},
+ std::vector<std::string>{logfiles_[15], logfiles_[16]}};
}
-
- return result;
}
std::string MultinodeLoggerTest::Extension() {
@@ -359,7 +272,8 @@
if (factory == nullptr) {
factory = &event_loop_factory_;
}
- return MakeLoggerState(node, factory, std::get<1>(GetParam()), configuration);
+ return MakeLoggerState(node, factory, std::get<1>(GetParam()),
+ file_strategy(), configuration);
}
void MultinodeLoggerTest::StartLogger(LoggerState *logger,
@@ -422,7 +336,8 @@
// depends on if we have the remote timestamps split across 2 files, or just
// across 1, depending on if we are using a split or combined timestamp
// channel config.
- EXPECT_EQ(missing_rt_count, shared() ? 5u : 6u);
+ EXPECT_EQ(missing_rt_count,
+ file_strategy() == FileStrategy::kCombine ? 2u : 4u);
EXPECT_EQ(log_event_uuids.size(), 2u);
EXPECT_EQ(parts_uuids.size(), ToLogReaderVector(sorted_parts).size());
@@ -446,6 +361,8 @@
EXPECT_THAT(sorted_parts[0].corrupted, ::testing::Eq(corrupted_parts));
EXPECT_THAT(sorted_parts[1].corrupted, ::testing::Eq(corrupted_parts));
+
+ EXPECT_TRUE(AllPartsMatchOutOfOrderDuration(sorted_parts));
}
void MultinodeLoggerTest::AddExtension(std::string_view extension) {
@@ -526,6 +443,7 @@
reader.Deregister();
}
+ CHECK(LogIsReadableIfMultiNode(LogFilesContainer{SortParts(files)}));
{
std::vector<std::pair<std::vector<realtime_clock::time_point>,
std::vector<realtime_clock::time_point>>>
@@ -578,6 +496,9 @@
reader.Deregister();
for (auto x : result) {
+ EXPECT_EQ(x.first.size(), x.second.size())
+ << ": Got a different number of start and end times, that is very "
+ "bad.";
for (auto y : x.first) {
VLOG(1) << "Start " << y;
}
@@ -589,6 +510,20 @@
}
}
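+// Wraps testing::ConfirmReadable, first verifying that timestamps are stored
+// separately exactly when we are not using the combined writer.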
+std::vector<std::pair<std::vector<realtime_clock::time_point>,
+ std::vector<realtime_clock::time_point>>>
+MultinodeLoggerTest::ConfirmReadable(const std::vector<std::string> &files,
+ realtime_clock::time_point start_time,
+ realtime_clock::time_point end_time) {
+ LogFilesContainer c(SortParts(files));
+ if (file_strategy() == FileStrategy::kCombine) {
+ EXPECT_FALSE(c.TimestampsStoredSeparately());
+ } else {
+ EXPECT_TRUE(c.TimestampsStoredSeparately());
+ }
+ return testing::ConfirmReadable(files, start_time, end_time);
+}
+
// Counts the number of messages on a channel. Returns (channel name, channel
// type, count) for every message matching matcher()
std::vector<std::tuple<std::string, std::string, int>> CountChannelsMatching(
@@ -654,6 +589,45 @@
});
}
+bool AllPartsMatchOutOfOrderDuration(
+ const std::vector<LogFile> &files,
+ std::chrono::nanoseconds max_out_of_order_duration) {
+ bool result = true;
+ for (const LogFile &file : files) {
+ for (const LogParts &parts : file.parts) {
+ if (parts.max_out_of_order_duration != max_out_of_order_duration) {
+ LOG(ERROR) << "Found an out of order duration of "
+ << parts.max_out_of_order_duration.count()
+ << "ns instead of " << max_out_of_order_duration.count()
+ << "ns for " << parts;
+ result = false;
+ }
+ }
+ }
+ return result;
+}
+
+bool AllRebootPartsMatchOutOfOrderDuration(
+ const std::vector<LogFile> &files, const std::string node,
+ std::chrono::nanoseconds max_out_of_order_duration) {
+ bool result = true;
+ for (const LogFile &file : files) {
+ for (const LogParts &parts : file.parts) {
+ if (parts.node == node && parts.boot_count == 0) {
+ continue;
+ }
+ if (parts.max_out_of_order_duration != max_out_of_order_duration) {
+ LOG(ERROR) << "Found an out of order duration of "
+ << parts.max_out_of_order_duration.count()
+ << "ns instead of " << max_out_of_order_duration.count()
+ << "ns for " << parts;
+ result = false;
+ }
+ }
+ }
+ return result;
+}
+
} // namespace testing
} // namespace logger
} // namespace aos
diff --git a/aos/events/logging/multinode_logger_test_lib.h b/aos/events/logging/multinode_logger_test_lib.h
index e207179..d6d3f60 100644
--- a/aos/events/logging/multinode_logger_test_lib.h
+++ b/aos/events/logging/multinode_logger_test_lib.h
@@ -27,6 +27,18 @@
encoder_factory;
};
+enum class FileStrategy {
+ // Use MinimalFileMultiNodeLogNamer
+ kCombine,
+ // Use MultiNodeFilesLogNamer
+ kKeepSeparate,
+};
+
+enum class ForceTimestampBuffering {
+ kForceBufferTimestamps,
+ kAutoBuffer,
+};
+
// Parameters to run all the tests with.
struct ConfigParams {
// The config file to use.
@@ -39,6 +51,11 @@
std::string_view sha256;
// sha256 of the relogged config
std::string_view relogged_sha256;
+ // If kCombine, use MinimalFileMultiNodeLogNamer.
+ FileStrategy file_strategy;
+ // If kForceBufferTimestamps, set --force_timestamp_loading to force buffering
+ // timestamps at the start.
+ ForceTimestampBuffering timestamp_buffering;
};
struct LoggerState {
@@ -53,6 +70,7 @@
const Node *node;
MultiNodeFilesLogNamer *log_namer;
CompressionParams params;
+ FileStrategy file_strategy;
void AppendAllFilenames(std::vector<std::string> *filenames);
@@ -60,18 +78,19 @@
};
constexpr std::string_view kCombinedConfigSha1() {
- return "d018002a9b780d45a69172a1e5dd1d6df49a7c6c63b9bae9125cdc0458ddc6ca";
+ return "8f8a0fd505b5cd8ffc553b2af2796232450a2db88c6db974c5e3406b38eecb93";
}
constexpr std::string_view kSplitConfigSha1() {
- return "562f80087c0e95d9304127c4cb46962659b4bfc11def84253c67702b4213e6cf";
+ return "0a2443f8fe49815de16723c48881f3a78c4e6399273d15da2d77d35588c8cae8";
}
constexpr std::string_view kReloggedSplitConfigSha1() {
- return "cb560559ee3111d7c67314e3e1a5fd7fc88e8b4cfd9d15ea71c8d1cae1c0480b";
+ return "5e4f4784b890c1fdca7aeeadad8bd0354325f3ed8f11dbc9a81112df87407931";
}
LoggerState MakeLoggerState(NodeEventLoopFactory *node,
SimulatedEventLoopFactory *factory,
CompressionParams params,
+ FileStrategy file_strategy,
const Configuration *configuration = nullptr);
std::vector<std::vector<std::string>> ToLogReaderVector(
const std::vector<LogFile> &log_files);
@@ -99,23 +118,37 @@
std::shared_ptr<const aos::Configuration> config,
std::string_view filename);
+// Returns true if all of the parts' max_out_of_order_durations match the
+// provided max_out_of_order_duration.
+bool AllPartsMatchOutOfOrderDuration(
+ const std::vector<LogFile> &files,
+ std::chrono::nanoseconds max_out_of_order_duration =
+ std::chrono::milliseconds(300));
+
+// Same as AllPartsMatchOutOfOrderDuration, but skips checking the parts with
+// boot_count 0 for 'node'.
+bool AllRebootPartsMatchOutOfOrderDuration(
+ const std::vector<LogFile> &files, const std::string node,
+ std::chrono::nanoseconds max_out_of_order_duration =
+ std::chrono::milliseconds(300));
+
class MultinodeLoggerTest : public ::testing::TestWithParam<
std::tuple<ConfigParams, CompressionParams>> {
public:
MultinodeLoggerTest();
bool shared() const;
+ FileStrategy file_strategy() const;
std::vector<std::string> MakeLogFiles(std::string logfile_base1,
std::string logfile_base2,
- size_t pi1_data_count = 3,
- size_t pi2_data_count = 3,
+ size_t pi1_data_count = 1,
+ size_t pi2_data_count = 1,
+ size_t pi1_timestamp_count = 2,
+ size_t pi2_timestamp_count = 2,
bool relogged_config = false);
std::vector<std::string> MakePi1RebootLogfiles();
- std::vector<std::string> MakePi1SingleDirectionLogfiles();
-
std::vector<std::string> MakePi1DeadNodeLogfiles();
std::vector<std::vector<std::string>> StructureLogFiles();
@@ -131,8 +164,17 @@
void VerifyParts(const std::vector<LogFile> &sorted_parts,
const std::vector<std::string> &corrupted_parts = {});
+ std::vector<std::pair<std::vector<realtime_clock::time_point>,
+ std::vector<realtime_clock::time_point>>>
+ ConfirmReadable(
+ const std::vector<std::string> &files,
+ realtime_clock::time_point start_time = realtime_clock::min_time,
+ realtime_clock::time_point end_time = realtime_clock::max_time);
+
void AddExtension(std::string_view extension);
+ gflags::FlagSaver flag_saver_;
+
// Config and factory.
aos::FlatbufferDetachedBuffer<aos::Configuration> config_;
message_bridge::TestingTimeConverter time_converter_;
@@ -148,7 +190,6 @@
std::string logfile_base2_;
std::vector<std::string> pi1_reboot_logfiles_;
std::vector<std::string> logfiles_;
- std::vector<std::string> pi1_single_direction_logfiles_;
std::vector<std::vector<std::string>> structured_logfiles_;
};
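
As a usage sketch (not part of this change): two hypothetical helpers showing how a test body can act on the new ConfigParams fields. FLAGS_force_timestamp_loading comes from the comment on ConfigParams, and flag_saver_ is the gflags::FlagSaver member added to MultinodeLoggerTest above; the helper names themselves are illustrative.

    #include "gflags/gflags.h"

    DECLARE_bool(force_timestamp_loading);  // Assumed to be defined by the logging code.

    // Hypothetical helper: kCombine selects MinimalFileMultiNodeLogNamer and
    // kKeepSeparate selects MultiNodeFilesLogNamer, per the enum comments above.
    bool UseMinimalNamer(const ConfigParams &params) {
      return params.file_strategy == FileStrategy::kCombine;
    }

    // Hypothetical helper: translate the enum into the flag named in the
    // ConfigParams comment. The flag_saver_ member restores it between tests.
    void ApplyTimestampBuffering(const ConfigParams &params) {
      if (params.timestamp_buffering ==
          ForceTimestampBuffering::kForceBufferTimestamps) {
        FLAGS_force_timestamp_loading = true;
      }
    }
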
diff --git a/aos/events/logging/multinode_pingpong_pi3_pingpong.json b/aos/events/logging/multinode_pingpong_pi3_pingpong.json
new file mode 100644
index 0000000..336b328
--- /dev/null
+++ b/aos/events/logging/multinode_pingpong_pi3_pingpong.json
@@ -0,0 +1,312 @@
+{
+ "channels": [
+ {
+ "name": "/pi1/aos",
+ "type": "aos.logging.LogMessageFbs",
+ "source_node": "pi1",
+ "frequency": 200,
+ "num_senders": 20,
+ "max_size": 2048
+ },
+ {
+ "name": "/pi2/aos",
+ "type": "aos.logging.LogMessageFbs",
+ "source_node": "pi2",
+ "frequency": 200,
+ "num_senders": 20,
+ "max_size": 2048
+ },
+ {
+ "name": "/pi3/aos",
+ "type": "aos.logging.LogMessageFbs",
+ "source_node": "pi3",
+ "frequency": 200,
+ "num_senders": 20,
+ "max_size": 2048
+ },
+ /* Logged on pi1 locally */
+ {
+ "name": "/pi1/aos",
+ "type": "aos.timing.Report",
+ "source_node": "pi1",
+ "frequency": 50,
+ "num_senders": 20,
+ "max_size": 2048
+ },
+ {
+ "name": "/pi2/aos",
+ "type": "aos.timing.Report",
+ "source_node": "pi2",
+ "frequency": 50,
+ "num_senders": 20,
+ "max_size": 2048
+ },
+ {
+ "name": "/pi3/aos",
+ "type": "aos.timing.Report",
+ "source_node": "pi3",
+ "frequency": 50,
+ "num_senders": 20,
+ "max_size": 2048
+ },
+ {
+ "name": "/pi1/aos",
+ "type": "aos.message_bridge.ServerStatistics",
+ "logger": "LOCAL_LOGGER",
+ "source_node": "pi1"
+ },
+ {
+ "name": "/pi2/aos",
+ "type": "aos.message_bridge.ServerStatistics",
+ "logger": "LOCAL_LOGGER",
+ "source_node": "pi2"
+ },
+ {
+ "name": "/pi3/aos",
+ "type": "aos.message_bridge.ServerStatistics",
+ "logger": "LOCAL_LOGGER",
+ "source_node": "pi3"
+ },
+ {
+ "name": "/pi1/aos",
+ "type": "aos.logging.DynamicLogCommand",
+ "logger": "LOCAL_LOGGER",
+ "source_node": "pi1"
+ },
+ {
+ "name": "/pi2/aos",
+ "type": "aos.logging.DynamicLogCommand",
+ "logger": "LOCAL_LOGGER",
+ "source_node": "pi2"
+ },
+ {
+ "name": "/pi3/aos",
+ "type": "aos.logging.DynamicLogCommand",
+ "logger": "LOCAL_LOGGER",
+ "source_node": "pi3"
+ },
+ {
+ "name": "/pi1/aos",
+ "type": "aos.message_bridge.ClientStatistics",
+ "logger": "LOCAL_LOGGER",
+ "source_node": "pi1"
+ },
+ {
+ "name": "/pi2/aos",
+ "type": "aos.message_bridge.ClientStatistics",
+ "logger": "LOCAL_LOGGER",
+ "source_node": "pi2"
+ },
+ {
+ "name": "/pi3/aos",
+ "type": "aos.message_bridge.ClientStatistics",
+ "logger": "LOCAL_LOGGER",
+ "source_node": "pi3"
+ },
+ {
+ "name": "/pi1/aos",
+ "type": "aos.message_bridge.Timestamp",
+ "logger": "LOCAL_AND_REMOTE_LOGGER",
+ "logger_nodes": ["pi2"],
+ "source_node": "pi1",
+ "destination_nodes": [
+ {
+ "name": "pi2",
+ "timestamp_logger": "LOCAL_AND_REMOTE_LOGGER",
+ "timestamp_logger_nodes": ["pi1"]
+ },
+ {
+ "name": "pi3",
+ "timestamp_logger": "LOCAL_AND_REMOTE_LOGGER",
+ "timestamp_logger_nodes": ["pi1"]
+ }
+ ]
+ },
+ {
+ "name": "/pi2/aos",
+ "type": "aos.message_bridge.Timestamp",
+ "logger": "LOCAL_AND_REMOTE_LOGGER",
+ "logger_nodes": ["pi1"],
+ "source_node": "pi2",
+ "destination_nodes": [
+ {
+ "name": "pi1",
+ "timestamp_logger": "LOCAL_AND_REMOTE_LOGGER",
+ "timestamp_logger_nodes": ["pi2"]
+ }
+ ]
+ },
+ {
+ "name": "/pi3/aos",
+ "type": "aos.message_bridge.Timestamp",
+ "logger": "LOCAL_AND_REMOTE_LOGGER",
+ "logger_nodes": ["pi1"],
+ "source_node": "pi3",
+ "destination_nodes": [
+ {
+ "name": "pi1",
+ "timestamp_logger": "LOCAL_AND_REMOTE_LOGGER",
+ "timestamp_logger_nodes": ["pi3"]
+ }
+ ]
+ },
+ {
+ "name": "/pi1/aos/remote_timestamps/pi2/pi1/aos/aos-message_bridge-Timestamp",
+ "type": "aos.message_bridge.RemoteMessage",
+ "logger": "NOT_LOGGED",
+ "num_senders": 2,
+ "source_node": "pi1"
+ },
+ {
+ "name": "/pi1/aos/remote_timestamps/pi3/pi1/aos/aos-message_bridge-Timestamp",
+ "type": "aos.message_bridge.RemoteMessage",
+ "logger": "NOT_LOGGED",
+ "num_senders": 2,
+ "source_node": "pi1"
+ },
+ {
+ "name": "/pi2/aos/remote_timestamps/pi1/pi2/aos/aos-message_bridge-Timestamp",
+ "type": "aos.message_bridge.RemoteMessage",
+ "logger": "NOT_LOGGED",
+ "num_senders": 2,
+ "source_node": "pi2"
+ },
+ {
+ "name": "/pi3/aos/remote_timestamps/pi1/pi3/aos/aos-message_bridge-Timestamp",
+ "type": "aos.message_bridge.RemoteMessage",
+ "logger": "NOT_LOGGED",
+ "num_senders": 2,
+ "source_node": "pi3"
+ },
+ {
+ "name": "/pi1/aos/remote_timestamps/pi2/test2/aos-examples-Ping",
+ "type": "aos.message_bridge.RemoteMessage",
+ "logger": "NOT_LOGGED",
+ "num_senders": 2,
+ "source_node": "pi1",
+ "frequency": 150
+ },
+ {
+ "name": "/pi1/aos/remote_timestamps/pi3/test3/aos-examples-Ping",
+ "type": "aos.message_bridge.RemoteMessage",
+ "logger": "NOT_LOGGED",
+ "num_senders": 2,
+ "source_node": "pi1",
+ "frequency": 150
+ },
+ /* Forwarded to pi2 */
+ {
+ "name": "/test2",
+ "type": "aos.examples.Ping",
+ "source_node": "pi1",
+ "destination_nodes": [
+ {
+ "name": "pi2",
+ "priority": 1,
+ "timestamp_logger": "LOCAL_AND_REMOTE_LOGGER",
+ "timestamp_logger_nodes": [
+ "pi1"
+ ],
+ "time_to_live": 5000000
+ }
+ ],
+ "frequency": 150
+ },
+ {
+ "name": "/test2",
+ "type": "aos.examples.Pong",
+ "source_node": "pi2",
+ "logger": "LOCAL_AND_REMOTE_LOGGER",
+ "logger_nodes": ["pi1"],
+ "destination_nodes": [
+ {
+ "name": "pi1",
+ "priority": 1,
+ "timestamp_logger": "LOCAL_LOGGER",
+ "time_to_live": 5000000
+ }
+ ],
+ "frequency": 150
+ },
+ /* Forwarded to pi3 */
+ {
+ "name": "/test3",
+ "type": "aos.examples.Ping",
+ "source_node": "pi1",
+ "destination_nodes": [
+ {
+ "name": "pi3",
+ "priority": 1,
+ "timestamp_logger": "LOCAL_AND_REMOTE_LOGGER",
+ "timestamp_logger_nodes": [
+ "pi1"
+ ],
+ "time_to_live": 5000000
+ }
+ ],
+ "frequency": 150
+ },
+ {
+ "name": "/test3",
+ "type": "aos.examples.Pong",
+ "source_node": "pi3",
+ "logger": "LOCAL_AND_REMOTE_LOGGER",
+ "logger_nodes": ["pi1"],
+ "destination_nodes": [
+ {
+ "name": "pi1",
+ "priority": 1,
+ "timestamp_logger": "LOCAL_LOGGER",
+ "time_to_live": 5000000
+ }
+ ],
+ "frequency": 150
+ }
+ ],
+ "maps": [
+ {
+ "match": {
+ "name": "/aos*",
+ "source_node": "pi1"
+ },
+ "rename": {
+ "name": "/pi1/aos"
+ }
+ },
+ {
+ "match": {
+ "name": "/aos*",
+ "source_node": "pi2"
+ },
+ "rename": {
+ "name": "/pi2/aos"
+ }
+ },
+ {
+ "match": {
+ "name": "/aos*",
+ "source_node": "pi3"
+ },
+ "rename": {
+ "name": "/pi3/aos"
+ }
+ }
+ ],
+ "nodes": [
+ {
+ "name": "pi1",
+ "hostname": "raspberrypi",
+ "port": 9971
+ },
+ {
+ "name": "pi2",
+ "hostname": "raspberrypi2",
+ "port": 9971
+ },
+ {
+ "name": "pi3",
+ "hostname": "raspberrypi3",
+ "port": 9971
+ }
+ ]
+}
diff --git a/aos/events/logging/multinode_pingpong_reboot_ooo.json b/aos/events/logging/multinode_pingpong_reboot_ooo.json
new file mode 100644
index 0000000..80d2972
--- /dev/null
+++ b/aos/events/logging/multinode_pingpong_reboot_ooo.json
@@ -0,0 +1,240 @@
+{
+ "channels": [
+ {
+ "name": "/pi1/aos",
+ "type": "aos.logging.LogMessageFbs",
+ "source_node": "pi1",
+ "frequency": 200,
+ "num_senders": 20,
+ "max_size": 2048
+ },
+ {
+ "name": "/pi2/aos",
+ "type": "aos.logging.LogMessageFbs",
+ "source_node": "pi2",
+ "frequency": 200,
+ "num_senders": 20,
+ "max_size": 2048
+ },
+ /* Logged on pi1 locally */
+ {
+ "name": "/pi1/aos",
+ "type": "aos.timing.Report",
+ "source_node": "pi1",
+ "frequency": 50,
+ "num_senders": 20,
+ "max_size": 2048
+ },
+ {
+ "name": "/pi2/aos",
+ "type": "aos.timing.Report",
+ "source_node": "pi2",
+ "frequency": 50,
+ "num_senders": 20,
+ "max_size": 2048
+ },
+ {
+ "name": "/pi1/aos",
+ "type": "aos.message_bridge.ServerStatistics",
+ "logger": "LOCAL_LOGGER",
+ "source_node": "pi1"
+ },
+ {
+ "name": "/pi2/aos",
+ "type": "aos.message_bridge.ServerStatistics",
+ "logger": "LOCAL_LOGGER",
+ "source_node": "pi2"
+ },
+ {
+ "name": "/pi1/aos",
+ "type": "aos.logging.DynamicLogCommand",
+ "logger": "LOCAL_LOGGER",
+ "source_node": "pi1"
+ },
+ {
+ "name": "/pi2/aos",
+ "type": "aos.logging.DynamicLogCommand",
+ "logger": "LOCAL_LOGGER",
+ "source_node": "pi2"
+ },
+ {
+ "name": "/pi1/aos",
+ "type": "aos.message_bridge.ClientStatistics",
+ "logger": "LOCAL_LOGGER",
+ "source_node": "pi1"
+ },
+ {
+ "name": "/pi2/aos",
+ "type": "aos.message_bridge.ClientStatistics",
+ "logger": "LOCAL_LOGGER",
+ "source_node": "pi2"
+ },
+ {
+ "name": "/pi1/aos",
+ "type": "aos.message_bridge.Timestamp",
+ "logger": "LOCAL_AND_REMOTE_LOGGER",
+ "logger_nodes": ["pi2"],
+ "source_node": "pi1",
+ "destination_nodes": [
+ {
+ "name": "pi2",
+ "timestamp_logger": "LOCAL_AND_REMOTE_LOGGER",
+ "timestamp_logger_nodes": ["pi1"]
+ }
+ ]
+ },
+ {
+ "name": "/pi2/aos",
+ "type": "aos.message_bridge.Timestamp",
+ "logger": "LOCAL_AND_REMOTE_LOGGER",
+ "logger_nodes": ["pi1"],
+ "source_node": "pi2",
+ "destination_nodes": [
+ {
+ "name": "pi1",
+ "timestamp_logger": "LOCAL_AND_REMOTE_LOGGER",
+ "timestamp_logger_nodes": ["pi2"]
+ }
+ ]
+ },
+ {
+ "name": "/pi1/aos/remote_timestamps/pi2/pi1/aos/aos-message_bridge-Timestamp",
+ "type": "aos.message_bridge.RemoteMessage",
+ /* Log one remote timestamp channel to ensure everything still works. */
+ "logger": "LOCAL_LOGGER",
+ "num_senders": 2,
+ "source_node": "pi1"
+ },
+ {
+ "name": "/pi2/aos/remote_timestamps/pi1/pi2/aos/aos-message_bridge-Timestamp",
+ "type": "aos.message_bridge.RemoteMessage",
+ "logger": "NOT_LOGGED",
+ "num_senders": 2,
+ "source_node": "pi2"
+ },
+
+
+ {
+ "name": "/atest1",
+ "type": "aos.examples.Ping",
+ "source_node": "pi1",
+ "destination_nodes": [
+ {
+ "name": "pi2",
+ "priority": 1,
+ "timestamp_logger": "LOCAL_LOGGER",
+ "time_to_live": 5000000
+ }
+ ],
+ "frequency": 150
+ },
+ {
+ "name": "/atest1",
+ "type": "aos.examples.Pong",
+ "source_node": "pi2",
+ "logger": "LOCAL_AND_REMOTE_LOGGER",
+ "logger_nodes": ["pi1"],
+ "destination_nodes": [
+ {
+ "name": "pi1",
+ "priority": 1,
+ "timestamp_logger": "LOCAL_LOGGER",
+ "time_to_live": 5000000
+ }
+ ],
+ "frequency": 150
+ },
+ {
+ "name": "/atest2",
+ "type": "aos.examples.Ping",
+ "source_node": "pi1",
+ "destination_nodes": [
+ {
+ "name": "pi2",
+ "priority": 1,
+ "timestamp_logger": "LOCAL_LOGGER",
+ "time_to_live": 5000000
+ }
+ ],
+ "frequency": 150
+ },
+ {
+ "name": "/atest2",
+ "type": "aos.examples.Pong",
+ "source_node": "pi2",
+ "logger": "LOCAL_AND_REMOTE_LOGGER",
+ "logger_nodes": ["pi1"],
+ "destination_nodes": [
+ {
+ "name": "pi1",
+ "priority": 1,
+ "timestamp_logger": "LOCAL_LOGGER",
+ "time_to_live": 5000000
+ }
+ ],
+ "frequency": 150
+ },
+ {
+ "name": "/atest3",
+ "type": "aos.examples.Ping",
+ "source_node": "pi1",
+ "destination_nodes": [
+ {
+ "name": "pi2",
+ "priority": 1,
+ "timestamp_logger": "LOCAL_LOGGER",
+ "time_to_live": 5000000
+ }
+ ],
+ "frequency": 150
+ },
+ {
+ "name": "/atest3",
+ "type": "aos.examples.Pong",
+ "source_node": "pi2",
+ "logger": "LOCAL_AND_REMOTE_LOGGER",
+ "logger_nodes": ["pi1"],
+ "destination_nodes": [
+ {
+ "name": "pi1",
+ "priority": 1,
+ "timestamp_logger": "LOCAL_LOGGER",
+ "time_to_live": 5000000
+ }
+ ],
+ "frequency": 150
+ }
+ ],
+ "maps": [
+ {
+ "match": {
+ "name": "/aos*",
+ "source_node": "pi1"
+ },
+ "rename": {
+ "name": "/pi1/aos"
+ }
+ },
+ {
+ "match": {
+ "name": "/aos*",
+ "source_node": "pi2"
+ },
+ "rename": {
+ "name": "/pi2/aos"
+ }
+ }
+ ],
+ "nodes": [
+ {
+ "name": "pi1",
+ "hostname": "raspberrypi",
+ "port": 9971
+ },
+ {
+ "name": "pi2",
+ "hostname": "raspberrypi2",
+ "port": 9971
+ }
+ ]
+}
diff --git a/aos/events/logging/multinode_single_node.json b/aos/events/logging/multinode_single_node.json
new file mode 100644
index 0000000..d3a1090
--- /dev/null
+++ b/aos/events/logging/multinode_single_node.json
@@ -0,0 +1,33 @@
+{
+ "channels": [
+ {
+ "name": "/aos",
+ "type": "aos.logging.LogMessageFbs",
+ "source_node": "pi1",
+ "frequency": 200,
+ "num_senders": 20,
+ "max_size": 2048
+ },
+ {
+ "name": "/aos",
+ "type": "aos.timing.Report",
+ "source_node": "pi1",
+ "frequency": 50,
+ "num_senders": 20,
+ "max_size": 2048
+ },
+ {
+ "name": "/aos",
+ "type": "aos.logging.DynamicLogCommand",
+ "logger": "LOCAL_LOGGER",
+ "source_node": "pi1"
+ }
+ ],
+ "nodes": [
+ {
+ "name": "pi1",
+ "hostname": "raspberrypi",
+ "port": 9971
+ }
+ ]
+}
diff --git a/aos/events/logging/realtime_replay_test.cc b/aos/events/logging/realtime_replay_test.cc
index 8c7751e..4118550 100644
--- a/aos/events/logging/realtime_replay_test.cc
+++ b/aos/events/logging/realtime_replay_test.cc
@@ -363,7 +363,7 @@
&config_.message(), &replay_channels);
EXPECT_DEATH(
reader.RemapLoggedChannel<aos::examples::Ping>("/fake", "/original"),
- "which is not included in the replay channels passed to LogReader");
+ "which is not included in the replay channels passed to");
}
} // namespace aos::logger::testing
diff --git a/aos/events/logging/replay_channels.h b/aos/events/logging/replay_channels.h
new file mode 100644
index 0000000..f9ec144
--- /dev/null
+++ b/aos/events/logging/replay_channels.h
@@ -0,0 +1,15 @@
+#ifndef AOS_EVENTS_LOGGING_REPLAY_CHANNELS_H_
+#define AOS_EVENTS_LOGGING_REPLAY_CHANNELS_H_
+
+#include <string>
+#include <vector>
+
+namespace aos {
+namespace logger {
+// Vector of (channel name, channel type) pairs.
+using ReplayChannels = std::vector<std::pair<std::string, std::string>>;
+// Vector of channel indices.
+using ReplayChannelIndices = std::vector<size_t>;
+} // namespace logger
+} // namespace aos
+#endif // AOS_EVENTS_LOGGING_REPLAY_CHANNELS_H_
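
A short sketch of how ReplayChannels is meant to be used, inferred from the realtime_replay_test.cc change above; the channel pair and the LogReader overload taking a ReplayChannels pointer are assumptions based on that test, not shown in this file.

    #include "aos/events/logging/log_reader.h"
    #include "aos/events/logging/replay_channels.h"

    // Replay only the Ping channel; `logfiles` and `config` are assumed to be
    // in scope, as in the test above.
    aos::logger::ReplayChannels replay_channels{{"/test", "aos.examples.Ping"}};
    aos::logger::LogReader reader(logfiles, &config.message(), &replay_channels);
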
diff --git a/aos/events/logging/single_node_merge.cc b/aos/events/logging/single_node_merge.cc
index aee2c5e..0fa3567 100644
--- a/aos/events/logging/single_node_merge.cc
+++ b/aos/events/logging/single_node_merge.cc
@@ -21,7 +21,7 @@
int Main(int argc, char **argv) {
const LogFilesContainer log_files(SortParts(FindLogs(argc, argv)));
- const Configuration *config = log_files.config();
+ const Configuration *config = log_files.config().get();
// Haven't tested this on a single node log, and don't really see a need to
// right now. The higher layers just work.
@@ -37,8 +37,8 @@
// parts to not be from the same boot.
if (log_files.ContainsPartsForNode(node_name)) {
// Filter the parts relevant to each node when building the mapper.
- mappers.emplace_back(
- std::make_unique<TimestampMapper>(node_name, log_files));
+ mappers.emplace_back(std::make_unique<TimestampMapper>(
+ node_name, log_files, TimestampQueueStrategy::kQueueTogether));
if (node_name == FLAGS_node) {
node_mapper = mappers.back().get();
}
diff --git a/aos/events/logging/timestamp_extractor.cc b/aos/events/logging/timestamp_extractor.cc
index 0b22f93..3cf96f6 100644
--- a/aos/events/logging/timestamp_extractor.cc
+++ b/aos/events/logging/timestamp_extractor.cc
@@ -6,8 +6,8 @@
#include "aos/events/logging/logfile_sorting.h"
#include "aos/events/logging/logfile_utils.h"
+#include "aos/events/logging/logfile_validator.h"
#include "aos/init.h"
-#include "aos/network/multinode_timestamp_filter.h"
DECLARE_bool(timestamps_to_csv);
DEFINE_bool(skip_order_validation, false,
@@ -15,135 +15,9 @@
namespace aos::logger {
-namespace chrono = std::chrono;
-
int Main(int argc, char **argv) {
const LogFilesContainer log_files(SortParts(FindLogs(argc, argv)));
- const Configuration *config = log_files.config();
-
- CHECK(configuration::MultiNode(config))
- << ": Timestamps only make sense in a multi-node world.";
-
- // Now, build up all the TimestampMapper classes to read and sort the data.
- std::vector<std::unique_ptr<TimestampMapper>> mappers;
-
- for (const Node *node : configuration::GetNodes(config)) {
- auto node_name = MaybeNodeName(node);
- // Confirm that all the parts are from the same boot if there are enough
- // parts to not be from the same boot.
- if (log_files.ContainsPartsForNode(node_name)) {
- // Filter the parts relevant to each node when building the mapper.
- mappers.emplace_back(
- std::make_unique<TimestampMapper>(node_name, log_files));
- } else {
- mappers.emplace_back(nullptr);
- }
- }
-
- // Now, build up the estimator used to solve for time.
- message_bridge::MultiNodeNoncausalOffsetEstimator multinode_estimator(
- config, config, log_files.boots(), FLAGS_skip_order_validation,
- chrono::seconds(0));
- multinode_estimator.set_reboot_found(
- [config](distributed_clock::time_point reboot_time,
- const std::vector<logger::BootTimestamp> &node_times) {
- LOG(INFO) << "Rebooted at distributed " << reboot_time;
- size_t node_index = 0;
- for (const logger::BootTimestamp &time : node_times) {
- LOG(INFO) << " "
- << config->nodes()->Get(node_index)->name()->string_view()
- << " " << time;
- ++node_index;
- }
- });
-
- {
- std::vector<TimestampMapper *> timestamp_mappers;
- for (std::unique_ptr<TimestampMapper> &mapper : mappers) {
- timestamp_mappers.emplace_back(mapper.get());
- }
- multinode_estimator.SetTimestampMappers(std::move(timestamp_mappers));
- }
-
- // To make things more like the logger and faster, cache the node + channel ->
- // filter mapping in a set of vectors.
- std::vector<std::vector<message_bridge::NoncausalOffsetEstimator *>> filters;
- filters.resize(configuration::NodesCount(config));
-
- for (const Node *node : configuration::GetNodes(config)) {
- const size_t node_index = configuration::GetNodeIndex(config, node);
- filters[node_index].resize(config->channels()->size(), nullptr);
- for (size_t channel_index = 0; channel_index < config->channels()->size();
- ++channel_index) {
- const Channel *channel = config->channels()->Get(channel_index);
-
- if (!configuration::ChannelIsSendableOnNode(channel, node) &&
- configuration::ChannelIsReadableOnNode(channel, node)) {
- // We've got a message which is being forwarded to this node.
- const Node *source_node = configuration::GetNode(
- config, channel->source_node()->string_view());
- filters[node_index][channel_index] =
- multinode_estimator.GetFilter(node, source_node);
- }
- }
- }
-
- multinode_estimator.CheckGraph();
-
- // Now, read all the timestamps for each node. This is simpler than the
- // logger on purpose. It loads in *all* the timestamps in 1 go per node,
- // ignoring memory usage.
- for (const Node *node : configuration::GetNodes(config)) {
- LOG(INFO) << "Reading all data for " << node->name()->string_view();
- const size_t node_index = configuration::GetNodeIndex(config, node);
- TimestampMapper *timestamp_mapper = mappers[node_index].get();
- if (timestamp_mapper == nullptr) {
- continue;
- }
- while (true) {
- TimestampedMessage *m = timestamp_mapper->Front();
- if (m == nullptr) {
- break;
- }
- timestamp_mapper->PopFront();
- }
- }
-
- // Don't get clever. Use the first time as the start time. Note: this is
- // different than how log_cat and others work.
- std::optional<const std::tuple<distributed_clock::time_point,
- std::vector<BootTimestamp>> *>
- next_timestamp = multinode_estimator.QueueNextTimestamp();
- CHECK(next_timestamp);
- LOG(INFO) << "Starting at:";
- for (const Node *node : configuration::GetNodes(config)) {
- const size_t node_index = configuration::GetNodeIndex(config, node);
- LOG(INFO) << " " << node->name()->string_view() << " -> "
- << std::get<1>(*next_timestamp.value())[node_index].time;
- }
-
- std::vector<monotonic_clock::time_point> just_monotonic(
- std::get<1>(*next_timestamp.value()).size());
- for (size_t i = 0; i < just_monotonic.size(); ++i) {
- CHECK_EQ(std::get<1>(*next_timestamp.value())[i].boot, 0u);
- just_monotonic[i] = std::get<1>(*next_timestamp.value())[i].time;
- }
- multinode_estimator.Start(just_monotonic);
-
- // As we pull off all the timestamps, the time problem is continually solved,
- // filling in the CSV files.
- while (true) {
- std::optional<const std::tuple<distributed_clock::time_point,
- std::vector<BootTimestamp>> *>
- next_timestamp = multinode_estimator.QueueNextTimestamp();
- if (!next_timestamp) {
- break;
- }
- multinode_estimator.ObserveTimePassed(std::get<0>(*next_timestamp.value()));
- }
-
- LOG(INFO) << "Done";
-
+ CHECK(MultiNodeLogIsReadable(log_files, FLAGS_skip_order_validation));
return 0;
}
diff --git a/aos/events/ping.cc b/aos/events/ping.cc
index 83a066b..975f2e4 100644
--- a/aos/events/ping.cc
+++ b/aos/events/ping.cc
@@ -11,6 +11,7 @@
int main(int argc, char **argv) {
aos::InitGoogle(&argc, &argv);
+ aos::EventLoop::SetDefaultVersionString("ping_version");
aos::FlatbufferDetachedBuffer<aos::Configuration> config =
aos::configuration::ReadConfig(FLAGS_config);
diff --git a/aos/events/ping_lib.cc b/aos/events/ping_lib.cc
index a7a8dd4..56dde2f 100644
--- a/aos/events/ping_lib.cc
+++ b/aos/events/ping_lib.cc
@@ -7,7 +7,7 @@
#include "aos/events/pong_generated.h"
#include "aos/json_to_flatbuffer.h"
-DEFINE_int32(sleep_ms, 10, "Time to sleep between pings");
+DEFINE_int32(sleep_us, 10000, "Time to sleep between pings");
namespace aos {
@@ -26,7 +26,7 @@
event_loop_->OnRun([this]() {
timer_handle_->Schedule(event_loop_->monotonic_now(),
- chrono::milliseconds(FLAGS_sleep_ms));
+ chrono::microseconds(FLAGS_sleep_us));
});
event_loop_->SetRuntimeRealtimePriority(5);
@@ -35,7 +35,7 @@
void Ping::SendPing() {
if (last_pong_value_ != count_ && (!quiet_ || VLOG_IS_ON(1))) {
LOG(WARNING) << "Did not receive response to " << count_ << " within "
- << FLAGS_sleep_ms << "ms.";
+ << FLAGS_sleep_us << "us.";
}
++count_;
aos::Sender<examples::Ping>::Builder builder = sender_.MakeBuilder();
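
One migration note on the renamed flag, with the equivalence implied by the defaults above:

    // The default period is unchanged by the rename:
    //   before: ping --sleep_ms=10
    //   after:  ping --sleep_us=10000
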
diff --git a/aos/events/pong.cc b/aos/events/pong.cc
index 6a22b6b..9162567 100644
--- a/aos/events/pong.cc
+++ b/aos/events/pong.cc
@@ -11,6 +11,7 @@
int main(int argc, char **argv) {
aos::InitGoogle(&argc, &argv);
+ aos::EventLoop::SetDefaultVersionString("pong_version");
aos::FlatbufferDetachedBuffer<aos::Configuration> config =
aos::configuration::ReadConfig(FLAGS_config);
diff --git a/aos/events/pong_lib.cc b/aos/events/pong_lib.cc
index 93a551e..e8bf171 100644
--- a/aos/events/pong_lib.cc
+++ b/aos/events/pong_lib.cc
@@ -6,27 +6,45 @@
#include "aos/events/ping_generated.h"
#include "aos/events/pong_generated.h"
+DEFINE_bool(fetch, false, "Poll & fetch messages instead of using a watcher.");
+DEFINE_uint32(fetch_period_ms, 10, "Period in milliseconds at which to fetch.");
+
namespace aos {
Pong::Pong(EventLoop *event_loop)
: event_loop_(event_loop),
+ fetcher_(event_loop_->MakeFetcher<examples::Ping>("/test")),
sender_(event_loop_->MakeSender<examples::Pong>("/test")) {
- event_loop_->MakeWatcher("/test", [this](const examples::Ping &ping) {
- if (last_value_ == ping.value() && (!quiet_ || VLOG_IS_ON(1))) {
- LOG(WARNING) << "Duplicate ping value at " << last_value_
- << " time difference " << ping.send_time() - last_send_time_;
- }
- last_value_ = ping.value();
- last_send_time_ = ping.send_time();
- aos::Sender<examples::Pong>::Builder builder = sender_.MakeBuilder();
- examples::Pong::Builder pong_builder =
- builder.MakeBuilder<examples::Pong>();
- pong_builder.add_value(ping.value());
- pong_builder.add_initial_send_time(ping.send_time());
- builder.CheckOk(builder.Send(pong_builder.Finish()));
- });
+ if (FLAGS_fetch) {
+ event_loop_
+ ->AddPhasedLoop(
+ [this](int) {
+ while (fetcher_.FetchNext()) {
+ HandlePing(*fetcher_.get());
+ }
+ },
+ std::chrono::milliseconds(FLAGS_fetch_period_ms))
+ ->set_name("pong");
+ } else {
+ event_loop_->MakeWatcher(
+ "/test", [this](const examples::Ping &ping) { HandlePing(ping); });
+ }
event_loop_->SetRuntimeRealtimePriority(5);
}
+void Pong::HandlePing(const examples::Ping &ping) {
+ if (last_value_ == ping.value() && (!quiet_ || VLOG_IS_ON(1))) {
+ LOG(WARNING) << "Duplicate ping value at " << last_value_
+ << " time difference " << ping.send_time() - last_send_time_;
+ }
+ last_value_ = ping.value();
+ last_send_time_ = ping.send_time();
+ aos::Sender<examples::Pong>::Builder builder = sender_.MakeBuilder();
+ examples::Pong::Builder pong_builder = builder.MakeBuilder<examples::Pong>();
+ pong_builder.add_value(ping.value());
+ pong_builder.add_initial_send_time(ping.send_time());
+ builder.CheckOk(builder.Send(pong_builder.Finish()));
+}
+
} // namespace aos
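
A hedged sketch of driving the new polling path. The event-loop setup mirrors pong.cc above, `config` is assumed to come from aos::configuration::ReadConfig, and the flag names are the DEFINE_bool/DEFINE_uint32 lines in this file.

    // Run Pong in polling mode, e.g.:  pong --fetch --fetch_period_ms=5
    aos::ShmEventLoop event_loop(&config.message());
    aos::Pong pong(&event_loop);  // Picks watcher vs. phased loop from FLAGS_fetch.
    event_loop.Run();
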
diff --git a/aos/events/pong_lib.h b/aos/events/pong_lib.h
index 8b6641a..a12dad0 100644
--- a/aos/events/pong_lib.h
+++ b/aos/events/pong_lib.h
@@ -15,7 +15,9 @@
void set_quiet(bool quiet) { quiet_ = quiet; }
private:
+ void HandlePing(const examples::Ping &ping);
EventLoop *event_loop_;
+ aos::Fetcher<examples::Ping> fetcher_;
aos::Sender<examples::Pong> sender_;
int32_t last_value_ = 0;
int32_t last_send_time_ = 0;
diff --git a/aos/events/shm_event_loop.cc b/aos/events/shm_event_loop.cc
index 8523cdf..3126265 100644
--- a/aos/events/shm_event_loop.cc
+++ b/aos/events/shm_event_loop.cc
@@ -38,8 +38,6 @@
} // namespace
-DEFINE_string(shm_base, "/dev/shm/aos",
- "Directory to place queue backing mmaped files in.");
// This value is affected by the umask of the process which is calling it
// and is set to the user's value by default (check yours running `umask` on
// the command line).
@@ -59,10 +57,6 @@
using namespace shm_event_loop_internal;
-void SetShmBase(const std::string_view base) {
- FLAGS_shm_base = std::string(base) + "/aos";
-}
-
namespace {
const Node *MaybeMyNode(const Configuration *configuration) {
@@ -165,14 +159,16 @@
}
}
- bool FetchNext() {
+ bool FetchNext() { return FetchNextIf(std::ref(should_fetch_)); }
+
+ bool FetchNextIf(std::function<bool(const Context &)> fn) {
const ipc_lib::LocklessQueueReader::Result read_result =
- DoFetch(actual_queue_index_);
+ DoFetch(actual_queue_index_, std::move(fn));
return read_result == ipc_lib::LocklessQueueReader::Result::GOOD;
}
- bool Fetch() {
+ bool FetchIf(std::function<bool(const Context &)> fn) {
const ipc_lib::QueueIndex queue_index = reader_.LatestIndex();
// actual_queue_index_ is only meaningful if it was set by Fetch or
// FetchNext. This happens when valid_data_ has been set. So, only
@@ -187,7 +183,7 @@
}
const ipc_lib::LocklessQueueReader::Result read_result =
- DoFetch(queue_index);
+ DoFetch(queue_index, std::move(fn));
CHECK(read_result != ipc_lib::LocklessQueueReader::Result::NOTHING_NEW)
<< ": Queue index went backwards. This should never happen. "
@@ -196,6 +192,8 @@
return read_result == ipc_lib::LocklessQueueReader::Result::GOOD;
}
+ bool Fetch() { return FetchIf(std::ref(should_fetch_)); }
+
Context context() const { return context_; }
bool RegisterWakeup(int priority) {
@@ -229,7 +227,8 @@
private:
ipc_lib::LocklessQueueReader::Result DoFetch(
- ipc_lib::QueueIndex queue_index) {
+ ipc_lib::QueueIndex queue_index,
+ std::function<bool(const Context &context)> fn) {
// TODO(austin): Get behind and make sure it dies.
char *copy_buffer = nullptr;
if (copy_data()) {
@@ -239,8 +238,7 @@
queue_index.index(), &context_.monotonic_event_time,
&context_.realtime_event_time, &context_.monotonic_remote_time,
&context_.realtime_remote_time, &context_.remote_queue_index,
- &context_.source_boot_uuid, &context_.size, copy_buffer,
- std::ref(should_fetch_));
+ &context_.source_boot_uuid, &context_.size, copy_buffer, std::move(fn));
if (read_result == ipc_lib::LocklessQueueReader::Result::GOOD) {
if (pin_data()) {
@@ -336,9 +334,7 @@
Context context_;
// Pre-allocated should_fetch function so we don't allocate.
- std::function<bool(const Context &)> should_fetch_ = [](const Context &) {
- return true;
- };
+ const std::function<bool(const Context &)> should_fetch_;
};
class ShmFetcher : public RawFetcher {
@@ -364,6 +360,16 @@
return std::make_pair(false, monotonic_clock::min_time);
}
+ std::pair<bool, monotonic_clock::time_point> DoFetchNextIf(
+ std::function<bool(const Context &context)> fn) override {
+ shm_event_loop()->CheckCurrentThread();
+ if (simple_shm_fetcher_.FetchNextIf(std::move(fn))) {
+ context_ = simple_shm_fetcher_.context();
+ return std::make_pair(true, monotonic_clock::now());
+ }
+ return std::make_pair(false, monotonic_clock::min_time);
+ }
+
std::pair<bool, monotonic_clock::time_point> DoFetch() override {
shm_event_loop()->CheckCurrentThread();
if (simple_shm_fetcher_.Fetch()) {
@@ -373,6 +379,16 @@
return std::make_pair(false, monotonic_clock::min_time);
}
+ std::pair<bool, monotonic_clock::time_point> DoFetchIf(
+ std::function<bool(const Context &context)> fn) override {
+ shm_event_loop()->CheckCurrentThread();
+ if (simple_shm_fetcher_.FetchIf(std::move(fn))) {
+ context_ = simple_shm_fetcher_.context();
+ return std::make_pair(true, monotonic_clock::now());
+ }
+ return std::make_pair(false, monotonic_clock::min_time);
+ }
+
absl::Span<const char> GetPrivateMemory() const {
return simple_shm_fetcher_.GetPrivateMemory();
}
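
A hedged sketch of the conditional fetch these DoFetchIf/DoFetchNextIf overrides implement; the public Fetcher::FetchIf wrapper is assumed from the RawFetcher virtuals rather than shown in this diff.

    // Fetch the latest Ping only if it was sent at or before `deadline`. The
    // predicate runs on a metadata-only Context (data == nullptr), matching
    // the DoFetchIf implementations above.
    aos::Fetcher<aos::examples::Ping> fetcher =
        event_loop->MakeFetcher<aos::examples::Ping>("/test");
    const aos::monotonic_clock::time_point deadline = event_loop->monotonic_now();
    const bool fetched = fetcher.FetchIf([&](const aos::Context &context) {
      return context.monotonic_event_time <= deadline;
    });
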
diff --git a/aos/events/shm_event_loop.h b/aos/events/shm_event_loop.h
index 7b7e68f..0e71f96 100644
--- a/aos/events/shm_event_loop.h
+++ b/aos/events/shm_event_loop.h
@@ -8,11 +8,11 @@
#include "aos/events/epoll.h"
#include "aos/events/event_loop.h"
#include "aos/events/event_loop_generated.h"
+#include "aos/ipc_lib/shm_base.h"
#include "aos/ipc_lib/signalfd.h"
#include "aos/stl_mutex/stl_mutex.h"
DECLARE_string(application_name);
-DECLARE_string(shm_base);
namespace aos {
namespace shm_event_loop_internal {
diff --git a/aos/events/shm_event_loop.rs b/aos/events/shm_event_loop.rs
index 3e387ef..880f72b 100644
--- a/aos/events/shm_event_loop.rs
+++ b/aos/events/shm_event_loop.rs
@@ -233,7 +233,7 @@
use aos_configuration::read_config_from;
use aos_events_event_loop_runtime::{Sender, Watcher};
- use aos_init::test_init;
+ use aos_test_init::test_init;
use ping_rust_fbs::aos::examples as ping;
use std::sync::atomic::{AtomicUsize, Ordering};
use std::sync::Barrier;
diff --git a/aos/events/simulated_event_loop.cc b/aos/events/simulated_event_loop.cc
index 7e469f5..7b84380 100644
--- a/aos/events/simulated_event_loop.cc
+++ b/aos/events/simulated_event_loop.cc
@@ -393,6 +393,11 @@
~SimulatedFetcher() { simulated_channel_->UnregisterFetcher(this); }
std::pair<bool, monotonic_clock::time_point> DoFetchNext() override {
+ return DoFetchNextIf(std::function<bool(const Context &context)>());
+ }
+
+ std::pair<bool, monotonic_clock::time_point> DoFetchNextIf(
+ std::function<bool(const Context &context)> fn) override {
// The allocations in here are due to infrastructure and don't count in the
// no mallocs in RT code.
ScopedNotRealtime nrt;
@@ -404,12 +409,27 @@
<< configuration::StrippedChannelToString(
simulated_channel_->channel());
- SetMsg(msgs_.front());
+ if (fn) {
+ Context context = msgs_.front()->context;
+ context.data = nullptr;
+ context.buffer_index = -1;
+
+ if (!fn(context)) {
+ return std::make_pair(false, monotonic_clock::min_time);
+ }
+ }
+
+ SetMsg(std::move(msgs_.front()));
msgs_.pop_front();
return std::make_pair(true, event_loop()->monotonic_now());
}
std::pair<bool, monotonic_clock::time_point> DoFetch() override {
+ return DoFetchIf(std::function<bool(const Context &context)>());
+ }
+
+ std::pair<bool, monotonic_clock::time_point> DoFetchIf(
+ std::function<bool(const Context &context)> fn) override {
// The allocations in here are due to infrastructure and don't count in the
// no mallocs in RT code.
ScopedNotRealtime nrt;
@@ -417,13 +437,35 @@
// TODO(austin): Can we just do this logic unconditionally? It is a lot
// simpler. And call clear, obviously.
if (!msg_ && simulated_channel_->latest_message()) {
- SetMsg(simulated_channel_->latest_message());
+ std::shared_ptr<SimulatedMessage> latest_message =
+ simulated_channel_->latest_message();
+
+ if (fn) {
+ Context context = latest_message->context;
+ context.data = nullptr;
+ context.buffer_index = -1;
+
+ if (!fn(context)) {
+ return std::make_pair(false, monotonic_clock::min_time);
+ }
+ }
+ SetMsg(std::move(latest_message));
return std::make_pair(true, event_loop()->monotonic_now());
} else {
return std::make_pair(false, monotonic_clock::min_time);
}
}
+ if (fn) {
+ Context context = msgs_.back()->context;
+ context.data = nullptr;
+ context.buffer_index = -1;
+
+ if (!fn(context)) {
+ return std::make_pair(false, monotonic_clock::min_time);
+ }
+ }
+
// We've had a message enqueued, so we don't need to go looking for the
// latest message from before we started.
SetMsg(msgs_.back());
@@ -1300,7 +1342,7 @@
new NodeEventLoopFactory(&scheduler_scheduler_, this, node));
}
- if (configuration::MultiNode(configuration)) {
+ if (configuration::NodesCount(configuration) > 1u) {
bridge_ = std::make_unique<message_bridge::SimulatedMessageBridge>(this);
}
}
diff --git a/aos/events/simulated_event_loop.rs b/aos/events/simulated_event_loop.rs
index ea3a154..90c4c86 100644
--- a/aos/events/simulated_event_loop.rs
+++ b/aos/events/simulated_event_loop.rs
@@ -143,7 +143,7 @@
use runfiles::Runfiles;
use aos_configuration::read_config_from;
- use aos_init::test_init;
+ use aos_test_init::test_init;
use ping_rust_fbs::aos::examples::PingBuilder;
// A really basic test of the functionality here.
diff --git a/aos/events/simulated_event_loop_test.cc b/aos/events/simulated_event_loop_test.cc
index 0afa22e..23badae 100644
--- a/aos/events/simulated_event_loop_test.cc
+++ b/aos/events/simulated_event_loop_test.cc
@@ -2101,6 +2101,18 @@
"All ExitHandles must be destroyed before the factory");
}
+// Test that AllowApplicationCreationDuring can't happen in OnRun callbacks.
+TEST(SimulatedEventLoopDeathTest, AllowApplicationCreationDuringInOnRun) {
+ aos::FlatbufferDetachedBuffer<aos::Configuration> config =
+ aos::configuration::ReadConfig(
+ ArtifactPath("aos/events/multinode_pingpong_test_split_config.json"));
+ auto factory = std::make_unique<SimulatedEventLoopFactory>(&config.message());
+ NodeEventLoopFactory *pi1 = factory->GetNodeEventLoopFactory("pi1");
+ std::unique_ptr<EventLoop> loop = pi1->MakeEventLoop("foo");
+ loop->OnRun([&]() { factory->AllowApplicationCreationDuring([]() {}); });
+ EXPECT_DEATH(factory->RunFor(chrono::seconds(1)), "OnRun");
+}
+
// Tests that messages don't survive a reboot of a node.
TEST(SimulatedEventLoopTest, ChannelClearedOnReboot) {
aos::FlatbufferDetachedBuffer<aos::Configuration> config =
diff --git a/aos/events/timing_report_dump_lib.cc b/aos/events/timing_report_dump_lib.cc
index 86936f1..15a8eb9 100644
--- a/aos/events/timing_report_dump_lib.cc
+++ b/aos/events/timing_report_dump_lib.cc
@@ -181,10 +181,16 @@
LOG(INFO) << "Failed to send " << report.send_failures()
<< " timing report(s) in " << report.name()->string_view();
}
+ std::string version_string;
+ if (report.has_version()) {
+ version_string =
+ absl::StrCat("version: \"", report.version()->string_view(), "\" ");
+ }
std::cout << report.name()->string_view() << "[" << report.pid() << "] ("
- << MaybeNodeName("", event_loop_->node()) << ") ("
- << event_loop_->context().monotonic_event_time << ","
+ << MaybeNodeName("", event_loop_->node()) << ") " << version_string
+ << "(" << event_loop_->context().monotonic_event_time << ","
<< event_loop_->context().realtime_event_time << "):" << std::endl;
+
if (report.has_watchers() && report.watchers()->size() > 0) {
PrintWatchers(&std::cout, *report.watchers());
}
@@ -314,6 +320,9 @@
if (accumulated_statistics_.count(map_key) == 0) {
accumulated_statistics_[map_key].name = raw_report.name()->str();
accumulated_statistics_[map_key].pid = raw_report.pid();
+ if (raw_report.has_version()) {
+ accumulated_statistics_[map_key].version = raw_report.version()->str();
+ }
}
timing::ReportT report;
diff --git a/aos/init.rs b/aos/init.rs
index 9ac62f1..13066f1 100644
--- a/aos/init.rs
+++ b/aos/init.rs
@@ -8,19 +8,7 @@
generate!("aos::InitFromRust")
);
-/// Initializes things for a test.
-///
-/// TODO(Brian): Should we provide a proc macro attribute that handles calling this?
-///
-/// # Panics
-///
-/// Panics if non-test initialization has already been performed.
-pub fn test_init() {
- init();
- // TODO(Brian): Do we want any of the other stuff that `:gtest_main` has?
- // TODO(Brian): Call `aos::SetShmBase` like `:gtest_main` does.
-}
-
+/// Initializes AOS.
pub fn init() {
static ONCE: Once = Once::new();
ONCE.call_once(|| {
diff --git a/aos/ipc_lib/BUILD b/aos/ipc_lib/BUILD
index 53d30cd..d67616f 100644
--- a/aos/ipc_lib/BUILD
+++ b/aos/ipc_lib/BUILD
@@ -233,6 +233,22 @@
],
)
+cc_library(
+ name = "lockless_queue_stepping",
+ testonly = True,
+ srcs = [
+ "lockless_queue_stepping.cc",
+ ],
+ hdrs = ["lockless_queue_stepping.h"],
+ deps = [
+ ":lockless_queue",
+ ":shm_observers",
+ "//aos/libc:aos_strsignal",
+ "//aos/testing:prevent_exit",
+ "@com_google_googletest//:gtest",
+ ],
+)
+
cc_test(
name = "lockless_queue_test",
timeout = "eternal",
@@ -242,11 +258,11 @@
deps = [
":event",
":lockless_queue",
+ ":lockless_queue_stepping",
":queue_racer",
":signalfd",
"//aos/events:epoll",
"//aos/testing:googletest",
- "//aos/testing:prevent_exit",
"//aos/util:phased_loop",
],
)
@@ -258,6 +274,7 @@
deps = [
":event",
":lockless_queue",
+ ":lockless_queue_stepping",
":queue_racer",
":shm_observers",
":signalfd",
@@ -404,3 +421,14 @@
"shm_observers.h",
],
)
+
+cc_library(
+ name = "shm_base",
+ srcs = ["shm_base.cc"],
+ hdrs = ["shm_base.h"],
+ target_compatible_with = ["@platforms//os:linux"],
+ visibility = ["//visibility:public"],
+ deps = [
+ "@com_github_gflags_gflags//:gflags",
+ ],
+)
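
A hedged usage sketch of the extracted target: SetShmBase() and the --shm_base flag are assumed to have moved verbatim from shm_event_loop.cc (see the lines removed from it above) into shm_base.h/.cc.

    #include "aos/ipc_lib/shm_base.h"

    // Redirect shared-memory queue backing files, e.g. in a test:
    aos::SetShmBase("/tmp/my_test");  // Leaves FLAGS_shm_base == "/tmp/my_test/aos".
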
diff --git a/aos/ipc_lib/lockless_queue.cc b/aos/ipc_lib/lockless_queue.cc
index 01706ba..fd3a305 100644
--- a/aos/ipc_lib/lockless_queue.cc
+++ b/aos/ipc_lib/lockless_queue.cc
@@ -1,6 +1,7 @@
#include "aos/ipc_lib/lockless_queue.h"
#include <linux/futex.h>
+#include <pwd.h>
#include <sys/types.h>
#include <syscall.h>
#include <unistd.h>
@@ -647,7 +648,20 @@
// redo initialization.
memory->initialized = true;
} else {
- CHECK_EQ(uid, memory->uid) << ": UIDs must match for all processes";
+ if (memory->uid != uid) {
+ // Subsequent calls to getpwuid() overwrite the returned pointer, so pull
+ // the names we care about into strings first.
+ struct passwd const *user_pw = getpwuid(uid);
+ std::string user_username = user_pw->pw_name;
+ struct passwd const *memory_pw = getpwuid(memory->uid);
+ std::string memory_username = memory_pw->pw_name;
+ LOG(FATAL) << "Current user " << user_username << " (uid:" << uid << ") "
+ << "doesn't match shared memory user " << memory_username
+ << " (uid:" << memory->uid << "). "
+ << "Log in as " << memory_username
+ << " user to access this channel.";
+ }
}
return memory;
@@ -1269,19 +1283,20 @@
monotonic_clock::time_point *monotonic_remote_time,
realtime_clock::time_point *realtime_remote_time,
uint32_t *remote_queue_index, UUID *source_boot_uuid, size_t *length,
- char *data, std::function<bool(const Context &)> should_read) const {
- const size_t queue_size = memory_->queue_size();
+ char *data,
+ std::function<bool(const Context &)> should_read_callback) const {
+ const size_t queue_size = const_memory_->queue_size();
// Build up the QueueIndex.
const QueueIndex queue_index =
QueueIndex::Zero(queue_size).IncrementBy(uint32_queue_index);
// Read the message stored at the requested location.
- Index mi = memory_->LoadIndex(queue_index);
- const Message *m = memory_->GetMessage(mi);
+ Index mi = const_memory_->LoadIndex(queue_index);
+ const Message *m = const_memory_->GetMessage(mi);
while (true) {
- DCHECK(!CheckBothRedzones(memory_, m))
+ DCHECK(!CheckBothRedzones(const_memory_, m))
<< ": Invalid message found in shared memory";
// We need to confirm that the data doesn't change while we are reading it.
// Do that by first confirming that the message points to the queue index we
@@ -1299,7 +1314,7 @@
// Someone has re-used this message between when we pulled it out of the
// queue and when we grabbed its index. It is pretty hard to deduce
// what happened. Just try again.
- const Message *const new_m = memory_->GetMessage(queue_index);
+ const Message *const new_m = const_memory_->GetMessage(queue_index);
if (m != new_m) {
m = new_m;
VLOG(3) << "Retrying, m doesn't match";
@@ -1328,7 +1343,7 @@
// then they need to wait. They got ahead. Otherwise, they are
// asking for something crazy, like something before the beginning of
// the queue. Tell them that they are behind.
- if (uint32_queue_index < memory_->queue_size()) {
+ if (uint32_queue_index < const_memory_->queue_size()) {
VLOG(3) << "Near zero, " << std::hex << uint32_queue_index;
return Result::NOTHING_NEW;
} else {
@@ -1343,37 +1358,24 @@
// Then read the data out. Copy it all out to be deterministic and so we can
// make length be from either end.
- if (!should_read) {
- *monotonic_sent_time = m->header.monotonic_sent_time;
- *realtime_sent_time = m->header.realtime_sent_time;
- if (m->header.remote_queue_index == 0xffffffffu) {
- *remote_queue_index = queue_index.index();
- } else {
- *remote_queue_index = m->header.remote_queue_index;
- }
- *monotonic_remote_time = m->header.monotonic_remote_time;
- *realtime_remote_time = m->header.realtime_remote_time;
- *source_boot_uuid = m->header.source_boot_uuid;
- *length = m->header.length;
+ Context context;
+ context.monotonic_event_time = m->header.monotonic_sent_time;
+ context.realtime_event_time = m->header.realtime_sent_time;
+ context.monotonic_remote_time = m->header.monotonic_remote_time;
+ context.realtime_remote_time = m->header.realtime_remote_time;
+ context.queue_index = queue_index.index();
+ if (m->header.remote_queue_index == 0xffffffffu) {
+ context.remote_queue_index = context.queue_index;
} else {
- // Cache the header results so we don't modify the outputs unless the filter
- // function says "go".
- Context context;
- context.monotonic_event_time = m->header.monotonic_sent_time;
- context.realtime_event_time = m->header.realtime_sent_time;
- context.monotonic_remote_time = m->header.monotonic_remote_time;
- context.realtime_remote_time = m->header.realtime_remote_time;
- context.queue_index = queue_index.index();
- if (m->header.remote_queue_index == 0xffffffffu) {
- context.remote_queue_index = context.queue_index;
- } else {
- context.remote_queue_index = m->header.remote_queue_index;
- }
- context.source_boot_uuid = m->header.source_boot_uuid;
- context.size = m->header.length;
- context.data = nullptr;
- context.buffer_index = -1;
+ context.remote_queue_index = m->header.remote_queue_index;
+ }
+ context.source_boot_uuid = m->header.source_boot_uuid;
+ context.size = m->header.length;
+ context.data = nullptr;
+ context.buffer_index = -1;
+ // If the callback is provided, use it.
+ if (should_read_callback) {
// And finally, confirm that the message *still* points to the queue index
// we want. This means it didn't change out from under us. If something
// changed out from under us, we were reading it much too late in its
@@ -1390,24 +1392,22 @@
// We now know that the context is safe to use. See if we are supposed to
// take the message or not.
- if (!should_read(context)) {
+ if (!should_read_callback(context)) {
return Result::FILTERED;
}
-
- // And now take it.
- *monotonic_sent_time = context.monotonic_event_time;
- *realtime_sent_time = context.realtime_event_time;
- *remote_queue_index = context.remote_queue_index;
- *monotonic_remote_time = context.monotonic_remote_time;
- *realtime_remote_time = context.realtime_remote_time;
- *source_boot_uuid = context.source_boot_uuid;
- *length = context.size;
}
- if (data) {
- memcpy(data, m->data(memory_->message_data_size()),
- memory_->message_data_size());
- // Check again since we touched the message again.
+ // Read the data if requested.
+ if (data) {
+ memcpy(data, m->data(const_memory_->message_data_size()),
+ const_memory_->message_data_size());
+ }
+
+ // Now, confirm that nothing changed while we were reading by re-reading the
+ // queue index from the header. We only need to do this if we read anything
+ // new after the previous check above, which happens if we copied the data
+ // out or if we skipped the filtered-case check.
+ if (data || !should_read_callback) {
aos_compiler_memory_barrier();
const QueueIndex final_queue_index = m->header.queue_index.Load(queue_size);
if (final_queue_index != queue_index) {
@@ -1419,18 +1419,75 @@
}
}
+ // And now take it and make it visible to the user. By doing it here, we will
+ // never present partial or corrupted state to the user in the output
+ // pointers.
+ *monotonic_sent_time = context.monotonic_event_time;
+ *realtime_sent_time = context.realtime_event_time;
+ *remote_queue_index = context.remote_queue_index;
+ *monotonic_remote_time = context.monotonic_remote_time;
+ *realtime_remote_time = context.realtime_remote_time;
+ *source_boot_uuid = context.source_boot_uuid;
+ *length = context.size;
+
return Result::GOOD;
}
QueueIndex LocklessQueueReader::LatestIndex() const {
- const size_t queue_size = memory_->queue_size();
+ const size_t queue_size = const_memory_->queue_size();
- // There is only one interesting case. We need to know if the queue is empty.
- // That is done with a sentinel value. At worst, this will be off by one.
- const QueueIndex next_queue_index =
- memory_->next_queue_index.Load(queue_size);
- if (next_queue_index.valid()) {
- const QueueIndex current_queue_index = next_queue_index.DecrementBy(1u);
+ // There are 2 main cases. Either the next queue index is right, or it is
+ // behind by 1 and wrong. If nothing has been published, the next queue index
+ // will be the reserved "Invalid" value, otherwise it will point to the next
+ // place to write. We need to figure out if it is right or wrong, and if it
+ // is wrong, fix it. If we don't, a caller hammering on Read() until it
+ // returns NOTHING_NEW can observe the next message before LatestIndex()
+ // sees it, which means watchers and fetchers may disagree on when a message
+ // is published.
+ QueueIndex actual_next_queue_index =
+ const_memory_->next_queue_index.Load(queue_size);
+
+ // Handle the "nothing has been published" case by making next_queue_index
+ // point to the 0th index.
+ const QueueIndex next_queue_index = ZeroOrValid(actual_next_queue_index);
+
+ // This needs to synchronize with whoever the previous writer at this
+ // location was. Read what is there to see if the message has been published
+ // and next_queue_index is just behind.
+ Index to_replace = const_memory_->LoadIndex(next_queue_index);
+
+ // See if next_queue_index is consistent with the state of the queue. If it
+ // is not, try to atomically update next_queue_index in case the previous
+ // writer failed and retry.
+ if (to_replace.IsPlausible(next_queue_index)) {
+ // If next_queue_index ends up pointing to a message with a matching index,
+ // this is the value that next_queue_index needs to be updated to.
+ const QueueIndex incremented_queue_index = next_queue_index.Increment();
+
+ // We don't care about the result. It will either succeed, or we got
+ // beat in fixing it. The way the Send logic works, the pointer can never
+ // get more than 1 behind or the next send will repair it. So, if we fail,
+ // that means that someone else got there first and fixed it up (and
+ // potentially someone further continued to send).
+ //
+ // Both require no further action from us. Worst case, our next pointer
+ // will not point to the latest message, but there will always be a point after
+ // which the index can change. We just need a consistent snapshot where
+ // there is nothing in the queue that isn't accounted for by
+ // next_queue_index.
+ memory_->next_queue_index.CompareAndExchangeStrong(actual_next_queue_index,
+ incremented_queue_index);
+
+ VLOG(3) << "next_queue_index is lagging, fixed it. Found " << std::hex
+ << to_replace.get() << ", expected "
+ << next_queue_index.DecrementBy(queue_size).index();
+
+ actual_next_queue_index = incremented_queue_index;
+ }
+
+ if (actual_next_queue_index.valid()) {
+ const QueueIndex current_queue_index =
+ actual_next_queue_index.DecrementBy(1u);
return current_queue_index;
}
return QueueIndex::Invalid();
diff --git a/aos/ipc_lib/lockless_queue.h b/aos/ipc_lib/lockless_queue.h
index 2cafb48..01f2b7c 100644
--- a/aos/ipc_lib/lockless_queue.h
+++ b/aos/ipc_lib/lockless_queue.h
@@ -414,14 +414,15 @@
GOOD,
// The message is in the future and we haven't written it yet.
NOTHING_NEW,
- // There is a message, but should_read() returned false so we didn't fetch
- // it.
+ // There is a message, but should_read_callback() returned false so we
+ // didn't fetch it.
FILTERED,
// The message got overwritten while we were reading it.
OVERWROTE,
};
- LocklessQueueReader(LocklessQueue queue) : memory_(queue.const_memory()) {
+ LocklessQueueReader(LocklessQueue queue)
+ : memory_(queue.memory()), const_memory_(queue.const_memory()) {
queue.Initialize();
}
@@ -433,14 +434,14 @@
// message, but the filter function returned false, return FILTERED.
//
// data may be nullptr to indicate the data should not be copied.
- Result Read(uint32_t queue_index,
- monotonic_clock::time_point *monotonic_sent_time,
- realtime_clock::time_point *realtime_sent_time,
- monotonic_clock::time_point *monotonic_remote_time,
- realtime_clock::time_point *realtime_remote_time,
- uint32_t *remote_queue_index, UUID *source_boot_uuid,
- size_t *length, char *data,
- std::function<bool(const Context &context)> should_read) const;
+ Result Read(
+ uint32_t queue_index, monotonic_clock::time_point *monotonic_sent_time,
+ realtime_clock::time_point *realtime_sent_time,
+ monotonic_clock::time_point *monotonic_remote_time,
+ realtime_clock::time_point *realtime_remote_time,
+ uint32_t *remote_queue_index, UUID *source_boot_uuid, size_t *length,
+ char *data,
+ std::function<bool(const Context &context)> should_read_callback) const;
// Returns the index to the latest queue message. Returns empty_queue_index()
// if there are no messages in the queue. Do note that this index wraps if
@@ -448,7 +449,8 @@
QueueIndex LatestIndex() const;
private:
- const LocklessQueueMemory *const memory_;
+ LocklessQueueMemory *const memory_;
+ const LocklessQueueMemory *const_memory_;
};
// Returns the number of messages which are logically in the queue at a time.
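
To make the FILTERED result concrete, a hedged sketch of calling Read() with a should_read_callback. The out-parameter list follows the declaration above; `reader` (a LocklessQueueReader) and `index` are assumed to be set up elsewhere.

    aos::monotonic_clock::time_point monotonic_sent, monotonic_remote;
    aos::realtime_clock::time_point realtime_sent, realtime_remote;
    uint32_t remote_queue_index;
    aos::UUID source_boot_uuid;
    size_t length;
    const auto result = reader.Read(
        index, &monotonic_sent, &realtime_sent, &monotonic_remote,
        &realtime_remote, &remote_queue_index, &source_boot_uuid, &length,
        /*data=*/nullptr,  // Skip copying the payload.
        [](const aos::Context &context) { return context.size > 64; });
    if (result == aos::ipc_lib::LocklessQueueReader::Result::FILTERED) {
      // A message exists but the callback declined it; no outputs were written.
    }
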
diff --git a/aos/ipc_lib/lockless_queue_death_test.cc b/aos/ipc_lib/lockless_queue_death_test.cc
index c37dc63..2a95b31 100644
--- a/aos/ipc_lib/lockless_queue_death_test.cc
+++ b/aos/ipc_lib/lockless_queue_death_test.cc
@@ -22,10 +22,9 @@
#include "aos/ipc_lib/aos_sync.h"
#include "aos/ipc_lib/lockless_queue.h"
#include "aos/ipc_lib/lockless_queue_memory.h"
+#include "aos/ipc_lib/lockless_queue_stepping.h"
#include "aos/ipc_lib/shm_observers.h"
-#include "aos/libc/aos_strsignal.h"
#include "aos/realtime.h"
-#include "aos/testing/prevent_exit.h"
#include "aos/testing/test_logging.h"
namespace aos {
@@ -34,529 +33,9 @@
namespace chrono = ::std::chrono;
-#if ((defined(AOS_SANITIZER_address) || defined(AOS_SANITIZER_thread)) && \
- !defined(__clang__) && __GNUC__ <= 4 && __GNUC_MINOR__ <= 8) || \
- defined(__ARM_EABI__)
-// There are various reasons why we might not actually be able to do this
-// testing, but we still want to run the functions to test anything they test
-// other than shm robustness.
-//
-// GCC 4.8 has too old of a version of asan to allow SIGSEGV handler
-// chaining.
-//
-// GCC 4.8's tsan doesn't work with the code for calling the real sigaction to
-// work arounds its weirdness of the SIGTRAP handler.
-//
-// ARM doesn't have a nice way to do single-stepping, and I don't feel like
-// dealing with editing the next instruction to be a breakpoint and then
-// changing it back.
-#else
-
-// This code currently supports amd64 only, but it
-// shouldn't be hard to port to i386 (it should just be using the name for
-// only the smaller part of the flags register), but that's not tested, and
-// porting it to any other architecture is more work.
-// Currently, we skip doing anything exciting on arm (just run the code without
-// any robustness testing) and refuse to compile anywhere else.
-
-#define SIMPLE_ASSERT(condition, message) \
- do { \
- if (!(condition)) { \
- static const char kMessage[] = message "\n"; \
- if (write(STDERR_FILENO, kMessage, sizeof(kMessage) - 1) != \
- (sizeof(kMessage) - 1)) { \
- static const char kFailureMessage[] = "writing failed\n"; \
- __attribute__((unused)) int ignore = write( \
- STDERR_FILENO, kFailureMessage, sizeof(kFailureMessage) - 1); \
- } \
- abort(); \
- } \
- } while (false)
-
-// Array to track writes to memory, and make sure they happen in the right
-// order.
-class WritesArray {
- public:
- uintptr_t At(size_t location) const {
- SIMPLE_ASSERT(location < size_, "too far into writes array");
- return writes_[location];
- }
- void Add(uintptr_t pointer) {
- SIMPLE_ASSERT(size_ < kSize, "too many writes");
- writes_[size_++] = pointer;
- }
-
- size_t size() const { return size_; }
-
- private:
- static const size_t kSize = 20000;
-
- uintptr_t writes_[kSize];
- size_t size_ = 0;
-};
-
-enum class DieAtState {
- // No SEGVs should be happening.
- kDisabled,
- // SEGVs are fine. Normal operation.
- kRunning,
- // We are manipulating a mutex. No SEGVs should be happening.
- kWriting,
-};
-
-// What we exit with when we're exiting in the middle.
-const int kExitEarlyValue = 123;
-
-// We have to keep track of everything in a global variable because there's no
-// other way for the signal handlers to find it.
-struct GlobalState {
- // Pointer to the queue memory, and its size.
- void *lockless_queue_memory;
- size_t lockless_queue_memory_size;
-
- // Pointer to a second block of memory the same size. This (on purpose) has
- // the same size as lockless_queue_memory so we can point the robust mutexes
- // here.
- void *lockless_queue_memory_lock_backup;
-
- // Expected writes.
- const WritesArray *writes_in;
- // Actual writes.
- WritesArray *writes_out;
- // Location to die at, and how far we have gotten.
- size_t die_at, current_location;
- // State.
- DieAtState state;
-};
-::std::atomic<GlobalState *> global_state;
-
-// Returns true if the address is in the queue memory chunk.
-bool IsInLocklessQueueMemory(void *address) {
- GlobalState *my_global_state = global_state.load(::std::memory_order_relaxed);
- void *lockless_queue_memory = my_global_state->lockless_queue_memory;
- if (address < lockless_queue_memory) {
- return false;
- if (reinterpret_cast<uintptr_t>(address) >
- reinterpret_cast<uintptr_t>(lockless_queue_memory) +
- my_global_state->lockless_queue_memory_size)
- return false;
- }
- return true;
-}
-
-// Calls mprotect(2) for the entire shared memory region with the given prot.
-void ShmProtectOrDie(int prot) {
- GlobalState *my_global_state = global_state.load(::std::memory_order_relaxed);
- PCHECK(mprotect(my_global_state->lockless_queue_memory,
- my_global_state->lockless_queue_memory_size, prot) != -1)
- << ": mprotect(" << my_global_state->lockless_queue_memory << ", "
- << my_global_state->lockless_queue_memory_size << ", 0x" << std::hex
- << prot << ") failed";
-}
-
-// Checks a write into the queue and conditionally dies. Tracks the write.
-void HandleWrite(void *address) {
- GlobalState *my_global_state = global_state.load(::std::memory_order_relaxed);
- uintptr_t address_offset =
- reinterpret_cast<uintptr_t>(address) -
- reinterpret_cast<uintptr_t>(my_global_state->lockless_queue_memory);
- if (my_global_state->writes_in != nullptr) {
- SIMPLE_ASSERT(my_global_state->writes_in->At(
- my_global_state->current_location) == address_offset,
- "wrong write order");
- }
- if (my_global_state->writes_out != nullptr) {
- my_global_state->writes_out->Add(address_offset);
- }
- if (my_global_state->die_at != 0) {
- if (my_global_state->die_at == my_global_state->current_location) {
- _exit(kExitEarlyValue);
- }
- }
- ++my_global_state->current_location;
-}
-
-struct sigaction old_segv_handler, old_trap_handler;
-
-// Calls the original signal handler.
-bool CallChainedAction(const struct sigaction &action, int signal,
- siginfo_t *siginfo, void *context) {
- if (action.sa_handler == SIG_IGN || action.sa_handler == SIG_DFL) {
- return false;
- }
- if (action.sa_flags & SA_SIGINFO) {
- action.sa_sigaction(signal, siginfo, context);
- } else {
- action.sa_handler(signal);
- }
- return true;
-}
-
-void segv_handler(int signal, siginfo_t *siginfo, void *context_void) {
- GlobalState *my_global_state = global_state.load(::std::memory_order_relaxed);
- const int saved_errno = errno;
- SIMPLE_ASSERT(signal == SIGSEGV, "wrong signal for SIGSEGV handler");
-
- // Only process memory addresses in our shared memory block.
- if (!IsInLocklessQueueMemory(siginfo->si_addr)) {
- if (CallChainedAction(old_segv_handler, signal, siginfo, context_void)) {
- errno = saved_errno;
- return;
- } else {
- SIMPLE_ASSERT(false, "actual SIGSEGV");
- }
- }
- SIMPLE_ASSERT(my_global_state->state == DieAtState::kRunning,
- "bad state for SIGSEGV");
-
- HandleWrite(siginfo->si_addr);
-
- ShmProtectOrDie(PROT_READ | PROT_WRITE);
- my_global_state->state = DieAtState::kWriting;
- errno = saved_errno;
-
-#if defined(__x86_64__)
- __asm__ __volatile__("int $3" ::: "memory", "cc");
-#elif defined(__aarch64__)
- __asm__ __volatile__("brk #0" ::: "memory", "cc");
-#else
-#error Unhandled architecture
-#endif
-}
-
-// A mutex lock is about to happen. Mark the memory rw, and check to see if we
-// should die.
-void futex_before(void *address, bool) {
- GlobalState *my_global_state = global_state.load(::std::memory_order_relaxed);
- if (IsInLocklessQueueMemory(address)) {
- assert(my_global_state->state == DieAtState::kRunning);
- HandleWrite(address);
- ShmProtectOrDie(PROT_READ | PROT_WRITE);
- my_global_state->state = DieAtState::kWriting;
- }
-}
-
-// The SEGV handler has set a breakpoint 1 instruction in the future. This
-// clears it, marks memory readonly, and continues.
-void trap_handler(int signal, siginfo_t *, void * /*context*/) {
- GlobalState *my_global_state = global_state.load(::std::memory_order_relaxed);
- const int saved_errno = errno;
- SIMPLE_ASSERT(signal == SIGTRAP, "wrong signal for SIGTRAP handler");
-
- my_global_state->state = DieAtState::kWriting;
- SIMPLE_ASSERT(my_global_state->state == DieAtState::kWriting,
- "bad state for SIGTRAP");
- ShmProtectOrDie(PROT_READ);
- my_global_state->state = DieAtState::kRunning;
- errno = saved_errno;
-}
-
-// We have a manual trap for mutexes. Check to see if we were supposed to die
-// on this write (the compare/exchange for the mutex), and mark the memory ro
-// again.
-void futex_after(void *address, bool) {
- GlobalState *my_global_state = global_state.load(::std::memory_order_relaxed);
- if (IsInLocklessQueueMemory(address)) {
- assert(my_global_state->state == DieAtState::kWriting);
- ShmProtectOrDie(PROT_READ);
- my_global_state->state = DieAtState::kRunning;
- }
-}
-
-// Installs the signal handler.
-void InstallHandler(int signal, void (*handler)(int, siginfo_t *, void *),
- struct sigaction *old_action) {
- struct sigaction action;
- memset(&action, 0, sizeof(action));
- action.sa_sigaction = handler;
- // We don't do a full normal signal handler exit with ptrace, so SA_NODEFER is
- // necessary to keep our signal handler active.
- action.sa_flags = SA_RESTART | SA_SIGINFO | SA_NODEFER;
-#ifdef AOS_SANITIZER_thread
- // Tsan messes with signal handlers to check for race conditions, and it
- // causes problems, so we have to work around it for SIGTRAP.
- if (signal == SIGTRAP) {
- typedef int (*SigactionType)(int, const struct sigaction *,
- struct sigaction *);
- SigactionType real_sigaction =
- reinterpret_cast<SigactionType>(dlsym(RTLD_NEXT, "sigaction"));
- if (sigaction == real_sigaction) {
- LOG(WARNING) << "failed to work around tsan signal handling weirdness";
- }
- PCHECK(real_sigaction(signal, &action, old_action) == 0);
- return;
- }
-#endif
- PCHECK(sigaction(signal, &action, old_action) == 0);
-}
-
-// gtest only allows creating fatal failures in functions returning void...
-// status is from wait(2).
-void DetectFatalFailures(int status) {
- if (WIFEXITED(status)) {
- FAIL() << " child returned status "
- << ::std::to_string(WEXITSTATUS(status));
- } else if (WIFSIGNALED(status)) {
- FAIL() << " child exited because of signal "
- << aos_strsignal(WTERMSIG(status));
- } else {
- FAIL() << " child exited with status " << ::std::hex << status;
- }
-}
-
-// Returns true if it runs all the way through.
-bool RunFunctionDieAt(::std::function<void(void *)> prepare,
- ::std::function<void(void *)> function,
- bool *test_failure, size_t die_at, bool prepare_in_child,
- uintptr_t writable_offset, const WritesArray *writes_in,
- WritesArray *writes_out) {
- GlobalState *my_global_state = global_state.load(::std::memory_order_relaxed);
- my_global_state->writes_in = writes_in;
- my_global_state->writes_out = writes_out;
- my_global_state->die_at = die_at;
- my_global_state->current_location = 0;
- my_global_state->state = DieAtState::kDisabled;
-
- if (!prepare_in_child) prepare(my_global_state->lockless_queue_memory);
-
- const pid_t pid = fork();
- PCHECK(pid != -1) << ": fork() failed";
- if (pid == 0) {
- // Run the test.
- ::aos::testing::PreventExit();
-
- if (prepare_in_child) prepare(my_global_state->lockless_queue_memory);
-
- // Update the robust list offset.
- linux_code::ipc_lib::SetRobustListOffset(writable_offset);
- // Install a segv handler (to detect writes to the memory block), and a trap
- // handler so we can single step.
- InstallHandler(SIGSEGV, segv_handler, &old_segv_handler);
- InstallHandler(SIGTRAP, trap_handler, &old_trap_handler);
- CHECK_EQ(old_trap_handler.sa_handler, SIG_DFL);
- linux_code::ipc_lib::SetShmAccessorObservers(futex_before, futex_after);
-
- PCHECK(ptrace(PTRACE_TRACEME, 0, 0, 0) == 0);
- ShmProtectOrDie(PROT_READ);
- my_global_state->state = DieAtState::kRunning;
-
- function(my_global_state->lockless_queue_memory);
- my_global_state->state = DieAtState::kDisabled;
- ShmProtectOrDie(PROT_READ | PROT_WRITE);
- _exit(0);
- } else {
- // Annoying wrapper type because elf_gregset_t is an array, which C++
- // handles poorly.
- struct RestoreState {
- RestoreState(elf_gregset_t regs_in) {
- memcpy(regs, regs_in, sizeof(regs));
- }
- elf_gregset_t regs;
- };
- std::optional<RestoreState> restore_regs;
- bool pass_trap = false;
- // Wait until the child process dies.
- while (true) {
- int status;
- pid_t waited_on = waitpid(pid, &status, 0);
- if (waited_on == -1) {
- if (errno == EINTR) continue;
- PCHECK(false) << ": waitpid(" << pid << ", " << &status
- << ", 0) failed";
- }
- CHECK_EQ(waited_on, pid)
- << ": waitpid got child " << waited_on << " instead of " << pid;
- if (WIFSTOPPED(status)) {
- // The child was stopped via ptrace.
- const int stop_signal = WSTOPSIG(status);
- elf_gregset_t regs;
- {
- struct iovec iov;
- iov.iov_base = &regs;
- iov.iov_len = sizeof(regs);
- PCHECK(ptrace(PTRACE_GETREGSET, pid, NT_PRSTATUS, &iov) == 0);
- CHECK_EQ(iov.iov_len, sizeof(regs))
- << ": ptrace regset is the wrong size";
- }
- if (stop_signal == SIGSEGV) {
- // It's a SEGV, hopefully due to writing to the shared memory which is
- // marked read-only. We record the instruction that faulted so we can
- // look for it while single-stepping, then deliver the signal so the
- // child can mark it read-write and then poke us to single-step that
- // instruction.
-
- CHECK(!restore_regs)
- << ": Traced child got a SEGV while single-stepping";
- // Save all the registers to resume execution at the current location
- // in the child.
- restore_regs = RestoreState(regs);
- PCHECK(ptrace(PTRACE_CONT, pid, nullptr, SIGSEGV) == 0);
- continue;
- }
- if (stop_signal == SIGTRAP) {
- if (pass_trap) {
- // This is the new SIGTRAP we generated, which we just want to pass
- // through so the child's signal handler can restore the memory to
- // read-only
- PCHECK(ptrace(PTRACE_CONT, pid, nullptr, SIGTRAP) == 0);
- pass_trap = false;
- continue;
- }
- if (restore_regs) {
- // Restore the state we saved before delivering the SEGV, and then
- // single-step that one instruction.
- struct iovec iov;
- iov.iov_base = &restore_regs->regs;
- iov.iov_len = sizeof(restore_regs->regs);
- PCHECK(ptrace(PTRACE_SETREGSET, pid, NT_PRSTATUS, &iov) == 0);
- restore_regs = std::nullopt;
- PCHECK(ptrace(PTRACE_SINGLESTEP, pid, nullptr, nullptr) == 0);
- continue;
- }
- // We executed the single instruction that originally faulted, so
- // now deliver a SIGTRAP to the child so it can mark the memory
- // read-only again.
- pass_trap = true;
- PCHECK(kill(pid, SIGTRAP) == 0);
- PCHECK(ptrace(PTRACE_CONT, pid, nullptr, nullptr) == 0);
- continue;
- }
- LOG(FATAL) << "Traced child was stopped with unexpected signal: "
- << static_cast<int>(WSTOPSIG(status));
- }
- if (WIFEXITED(status)) {
- if (WEXITSTATUS(status) == 0) return true;
- if (WEXITSTATUS(status) == kExitEarlyValue) return false;
- }
- DetectFatalFailures(status);
- if (test_failure) *test_failure = true;
- return false;
- }
- }
-}
-
-bool RunFunctionDieAtAndCheck(const LocklessQueueConfiguration &config,
- ::std::function<void(void *)> prepare,
- ::std::function<void(void *)> function,
- ::std::function<void(void *)> check,
- bool *test_failure, size_t die_at,
- bool prepare_in_child,
- const WritesArray *writes_in,
- WritesArray *writes_out) {
- // Allocate shared memory.
- GlobalState *my_global_state = global_state.load(::std::memory_order_relaxed);
- my_global_state->lockless_queue_memory_size = LocklessQueueMemorySize(config);
- my_global_state->lockless_queue_memory = static_cast<void *>(
- mmap(nullptr, my_global_state->lockless_queue_memory_size,
- PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, -1, 0));
- CHECK_NE(MAP_FAILED, my_global_state->lockless_queue_memory);
-
- // And the backup used to point the robust list at.
- my_global_state->lockless_queue_memory_lock_backup = static_cast<void *>(
- mmap(nullptr, my_global_state->lockless_queue_memory_size,
- PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, -1, 0));
- CHECK_NE(MAP_FAILED, my_global_state->lockless_queue_memory_lock_backup);
-
- // The writable offset tells us how to convert from a pointer in the queue to
- // a pointer that is safe to write. This is so robust futexes don't spin the
- // kernel when the user maps a page PROT_READ, and the kernel tries to clear
- // the futex there.
- const uintptr_t writable_offset =
- reinterpret_cast<uintptr_t>(
- my_global_state->lockless_queue_memory_lock_backup) -
- reinterpret_cast<uintptr_t>(my_global_state->lockless_queue_memory);
-
- bool r;
- // Do the actual test in a new thread so any locked mutexes will be cleaned up
- // nicely with owner-died at the end.
- ::std::thread test_thread([&prepare, &function, &check, test_failure, die_at,
- prepare_in_child, writes_in, writes_out,
- writable_offset, &r]() {
- r = RunFunctionDieAt(prepare, function, test_failure, die_at,
- prepare_in_child, writable_offset, writes_in,
- writes_out);
- if (::testing::Test::HasFailure()) {
- r = false;
- if (test_failure) *test_failure = true;
- return;
- }
-
- check(
- global_state.load(::std::memory_order_relaxed)->lockless_queue_memory);
- });
- test_thread.join();
- return r;
-}
-
-// Tests function to make sure it handles dying after each store it makes to
-// shared memory. check should make sure function behaved correctly.
-// This will repeatedly create a new TestSharedMemory, run prepare, run
-// function, and then
-// run check, killing the process function is running in at various points. It
-// will stop if anything reports a fatal gtest failure.
-//
-// prepare_in_child being true means the prepare function will be run in the
-// child instead of the parent which doesn't die. This means that reference
-// counts on any objects it allocates won't be duplicated, but it also means
-// that any variables it sets will not be visible in check etc.
-void TestShmRobustness(const LocklessQueueConfiguration &config,
- ::std::function<void(void *)> prepare,
- ::std::function<void(void *)> function,
- ::std::function<void(void *)> check,
- bool prepare_in_child) {
- // Map the global state and memory for the Writes array so it exists across
- // the process boundary.
- void *shared_allocations = static_cast<GlobalState *>(
- mmap(nullptr, sizeof(GlobalState) + sizeof(WritesArray),
- PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, -1, 0));
- CHECK_NE(MAP_FAILED, shared_allocations);
-
- global_state.store(static_cast<GlobalState *>(shared_allocations));
- shared_allocations = static_cast<void *>(
- static_cast<uint8_t *>(shared_allocations) + sizeof(GlobalState));
- WritesArray *expected_writes = static_cast<WritesArray *>(shared_allocations);
- new (expected_writes) WritesArray();
-
- bool test_failed = false;
- ASSERT_TRUE(RunFunctionDieAtAndCheck(config, prepare, function, check,
- &test_failed, 0, prepare_in_child,
- nullptr, expected_writes));
- if (test_failed) {
- ADD_FAILURE();
- return;
- }
-
- size_t die_at = 1;
- while (true) {
- SCOPED_TRACE("dying at " + ::std::to_string(die_at) + "/" +
- ::std::to_string(expected_writes->size()));
- if (RunFunctionDieAtAndCheck(config, prepare, function, check, &test_failed,
- die_at, prepare_in_child, expected_writes,
- nullptr)) {
- LOG(INFO) << "Tested " << die_at << " death points";
- return;
- }
- if (test_failed) {
- ADD_FAILURE();
- }
- if (::testing::Test::HasFailure()) return;
- ++die_at;
- }
-}
+#ifdef SUPPORTS_SHM_ROBUSTNESS_TEST
namespace {
-pid_t gettid() { return syscall(SYS_gettid); }
-
-// Sets FUTEX_OWNER_DIED if the owner was tid. This fakes what the kernel does
-// with a robust mutex.
-bool PretendOwnerDied(aos_mutex *mutex, pid_t tid) {
- if ((mutex->futex & FUTEX_TID_MASK) == tid) {
- mutex->futex = FUTEX_OWNER_DIED;
- return true;
- }
- return false;
-}
-
static int kPinnedMessageIndex = 0;
constexpr monotonic_clock::duration kChannelStorageDuration =
@@ -567,12 +46,8 @@
// Tests that death during sends is recovered from correctly.
TEST(LocklessQueueTest, Death) {
::aos::testing::EnableTestLogging();
- // Capture the tid in the child so we can tell if it died. Use mmap so it
- // works across the process boundary.
- pid_t *tid =
- static_cast<pid_t *>(mmap(nullptr, sizeof(pid_t), PROT_READ | PROT_WRITE,
- MAP_SHARED | MAP_ANONYMOUS, -1, 0));
- CHECK_NE(MAP_FAILED, tid);
+
+ SharedTid tid;
// Make a small queue so it is easier to debug.
LocklessQueueConfiguration config;
@@ -584,14 +59,14 @@
TestShmRobustness(
config,
- [config, tid](void *memory) {
+ [config, &tid](void *memory) {
// Initialize the queue and grab the tid.
LocklessQueue(
reinterpret_cast<aos::ipc_lib::LocklessQueueMemory *>(memory),
reinterpret_cast<aos::ipc_lib::LocklessQueueMemory *>(memory),
config)
.Initialize();
- *tid = gettid();
+ tid.Set();
},
[config](void *memory) {
LocklessQueue queue(
@@ -617,7 +92,7 @@
}
}
},
- [config, tid](void *raw_memory) {
+ [config, &tid](void *raw_memory) {
::aos::ipc_lib::LocklessQueueMemory *const memory =
reinterpret_cast<::aos::ipc_lib::LocklessQueueMemory *>(raw_memory);
// Confirm that we can create 2 senders (the number in the queue), and
@@ -629,10 +104,10 @@
// TestShmRobustness doesn't handle robust futexes. It is happy to just
// not crash with them. We know what they are, and what the tid of the
// holder is. So go pretend to be the kernel and fix it for it.
- PretendOwnerDied(&memory->queue_setup_lock, *tid);
+ PretendOwnerDied(&memory->queue_setup_lock, tid.Get());
for (size_t i = 0; i < config.num_senders; ++i) {
- if (PretendOwnerDied(&memory->GetSender(i)->tid, *tid)) {
+ if (PretendOwnerDied(&memory->GetSender(i)->tid, tid.Get())) {
// Print out before and after results if a sender died. That is the
// more fun case.
print = true;
@@ -735,8 +210,7 @@
// Confirm our message got through.
EXPECT_EQ(last_data, '9') << ": Got through " << i;
- },
- /* prepare_in_child = true */ true);
+ });
}
#endif
diff --git a/aos/ipc_lib/lockless_queue_stepping.cc b/aos/ipc_lib/lockless_queue_stepping.cc
new file mode 100644
index 0000000..0ec5214
--- /dev/null
+++ b/aos/ipc_lib/lockless_queue_stepping.cc
@@ -0,0 +1,473 @@
+#include "aos/ipc_lib/lockless_queue_stepping.h"
+
+#include <dlfcn.h>
+#include <elf.h>
+#include <linux/futex.h>
+#include <sys/mman.h>
+#include <sys/procfs.h>
+#include <sys/ptrace.h>
+#include <sys/syscall.h>
+#include <sys/uio.h>
+#include <unistd.h>
+#include <wait.h>
+
+#include <memory>
+#include <thread>
+
+#include "glog/logging.h"
+#include "gtest/gtest.h"
+
+#include "aos/ipc_lib/aos_sync.h"
+#include "aos/ipc_lib/lockless_queue_memory.h"
+#include "aos/ipc_lib/shm_observers.h"
+#include "aos/libc/aos_strsignal.h"
+#include "aos/testing/prevent_exit.h"
+
+#ifdef SUPPORTS_SHM_ROBUSTNESS_TEST
+
+namespace aos {
+namespace ipc_lib {
+namespace testing {
+
+namespace {
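+// Wrapper for the gettid(2) syscall; glibc only ships a gettid() wrapper
+// starting with version 2.30, so we call it directly.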
+pid_t gettid() { return syscall(SYS_gettid); }
+
+::std::atomic<GlobalState *> global_state;
+
+struct sigaction old_segv_handler, old_trap_handler;
+
+// Calls the original signal handler.
+bool CallChainedAction(const struct sigaction &action, int signal,
+ siginfo_t *siginfo, void *context) {
+ if (action.sa_handler == SIG_IGN || action.sa_handler == SIG_DFL) {
+ return false;
+ }
+ if (action.sa_flags & SA_SIGINFO) {
+ action.sa_sigaction(signal, siginfo, context);
+ } else {
+ action.sa_handler(signal);
+ }
+ return true;
+}
+
+void segv_handler(int signal, siginfo_t *siginfo, void *context_void) {
+ GlobalState *my_global_state = GlobalState::Get();
+ const int saved_errno = errno;
+ SIMPLE_ASSERT(signal == SIGSEGV, "wrong signal for SIGSEGV handler");
+
+ // Only process memory addresses in our shared memory block.
+ if (!my_global_state->IsInLocklessQueueMemory(siginfo->si_addr)) {
+ if (CallChainedAction(old_segv_handler, signal, siginfo, context_void)) {
+ errno = saved_errno;
+ return;
+ } else {
+ SIMPLE_ASSERT(false, "actual SIGSEGV");
+ }
+ }
+ SIMPLE_ASSERT(my_global_state->state == DieAtState::kRunning,
+ "bad state for SIGSEGV");
+
+ my_global_state->HandleWrite(siginfo->si_addr);
+
+ my_global_state->ShmProtectOrDie(PROT_READ | PROT_WRITE);
+ my_global_state->state = DieAtState::kWriting;
+ errno = saved_errno;
+
+#if defined(__x86_64__)
+ __asm__ __volatile__("int $3" ::: "memory", "cc");
+#elif defined(__aarch64__)
+ __asm__ __volatile__("brk #0" ::: "memory", "cc");
+#else
+#error Unhandled architecture
+#endif
+}
+
+// The SEGV handler has set a breakpoint 1 instruction in the future. This
+// clears it, marks memory readonly, and continues.
+void trap_handler(int signal, siginfo_t *, void * /*context*/) {
+ GlobalState *my_global_state = GlobalState::Get();
+ const int saved_errno = errno;
+ SIMPLE_ASSERT(signal == SIGTRAP, "wrong signal for SIGTRAP handler");
+
+ SIMPLE_ASSERT(my_global_state->state == DieAtState::kWriting,
+ "bad state for SIGTRAP");
+ my_global_state->ShmProtectOrDie(PROT_READ);
+ my_global_state->state = DieAtState::kRunning;
+ errno = saved_errno;
+}
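+
+// Taken together, segv_handler and trap_handler implement the child's half of
+// the write-stepping handshake with the tracing parent (sketch):
+//
+//   1. The child stores to the PROT_READ queue memory and takes a SIGSEGV.
+//   2. The parent saves the registers and delivers the SIGSEGV; segv_handler
+//      records the write, maps the memory read-write, and hits int3/brk.
+//   3. The parent catches that trap, restores the saved registers, and
+//      single-steps the faulting store, which now succeeds.
+//   4. The parent then sends a SIGTRAP, so trap_handler can map the memory
+//      back to read-only before execution continues.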
+
+// Installs the signal handler.
+void InstallHandler(int signal, void (*handler)(int, siginfo_t *, void *),
+ struct sigaction *old_action) {
+ struct sigaction action;
+ memset(&action, 0, sizeof(action));
+ action.sa_sigaction = handler;
+ // We don't do a full normal signal handler exit with ptrace, so SA_NODEFER is
+ // necessary to keep our signal handler active.
+ action.sa_flags = SA_RESTART | SA_SIGINFO | SA_NODEFER;
+#ifdef AOS_SANITIZER_thread
+ // Tsan messes with signal handlers to check for race conditions, and it
+ // causes problems, so we have to work around it for SIGTRAP.
+ if (signal == SIGTRAP) {
+ typedef int (*SigactionType)(int, const struct sigaction *,
+ struct sigaction *);
+ SigactionType real_sigaction =
+ reinterpret_cast<SigactionType>(dlsym(RTLD_NEXT, "sigaction"));
+ if (sigaction == real_sigaction) {
+ LOG(WARNING) << "failed to work around tsan signal handling weirdness";
+ }
+ PCHECK(real_sigaction(signal, &action, old_action) == 0);
+ return;
+ }
+#endif
+ PCHECK(sigaction(signal, &action, old_action) == 0);
+}
+
+// A mutex lock is about to happen. Mark the memory rw, and check to see if we
+// should die.
+void futex_before(void *address, bool) {
+ GlobalState *my_global_state = GlobalState::Get();
+ if (my_global_state->IsInLocklessQueueMemory(address)) {
+ assert(my_global_state->state == DieAtState::kRunning);
+ my_global_state->HandleWrite(address);
+ my_global_state->ShmProtectOrDie(PROT_READ | PROT_WRITE);
+ my_global_state->state = DieAtState::kWriting;
+ }
+}
+
+// We have a manual trap for mutexes. Check to see if we were supposed to die
+// on this write (the compare/exchange for the mutex), and mark the memory ro
+// again.
+void futex_after(void *address, bool) {
+ GlobalState *my_global_state = GlobalState::Get();
+ if (my_global_state->IsInLocklessQueueMemory(address)) {
+ assert(my_global_state->state == DieAtState::kWriting);
+ my_global_state->ShmProtectOrDie(PROT_READ);
+ my_global_state->state = DieAtState::kRunning;
+ }
+}
+
+} // namespace
+
+void GlobalState::HandleWrite(void *address) {
+ uintptr_t address_offset = reinterpret_cast<uintptr_t>(address) -
+ reinterpret_cast<uintptr_t>(lockless_queue_memory);
+ if (writes_in != nullptr) {
+ SIMPLE_ASSERT(writes_in->At(current_location) == address_offset,
+ "wrong write order");
+ }
+ if (writes_out != nullptr) {
+ writes_out->Add(address_offset);
+ }
+ if (die_at != 0) {
+ if (die_at == current_location) {
+ _exit(kExitEarlyValue);
+ }
+ }
+ ++current_location;
+}
+
+GlobalState *GlobalState::Get() {
+ return global_state.load(::std::memory_order_relaxed);
+}
+
+std::tuple<GlobalState *, WritesArray *> GlobalState::MakeGlobalState() {
+ // Map the global state and memory for the Writes array so it exists across
+ // the process boundary.
+ void *shared_allocations = static_cast<GlobalState *>(
+ mmap(nullptr, sizeof(GlobalState) + sizeof(WritesArray),
+ PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, -1, 0));
+ CHECK_NE(MAP_FAILED, shared_allocations);
+
+ global_state.store(static_cast<GlobalState *>(shared_allocations));
+ void *expected_writes_shared_allocations = static_cast<void *>(
+ static_cast<uint8_t *>(shared_allocations) + sizeof(GlobalState));
+ WritesArray *expected_writes =
+ static_cast<WritesArray *>(expected_writes_shared_allocations);
+ new (expected_writes) WritesArray();
+ return std::make_tuple(static_cast<GlobalState *>(shared_allocations),
+ expected_writes);
+}
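+
+// MAP_SHARED | MAP_ANONYMOUS is what makes this work across fork(): the
+// parent and the traced child share the same pages, so the child's updates
+// to current_location and the writes arrays stay visible to the parent after
+// the child dies.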
+
+bool GlobalState::IsInLocklessQueueMemory(void *address) {
+ void *read_lockless_queue_memory = lockless_queue_memory;
+ if (address < read_lockless_queue_memory) {
+ return false;
+ }
+ if (reinterpret_cast<uintptr_t>(address) >=
+ reinterpret_cast<uintptr_t>(read_lockless_queue_memory) +
+ lockless_queue_memory_size) {
+ return false;
+ }
+ return true;
+}
+
+void GlobalState::ShmProtectOrDie(int prot) {
+ PCHECK(mprotect(lockless_queue_memory, lockless_queue_memory_size, prot) !=
+ -1)
+ << ": mprotect(" << lockless_queue_memory << ", "
+ << lockless_queue_memory_size << ", 0x" << std::hex << prot << ") failed";
+}
+
+void GlobalState::RegisterSegvAndTrapHandlers() {
+ InstallHandler(SIGSEGV, segv_handler, &old_segv_handler);
+ InstallHandler(SIGTRAP, trap_handler, &old_trap_handler);
+ CHECK_EQ(old_trap_handler.sa_handler, SIG_DFL);
+ linux_code::ipc_lib::SetShmAccessorObservers(futex_before, futex_after);
+}
+
+// gtest only allows creating fatal failures in functions returning void...
+// status is from wait(2).
+void DetectFatalFailures(int status) {
+ if (WIFEXITED(status)) {
+ FAIL() << " child returned status "
+ << ::std::to_string(WEXITSTATUS(status));
+ } else if (WIFSIGNALED(status)) {
+ FAIL() << " child exited because of signal "
+ << aos_strsignal(WTERMSIG(status));
+ } else {
+ FAIL() << " child exited with status " << ::std::hex << status;
+ }
+}
+
+// Returns true if it runs all the way through.
+bool RunFunctionDieAt(::std::function<void(void *)> prepare,
+ ::std::function<void(void *)> function,
+ bool *test_failure, size_t die_at,
+ uintptr_t writable_offset, const WritesArray *writes_in,
+ WritesArray *writes_out) {
+ GlobalState *my_global_state = GlobalState::Get();
+ my_global_state->writes_in = writes_in;
+ my_global_state->writes_out = writes_out;
+ my_global_state->die_at = die_at;
+ my_global_state->current_location = 0;
+ my_global_state->state = DieAtState::kDisabled;
+
+ const pid_t pid = fork();
+ PCHECK(pid != -1) << ": fork() failed";
+ if (pid == 0) {
+ // Run the test.
+ ::aos::testing::PreventExit();
+
+ prepare(my_global_state->lockless_queue_memory);
+
+ // Update the robust list offset.
+ linux_code::ipc_lib::SetRobustListOffset(writable_offset);
+ // Install a segv handler (to detect writes to the memory block), and a trap
+ // handler so we can single step.
+ my_global_state->RegisterSegvAndTrapHandlers();
+
+ PCHECK(ptrace(PTRACE_TRACEME, 0, 0, 0) == 0);
+ my_global_state->ShmProtectOrDie(PROT_READ);
+ my_global_state->state = DieAtState::kRunning;
+
+ function(my_global_state->lockless_queue_memory);
+ my_global_state->state = DieAtState::kDisabled;
+ my_global_state->ShmProtectOrDie(PROT_READ | PROT_WRITE);
+ _exit(0);
+ } else {
+ // Annoying wrapper type because elf_gregset_t is an array, which C++
+ // handles poorly.
+ struct RestoreState {
+ RestoreState(elf_gregset_t regs_in) {
+ memcpy(regs, regs_in, sizeof(regs));
+ }
+ elf_gregset_t regs;
+ };
+ std::optional<RestoreState> restore_regs;
+ bool pass_trap = false;
+ // Wait until the child process dies.
+ while (true) {
+ int status;
+ pid_t waited_on = waitpid(pid, &status, 0);
+ if (waited_on == -1) {
+ if (errno == EINTR) continue;
+ PCHECK(false) << ": waitpid(" << pid << ", " << &status
+ << ", 0) failed";
+ }
+ CHECK_EQ(waited_on, pid)
+ << ": waitpid got child " << waited_on << " instead of " << pid;
+ if (WIFSTOPPED(status)) {
+ // The child was stopped via ptrace.
+ const int stop_signal = WSTOPSIG(status);
+ elf_gregset_t regs;
+ {
+ struct iovec iov;
+ iov.iov_base = &regs;
+ iov.iov_len = sizeof(regs);
+ PCHECK(ptrace(PTRACE_GETREGSET, pid, NT_PRSTATUS, &iov) == 0);
+ CHECK_EQ(iov.iov_len, sizeof(regs))
+ << ": ptrace regset is the wrong size";
+ }
+ if (stop_signal == SIGSEGV) {
+ // It's a SEGV, hopefully due to writing to the shared memory which is
+ // marked read-only. We record the instruction that faulted so we can
+ // look for it while single-stepping, then deliver the signal so the
+ // child can mark it read-write and then poke us to single-step that
+ // instruction.
+
+ CHECK(!restore_regs)
+ << ": Traced child got a SEGV while single-stepping";
+ // Save all the registers to resume execution at the current location
+ // in the child.
+ restore_regs = RestoreState(regs);
+ PCHECK(ptrace(PTRACE_CONT, pid, nullptr, SIGSEGV) == 0);
+ continue;
+ }
+ if (stop_signal == SIGTRAP) {
+ if (pass_trap) {
+ // This is the new SIGTRAP we generated, which we just want to pass
+ // through so the child's signal handler can restore the memory to
+ // read-only
+ PCHECK(ptrace(PTRACE_CONT, pid, nullptr, SIGTRAP) == 0);
+ pass_trap = false;
+ continue;
+ }
+ if (restore_regs) {
+ // Restore the state we saved before delivering the SEGV, and then
+ // single-step that one instruction.
+ struct iovec iov;
+ iov.iov_base = &restore_regs->regs;
+ iov.iov_len = sizeof(restore_regs->regs);
+ PCHECK(ptrace(PTRACE_SETREGSET, pid, NT_PRSTATUS, &iov) == 0);
+ restore_regs = std::nullopt;
+ PCHECK(ptrace(PTRACE_SINGLESTEP, pid, nullptr, nullptr) == 0);
+ continue;
+ }
+ // We executed the single instruction that originally faulted, so
+ // now deliver a SIGTRAP to the child so it can mark the memory
+ // read-only again.
+ pass_trap = true;
+ PCHECK(kill(pid, SIGTRAP) == 0);
+ PCHECK(ptrace(PTRACE_CONT, pid, nullptr, nullptr) == 0);
+ continue;
+ }
+ LOG(FATAL) << "Traced child was stopped with unexpected signal: "
+ << static_cast<int>(WSTOPSIG(status));
+ }
+ if (WIFEXITED(status)) {
+ if (WEXITSTATUS(status) == 0) return true;
+ if (WEXITSTATUS(status) == kExitEarlyValue) return false;
+ }
+ DetectFatalFailures(status);
+ if (test_failure) *test_failure = true;
+ return false;
+ }
+ }
+}
+
+bool RunFunctionDieAtAndCheck(const LocklessQueueConfiguration &config,
+ ::std::function<void(void *)> prepare,
+ ::std::function<void(void *)> function,
+ ::std::function<void(void *)> check,
+ bool *test_failure, size_t die_at,
+ const WritesArray *writes_in,
+ WritesArray *writes_out) {
+ // Allocate shared memory.
+ GlobalState *my_global_state = GlobalState::Get();
+ my_global_state->lockless_queue_memory_size = LocklessQueueMemorySize(config);
+ my_global_state->lockless_queue_memory = static_cast<void *>(
+ mmap(nullptr, my_global_state->lockless_queue_memory_size,
+ PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, -1, 0));
+ CHECK_NE(MAP_FAILED, my_global_state->lockless_queue_memory);
+
+ // And the backup used to point the robust list at.
+ my_global_state->lockless_queue_memory_lock_backup = static_cast<void *>(
+ mmap(nullptr, my_global_state->lockless_queue_memory_size,
+ PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, -1, 0));
+ CHECK_NE(MAP_FAILED, my_global_state->lockless_queue_memory_lock_backup);
+
+ // The writable offset tells us how to convert from a pointer in the queue to
+ // a pointer that is safe to write. This is so robust futexes don't spin the
+ // kernel when the user maps a page PROT_READ, and the kernel tries to clear
+ // the futex there.
+ const uintptr_t writable_offset =
+ reinterpret_cast<uintptr_t>(
+ my_global_state->lockless_queue_memory_lock_backup) -
+ reinterpret_cast<uintptr_t>(my_global_state->lockless_queue_memory);
+
+ bool r;
+ // Do the actual test in a new thread so any locked mutexes will be cleaned up
+ // nicely with owner-died at the end.
+ ::std::thread test_thread([&prepare, &function, &check, test_failure, die_at,
+ writes_in, writes_out, writable_offset, &r]() {
+ r = RunFunctionDieAt(prepare, function, test_failure, die_at,
+ writable_offset, writes_in, writes_out);
+ if (::testing::Test::HasFailure()) {
+ r = false;
+ if (test_failure) *test_failure = true;
+ return;
+ }
+
+ check(GlobalState::Get()->lockless_queue_memory);
+ });
+ test_thread.join();
+ return r;
+}
+
+// Tests function to make sure it handles dying after each store it makes to
+// shared memory. check should make sure function behaved correctly.
+// This will repeatedly create a new TestSharedMemory, run prepare, run
+// function, and then run check, killing the process function is running in
+// at various points. It will stop if anything reports a fatal gtest failure.
+void TestShmRobustness(const LocklessQueueConfiguration &config,
+ ::std::function<void(void *)> prepare,
+ ::std::function<void(void *)> function,
+ ::std::function<void(void *)> check) {
+ auto [my_global_state, expected_writes] = GlobalState::MakeGlobalState();
+
+ bool test_failed = false;
+ ASSERT_TRUE(RunFunctionDieAtAndCheck(config, prepare, function, check,
+ &test_failed, 0, nullptr,
+ expected_writes));
+ if (test_failed) {
+ ADD_FAILURE();
+ return;
+ }
+
+ size_t die_at = 1;
+ while (true) {
+ SCOPED_TRACE("dying at " + ::std::to_string(die_at) + "/" +
+ ::std::to_string(expected_writes->size()));
+ if (RunFunctionDieAtAndCheck(config, prepare, function, check, &test_failed,
+ die_at, expected_writes, nullptr)) {
+ LOG(INFO) << "Tested " << die_at << " death points";
+ return;
+ }
+ if (test_failed) {
+ ADD_FAILURE();
+ }
+ if (::testing::Test::HasFailure()) return;
+ ++die_at;
+ }
+}
+
+SharedTid::SharedTid() {
+ // Capture the tid in the child so we can tell if it died. Use mmap so it
+ // works across the process boundary.
+ tid_ =
+ static_cast<pid_t *>(mmap(nullptr, sizeof(pid_t), PROT_READ | PROT_WRITE,
+ MAP_SHARED | MAP_ANONYMOUS, -1, 0));
+ CHECK_NE(MAP_FAILED, tid_);
+}
+
+SharedTid::~SharedTid() { CHECK_EQ(munmap(tid_, sizeof(pid_t)), 0); }
+
+void SharedTid::Set() { *tid_ = gettid(); }
+
+pid_t SharedTid::Get() { return *tid_; }
+
+bool PretendOwnerDied(aos_mutex *mutex, pid_t tid) {
+ if ((mutex->futex & FUTEX_TID_MASK) == tid) {
+ mutex->futex = FUTEX_OWNER_DIED;
+ return true;
+ }
+ return false;
+}
+
+} // namespace testing
+} // namespace ipc_lib
+} // namespace aos
+
+#endif // SUPPORTS_SHM_ROBUSTNESS_TEST
diff --git a/aos/ipc_lib/lockless_queue_stepping.h b/aos/ipc_lib/lockless_queue_stepping.h
new file mode 100644
index 0000000..b3eb6e3
--- /dev/null
+++ b/aos/ipc_lib/lockless_queue_stepping.h
@@ -0,0 +1,153 @@
+#ifndef AOS_IPC_LIB_LOCKLESS_QUEUE_STEPPING_H_
+#define AOS_IPC_LIB_LOCKLESS_QUEUE_STEPPING_H_
+
+#include <cinttypes>
+#include <functional>
+
+#include "aos/ipc_lib/lockless_queue.h"
+#include "aos/ipc_lib/lockless_queue_memory.h"
+
+namespace aos {
+namespace ipc_lib {
+namespace testing {
+
+#if defined(__ARM_EABI__)
+// There are various reasons why we might not actually be able to do this
+// testing, but we still want to run the functions to test anything they test
+// other than shm robustness.
+//
+// ARM doesn't have a nice way to do single-stepping, and I don't feel like
+// dealing with editing the next instruction to be a breakpoint and then
+// changing it back.
+#else
+
+#define SUPPORTS_SHM_ROBUSTNESS_TEST
+
+// This code currently supports amd64 only. It shouldn't be hard to port to
+// i386 (it should just use the name for only the smaller part of the flags
+// register), but that's not tested, and porting it to any other architecture
+// is more work. Currently, we skip doing anything exciting on arm (just run
+// the code without any robustness testing) and refuse to compile anywhere
+// else.
+
+#define SIMPLE_ASSERT(condition, message) \
+ do { \
+ if (!(condition)) { \
+ static const char kMessage[] = message "\n"; \
+ if (write(STDERR_FILENO, kMessage, sizeof(kMessage) - 1) != \
+ (sizeof(kMessage) - 1)) { \
+ static const char kFailureMessage[] = "writing failed\n"; \
+ __attribute__((unused)) int ignore = write( \
+ STDERR_FILENO, kFailureMessage, sizeof(kFailureMessage) - 1); \
+ } \
+ abort(); \
+ } \
+ } while (false)
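+
+// SIMPLE_ASSERT is used instead of CHECK/LOG inside the signal handlers
+// because write(2) and abort(3) are async-signal-safe while the logging
+// machinery is not, e.g.:
+//
+//   SIMPLE_ASSERT(signal == SIGTRAP, "wrong signal for SIGTRAP handler");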
+
+// Array to track writes to memory, and make sure they happen in the right
+// order.
+class WritesArray {
+ public:
+ uintptr_t At(size_t location) const {
+ SIMPLE_ASSERT(location < size_, "too far into writes array");
+ return writes_[location];
+ }
+ void Add(uintptr_t pointer) {
+ SIMPLE_ASSERT(size_ < kSize, "too many writes");
+ writes_[size_++] = pointer;
+ }
+
+ size_t size() const { return size_; }
+
+ private:
+ static const size_t kSize = 20000;
+
+ uintptr_t writes_[kSize];
+ size_t size_ = 0;
+};
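+
+// WritesArray is used in a record/replay pattern: the first run passes one
+// as writes_out to record the offset of every store to the queue memory, and
+// later runs pass that same array back as writes_in so the harness can
+// assert the stores happen in the identical order while it kills the process
+// at successive write numbers. A sketch of the API (offsets are made up):
+//
+//   WritesArray writes;
+//   writes.Add(0x40);  // Record: first store hit offset 0x40.
+//   writes.Add(0x48);  // Record: second store hit offset 0x48.
+//   CHECK_EQ(writes.At(0), 0x40u);  // Replay: verify the order.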
+
+enum class DieAtState {
+ // No SEGVs should be happening.
+ kDisabled,
+ // SEGVs are fine. Normal operation.
+ kRunning,
+ // We are manipulating a mutex. No SEGVs should be happening.
+ kWriting,
+};
+
+// What we exit with when we're exiting in the middle.
+const int kExitEarlyValue = 123;
+
+// We have to keep track of everything in a global variable because there's no
+// other way for the signal handlers to find it.
+struct GlobalState {
+ // Constructs the singleton GlobalState.
+ static std::tuple<GlobalState *, WritesArray *> MakeGlobalState();
+
+ // Returns the global state. Atomic and safe from signal handlers.
+ static GlobalState *Get();
+
+ // Pointer to the queue memory, and its size.
+ void *lockless_queue_memory;
+ size_t lockless_queue_memory_size;
+
+ // Pointer to a second block of memory the same size. This (on purpose) has
+ // the same size as lockless_queue_memory so we can point the robust mutexes
+ // here.
+ void *lockless_queue_memory_lock_backup;
+
+ // Expected writes.
+ const WritesArray *writes_in;
+ // Actual writes.
+ WritesArray *writes_out;
+ // Location to die at, and how far we have gotten.
+ size_t die_at, current_location;
+ // State.
+ DieAtState state;
+
+ // Returns true if the address is in the queue memory chunk.
+ bool IsInLocklessQueueMemory(void *address);
+
+ // Calls mprotect(2) for the entire shared memory region with the given prot.
+ void ShmProtectOrDie(int prot);
+
+ // Checks a write into the queue and conditionally dies. Tracks the write.
+ void HandleWrite(void *address);
+
+ // Registers the handlers required to trap writes.
+ void RegisterSegvAndTrapHandlers();
+};
+
+void TestShmRobustness(const LocklessQueueConfiguration &config,
+ ::std::function<void(void *)> prepare,
+ ::std::function<void(void *)> function,
+ ::std::function<void(void *)> check);
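+
+// A minimal usage sketch (hypothetical lambdas and config values): prepare
+// runs in the forked child to initialize the shared memory, function is the
+// code under test that gets killed after each store, and check verifies the
+// invariants afterwards in the parent.
+//
+//   LocklessQueueConfiguration config;
+//   config.num_watchers = 1;
+//   config.num_senders = 1;
+//   config.num_pinners = 0;
+//   config.queue_size = 2;
+//   config.message_data_size = 32;
+//   TestShmRobustness(
+//       config, [](void *memory) { /* initialize the queue */ },
+//       [](void *memory) { /* do the traced writes */ },
+//       [](void *memory) { /* verify the invariants */ });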
+
+// Capture the tid in the child so we can tell if it died. Uses mmap so it
+// works across the process boundary.
+class SharedTid {
+ public:
+ SharedTid();
+ ~SharedTid();
+
+ // Captures the tid.
+ void Set();
+
+ // Returns the captured tid.
+ pid_t Get();
+
+ private:
+ pid_t *tid_;
+};
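+
+// A sketch of the typical flow: construct in the parent, call Set() from the
+// process whose death is being tested, and read Get() in the parent
+// afterwards to clean up any robust mutexes the dead process held.
+//
+//   SharedTid tid;
+//   if (fork() == 0) {
+//     tid.Set();  // Runs in the child; visible to the parent via the mmap.
+//     _exit(0);
+//   }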
+
+// Sets FUTEX_OWNER_DIED if the owner was tid. This fakes what the kernel does
+// with a robust mutex.
+bool PretendOwnerDied(aos_mutex *mutex, pid_t tid);
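+
+// On a real exit, the kernel walks the dead thread's robust list and sets
+// FUTEX_OWNER_DIED on each futex it held. The stepping harness points the
+// robust list at the writable backup mapping instead, so tests call this
+// with the tid captured via SharedTid to apply the same transition by hand:
+//
+//   PretendOwnerDied(&memory->queue_setup_lock, tid.Get());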
+
+#endif
+
+} // namespace testing
+} // namespace ipc_lib
+} // namespace aos
+
+#endif // AOS_IPC_LIB_LOCKLESS_QUEUE_STEPPING_H_
diff --git a/aos/ipc_lib/lockless_queue_test.cc b/aos/ipc_lib/lockless_queue_test.cc
index 93ae2a3..58e093f 100644
--- a/aos/ipc_lib/lockless_queue_test.cc
+++ b/aos/ipc_lib/lockless_queue_test.cc
@@ -1,5 +1,6 @@
#include "aos/ipc_lib/lockless_queue.h"
+#include <sys/mman.h>
#include <unistd.h>
#include <wait.h>
@@ -16,6 +17,8 @@
#include "aos/events/epoll.h"
#include "aos/ipc_lib/aos_sync.h"
#include "aos/ipc_lib/event.h"
+#include "aos/ipc_lib/lockless_queue_memory.h"
+#include "aos/ipc_lib/lockless_queue_stepping.h"
#include "aos/ipc_lib/queue_racer.h"
#include "aos/ipc_lib/signalfd.h"
#include "aos/realtime.h"
@@ -421,6 +424,148 @@
static_cast<double>(kNumMessages) / elapsed_seconds);
}
+#if defined(SUPPORTS_SHM_ROBUSTNESS_TEST)
+
+// Verifies that LatestIndex points to the same message as the logic from
+// "FetchNext", which increments the index until it gets "NOTHING_NEW" back.
+// This is so we can confirm fetchers and watchers all see the same message at
+// the same point in time.
+int VerifyMessages(LocklessQueue *queue, LocklessQueueMemory *memory) {
+ LocklessQueueReader reader(*queue);
+
+ const ipc_lib::QueueIndex queue_index = reader.LatestIndex();
+ if (!queue_index.valid()) {
+ return 0;
+ }
+
+ // Now loop through the queue and make sure the number in the snprintf
+ // increments.
+ char last_data = '0';
+ int i = 0;
+
+ // Callback which isn't set so we don't exercise the conditional reading code.
+ std::function<bool(const Context &)> should_read_callback;
+
+ // Now, read as far as we can until we get NOTHING_NEW. This simulates
+ // FetchNext.
+ while (true) {
+ monotonic_clock::time_point monotonic_sent_time;
+ realtime_clock::time_point realtime_sent_time;
+ monotonic_clock::time_point monotonic_remote_time;
+ realtime_clock::time_point realtime_remote_time;
+ uint32_t remote_queue_index;
+ UUID source_boot_uuid;
+ char read_data[1024];
+ size_t length;
+
+ LocklessQueueReader::Result read_result = reader.Read(
+ i, &monotonic_sent_time, &realtime_sent_time, &monotonic_remote_time,
+ &realtime_remote_time, &remote_queue_index, &source_boot_uuid, &length,
+ &(read_data[0]), std::ref(should_read_callback));
+
+ if (read_result != LocklessQueueReader::Result::GOOD) {
+ if (read_result == LocklessQueueReader::Result::TOO_OLD) {
+ ++i;
+ continue;
+ }
+ CHECK(read_result == LocklessQueueReader::Result::NOTHING_NEW)
+ << ": " << static_cast<int>(read_result);
+ break;
+ }
+
+ EXPECT_GT(read_data[LocklessQueueMessageDataSize(memory) - length + 6],
+ last_data)
+ << ": Got " << read_data << " for " << i;
+ last_data = read_data[LocklessQueueMessageDataSize(memory) - length + 6];
+
+ ++i;
+ }
+
+ // The latest queue index should match the fetched queue index.
+ if (i == 0) {
+ EXPECT_FALSE(queue_index.valid());
+ } else {
+ EXPECT_EQ(queue_index.index(), i - 1);
+ }
+ return i;
+}
+
+// Tests that at all points in the publish step, fetch == fetch next. This
+// means that there is an atomic point at which the message is viewed as visible
+// to consumers. Do this by killing the writer after each change to shared
+// memory, and confirming fetch == fetch next each time.
+TEST_F(LocklessQueueTest, FetchEqFetchNext) {
+ SharedTid tid;
+
+ // Make a small queue so it is easier to debug.
+ LocklessQueueConfiguration config;
+ config.num_watchers = 1;
+ config.num_senders = 2;
+ config.num_pinners = 0;
+ config.queue_size = 3;
+ config.message_data_size = 32;
+
+ TestShmRobustness(
+ config,
+ [config, &tid](void *memory) {
+ // Initialize the queue.
+ LocklessQueue(
+ reinterpret_cast<aos::ipc_lib::LocklessQueueMemory *>(memory),
+ reinterpret_cast<aos::ipc_lib::LocklessQueueMemory *>(memory),
+ config)
+ .Initialize();
+ tid.Set();
+ },
+ [config](void *memory) {
+ LocklessQueue queue(
+ reinterpret_cast<aos::ipc_lib::LocklessQueueMemory *>(memory),
+ reinterpret_cast<aos::ipc_lib::LocklessQueueMemory *>(memory),
+ config);
+ // Now try to write some messages. We will get killed a bunch as this
+ // tries to happen.
+ LocklessQueueSender sender =
+ LocklessQueueSender::Make(queue, chrono::nanoseconds(1)).value();
+ for (int i = 0; i < 5; ++i) {
+ char data[100];
+ size_t s = snprintf(data, sizeof(data), "foobar%d", i + 1);
+ ASSERT_EQ(sender.Send(data, s + 1, monotonic_clock::min_time,
+ realtime_clock::min_time, 0xffffffffl,
+ UUID::Zero(), nullptr, nullptr, nullptr),
+ LocklessQueueSender::Result::GOOD);
+ }
+ },
+ [config, &tid](void *raw_memory) {
+ ::aos::ipc_lib::LocklessQueueMemory *const memory =
+ reinterpret_cast<::aos::ipc_lib::LocklessQueueMemory *>(raw_memory);
+ LocklessQueue queue(memory, memory, config);
+ PretendOwnerDied(&memory->queue_setup_lock, tid.Get());
+
+ if (VLOG_IS_ON(1)) {
+ PrintLocklessQueueMemory(memory);
+ }
+
+ const int i = VerifyMessages(&queue, memory);
+
+ LocklessQueueSender sender =
+ LocklessQueueSender::Make(queue, chrono::nanoseconds(1)).value();
+ {
+ char data[100];
+ size_t s = snprintf(data, sizeof(data), "foobar%d", i + 1);
+ ASSERT_EQ(sender.Send(data, s + 1, monotonic_clock::min_time,
+ realtime_clock::min_time, 0xffffffffl,
+ UUID::Zero(), nullptr, nullptr, nullptr),
+ LocklessQueueSender::Result::GOOD);
+ }
+
+ // Now, make sure we can send 1 message and receive it to confirm we
+ // haven't corrupted next_queue_index irrevocably.
+ const int newi = VerifyMessages(&queue, memory);
+ EXPECT_EQ(newi, i + 1);
+ });
+}
+
+#endif
+
} // namespace testing
} // namespace ipc_lib
} // namespace aos
diff --git a/aos/ipc_lib/queue_racer.h b/aos/ipc_lib/queue_racer.h
index 87f2cce..f0a2684 100644
--- a/aos/ipc_lib/queue_racer.h
+++ b/aos/ipc_lib/queue_racer.h
@@ -62,7 +62,8 @@
private:
// Wipes the queue memory out so we get a clean start.
void Reset() {
- memset(queue_.memory(), 0, LocklessQueueMemorySize(queue_.config()));
+ memset(reinterpret_cast<void *>(queue_.memory()), 0,
+ LocklessQueueMemorySize(queue_.config()));
}
// This is a separate method so that when all the ASSERT_* methods, we still
diff --git a/aos/ipc_lib/shm_base.cc b/aos/ipc_lib/shm_base.cc
new file mode 100644
index 0000000..22bf915
--- /dev/null
+++ b/aos/ipc_lib/shm_base.cc
@@ -0,0 +1,9 @@
+#include "aos/ipc_lib/shm_base.h"
+
+DEFINE_string(shm_base, "/dev/shm/aos",
+ "Directory to place queue backing mmaped files in.");
+namespace aos::testing {
+void SetShmBase(const std::string_view base) {
+ FLAGS_shm_base = std::string(base) + "/aos";
+}
+} // namespace aos::testing
diff --git a/aos/ipc_lib/shm_base.h b/aos/ipc_lib/shm_base.h
new file mode 100644
index 0000000..b70df06
--- /dev/null
+++ b/aos/ipc_lib/shm_base.h
@@ -0,0 +1,11 @@
+#ifndef AOS_IPC_LIB_SHM_BASE_H_
+#define AOS_IPC_LIB_SHM_BASE_H_
+
+#include "gflags/gflags.h"
+
+DECLARE_string(shm_base);
+
+namespace aos::testing {
+void SetShmBase(const std::string_view base);
+} // namespace aos::testing
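+
+// Usage sketch: point the queue backing files at a (hypothetical) per-test
+// directory. After aos::testing::SetShmBase("/tmp/my_test"), FLAGS_shm_base
+// is "/tmp/my_test/aos".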
+#endif // AOS_IPC_LIB_SHM_BASE_H_
diff --git a/aos/json_to_flatbuffer.h b/aos/json_to_flatbuffer.h
index 01a3c3f..b664036 100644
--- a/aos/json_to_flatbuffer.h
+++ b/aos/json_to_flatbuffer.h
@@ -86,14 +86,19 @@
// Writes a Flatbuffer to a file, or dies.
template <typename T>
-inline void WriteFlatbufferToJson(std::string_view filename,
- const Flatbuffer<T> &msg) {
+inline void WriteFlatbufferToJson(std::string_view filename, const T *msg) {
std::ofstream json_file(std::string(filename), std::ios::out);
CHECK(json_file) << ": Couldn't open " << filename;
json_file << FlatbufferToJson(msg);
json_file.close();
}
+template <typename T>
+inline void WriteFlatbufferToJson(std::string_view filename,
+ const Flatbuffer<T> &msg) {
+ WriteFlatbufferToJson(filename, &msg.message());
+}
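+
+// Usage sketch (MyMessage and MakeMyMessage are hypothetical): both
+// overloads funnel through the raw-pointer version, so these are equivalent:
+//
+//   aos::FlatbufferDetachedBuffer<MyMessage> fb = MakeMyMessage();
+//   WriteFlatbufferToJson("/tmp/out.json", fb);             // wrapper
+//   WriteFlatbufferToJson("/tmp/out.json", &fb.message());  // raw pointer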
+
// Writes a NonSizePrefixedFlatbuffer to a binary file, or dies.
template <typename T>
inline void WriteFlatbufferToFile(std::string_view filename,
diff --git a/aos/network/BUILD b/aos/network/BUILD
index 75173e7..d1a79d0 100644
--- a/aos/network/BUILD
+++ b/aos/network/BUILD
@@ -385,6 +385,7 @@
":sctp_config_fbs",
":sctp_config_request_fbs",
":timestamp_fbs",
+ "//aos/containers:ring_buffer",
"//aos/events:shm_event_loop",
"//aos/events/logging:log_reader",
],
diff --git a/aos/network/message_bridge_client.fbs b/aos/network/message_bridge_client.fbs
index 40c0698..2f6461d 100644
--- a/aos/network/message_bridge_client.fbs
+++ b/aos/network/message_bridge_client.fbs
@@ -35,6 +35,9 @@
// Number of times we've established a connection to the server.
connection_count:uint (id: 8);
+
+ // Number of timestamps we have failed to send.
+ timestamp_send_failures: uint (id: 9);
}
// Statistics for all clients.
diff --git a/aos/network/message_bridge_client_lib.cc b/aos/network/message_bridge_client_lib.cc
index e42eaa0..19f3420 100644
--- a/aos/network/message_bridge_client_lib.cc
+++ b/aos/network/message_bridge_client_lib.cc
@@ -134,6 +134,10 @@
connect_timer_ = event_loop_->AddTimer([this]() { SendConnect(); });
connect_timer_->set_name(std::string("connect_") +
remote_node_->name()->str());
+ timestamp_retry_buffer_ =
+ event_loop_->AddTimer([this]() { SendTimestamps(); });
+ timestamp_retry_buffer_->set_name(std::string("timestamp_retry_") +
+ remote_node_->name()->str());
event_loop_->OnRun(
[this]() { connect_timer_->Schedule(event_loop_->monotonic_now()); });
@@ -265,6 +269,11 @@
connect_timer_->Schedule(
event_loop_->monotonic_now() + chrono::milliseconds(100),
chrono::milliseconds(100));
+
+ // Don't try resending timestamps since there is nobody to send them to. Let
+ // Connect handle that instead. HandleData will retry when they reply.
+ timestamp_retry_buffer_->Disable();
+
client_status_->Disconnect(client_index_);
client_status_->SampleReset(client_index_);
}
@@ -304,6 +313,7 @@
chrono::nanoseconds(remote_data->monotonic_sent_time()));
// Publish the message.
+ UUID remote_boot_uuid = UUID::FromVector(remote_data->boot_uuid());
RawSender *sender = channel_state->sender.get();
sender->CheckOk(sender->Send(
remote_data->data()->data(), remote_data->data()->size(),
@@ -311,48 +321,60 @@
chrono::nanoseconds(remote_data->monotonic_sent_time())),
realtime_clock::time_point(
chrono::nanoseconds(remote_data->realtime_sent_time())),
- remote_data->queue_index(),
- UUID::FromVector(remote_data->boot_uuid())));
+ remote_data->queue_index(), remote_boot_uuid));
client_status_->SampleFilter(
client_index_,
monotonic_clock::time_point(
chrono::nanoseconds(remote_data->monotonic_sent_time())),
- sender->monotonic_sent_time(),
- UUID::FromVector(remote_data->boot_uuid()));
+ sender->monotonic_sent_time(), remote_boot_uuid);
if (stream_reply_with_timestamp_[stream]) {
- // TODO(austin): Send back less if we are only acking. Maybe only a
- // stream id? Nothing if we are only forwarding?
+ SavedTimestamp timestamp{
+ .channel_index = remote_data->channel_index(),
+ .monotonic_sent_time = remote_data->monotonic_sent_time(),
+ .realtime_sent_time = remote_data->realtime_sent_time(),
+ .queue_index = remote_data->queue_index(),
+ .monotonic_remote_time =
+ sender->monotonic_sent_time().time_since_epoch().count(),
+ .realtime_remote_time =
+ sender->realtime_sent_time().time_since_epoch().count(),
+ .remote_queue_index = sender->sent_queue_index(),
+ };
- // Now fill out the message received reply. This uses a MessageHeader
- // container so it can be directly logged.
- message_reception_reply_.mutable_message()->mutate_channel_index(
- remote_data->channel_index());
- message_reception_reply_.mutable_message()->mutate_monotonic_sent_time(
- remote_data->monotonic_sent_time());
- message_reception_reply_.mutable_message()->mutate_realtime_sent_time(
- remote_data->realtime_sent_time());
- message_reception_reply_.mutable_message()->mutate_queue_index(
- remote_data->queue_index());
+ // Sort out if we should try to queue the timestamp or just send it
+ // directly. We don't want to get too far ahead.
+ //
+ // If the remote boot UUID changed, the other side rebooted. There is no
+ // sense telling it about timestamps from before the reboot, so wipe them
+ // and track the new boot instead.
+ if (remote_boot_uuid != timestamps_uuid_) {
+ timestamp_buffer_.Reset();
+ timestamps_uuid_ = remote_boot_uuid;
+ }
- // And capture the relevant data needed to generate the forwarding
- // MessageHeader.
- message_reception_reply_.mutable_message()->mutate_monotonic_remote_time(
- sender->monotonic_sent_time().time_since_epoch().count());
- message_reception_reply_.mutable_message()->mutate_realtime_remote_time(
- sender->realtime_sent_time().time_since_epoch().count());
- message_reception_reply_.mutable_message()->mutate_remote_queue_index(
- sender->sent_queue_index());
+ // Then, we can only send directly if there are no timestamps already queued.
+ bool push_timestamp = false;
+ if (timestamp_buffer_.empty()) {
+ if (!SendTimestamp(timestamp)) {
+ // Whoops, we failed to send; queue it and try again.
+ push_timestamp = true;
+ }
+ } else {
+ push_timestamp = true;
+ }
- // Unique ID is channel_index and monotonic clock.
- // TODO(austin): Depending on if we are the logger node or not, we need to
- // guarentee that this ack gets received too... Same path as the logger.
- client_.Send(kTimestampStream(),
- std::string_view(reinterpret_cast<const char *>(
- message_reception_reply_.span().data()),
- message_reception_reply_.span().size()),
- 0);
+ if (push_timestamp) {
+ // Trigger the timer if we are the first timestamp added, or if the
+ // timer was disabled because the far side disconnected.
+ if (timestamp_buffer_.empty() ||
+ timestamp_retry_buffer_->IsDisabled()) {
+ timestamp_retry_buffer_->Schedule(event_loop_->monotonic_now() +
+ chrono::milliseconds(100));
+ }
+ VLOG(1) << this << " Queued timestamp " << timestamp.channel_index
+ << " " << timestamp.queue_index;
+ timestamp_buffer_.Push(timestamp);
+ }
}
}
@@ -371,6 +393,62 @@
<< " cumtsn=" << message->header.rcvinfo.rcv_cumtsn << ")";
}
+bool SctpClientConnection::SendTimestamp(SavedTimestamp timestamp) {
+ // Now fill out the message received reply. This uses a MessageHeader
+ // container so it can be directly logged.
+ message_reception_reply_.mutable_message()->mutate_channel_index(
+ timestamp.channel_index);
+ message_reception_reply_.mutable_message()->mutate_monotonic_sent_time(
+ timestamp.monotonic_sent_time);
+ message_reception_reply_.mutable_message()->mutate_realtime_sent_time(
+ timestamp.realtime_sent_time);
+ message_reception_reply_.mutable_message()->mutate_queue_index(
+ timestamp.queue_index);
+
+ // And capture the relevant data needed to generate the forwarding
+ // MessageHeader.
+ message_reception_reply_.mutable_message()->mutate_monotonic_remote_time(
+ timestamp.monotonic_remote_time);
+ message_reception_reply_.mutable_message()->mutate_realtime_remote_time(
+ timestamp.realtime_remote_time);
+ message_reception_reply_.mutable_message()->mutate_remote_queue_index(
+ timestamp.remote_queue_index);
+
+ // Unique ID is channel_index and monotonic clock.
+ VLOG(1) << this << " Sent timestamp " << timestamp.channel_index << " "
+ << timestamp.queue_index;
+ if (!client_.Send(
+ kTimestampStream(),
+ std::string_view(reinterpret_cast<const char *>(
+ message_reception_reply_.span().data()),
+ message_reception_reply_.span().size()),
+ 0)) {
+ // Count the failure.
+ connection_->mutate_timestamp_send_failures(
+ connection_->timestamp_send_failures() + 1);
+ return false;
+ }
+ return true;
+}
+
+void SctpClientConnection::SendTimestamps() {
+ // This is only called from the timer, and the timer is only enabled when
+ // there is something in the buffer. Explode if that assumption is false.
+ CHECK(!timestamp_buffer_.empty());
+ do {
+ if (!SendTimestamp(timestamp_buffer_[0])) {
+ timestamp_retry_buffer_->Schedule(event_loop_->monotonic_now() +
+ chrono::milliseconds(100));
+ return;
+ } else {
+ VLOG(1) << this << " Resent timestamp "
+ << timestamp_buffer_[0].channel_index << " "
+ << timestamp_buffer_[0].queue_index;
+ timestamp_buffer_.Shift();
+ }
+ } while (!timestamp_buffer_.empty());
+}
+
MessageBridgeClient::MessageBridgeClient(
aos::ShmEventLoop *event_loop, std::string config_sha256,
SctpAuthMethod requested_authentication)
diff --git a/aos/network/message_bridge_client_lib.h b/aos/network/message_bridge_client_lib.h
index e4b5e84..2df3599 100644
--- a/aos/network/message_bridge_client_lib.h
+++ b/aos/network/message_bridge_client_lib.h
@@ -3,6 +3,7 @@
#include <string_view>
+#include "aos/containers/ring_buffer.h"
#include "aos/events/event_loop.h"
#include "aos/events/logging/logger_generated.h"
#include "aos/events/shm_event_loop.h"
@@ -49,6 +50,18 @@
}
private:
+ // Data structure to hold the timestamps we failed to deliver. All the
+ // fields match the reply exactly.
+ struct SavedTimestamp {
+ size_t channel_index;
+ int64_t monotonic_sent_time;
+ int64_t realtime_sent_time;
+ uint32_t queue_index;
+ int64_t monotonic_remote_time;
+ int64_t realtime_remote_time;
+ uint32_t remote_queue_index;
+ };
+
// Reads a message from the socket. Could be a notification.
void MessageReceived();
@@ -61,6 +74,9 @@
void NodeDisconnected();
void HandleData(const Message *message);
+ // Sends a timestamp, and returns true on success, false on failure.
+ bool SendTimestamp(SavedTimestamp timestamp);
+
// Schedules connect_timer_ for a ways in the future. If one of our messages
// gets dropped, the server might be waiting for this, so if we don't hear
// from the server for a while we'll try sending it again.
@@ -69,6 +85,9 @@
kReconnectTimeout);
}
+ // Sends out as many queued timestamps as we have and can send.
+ void SendTimestamps();
+
// Event loop to register the server on.
aos::ShmEventLoop *const event_loop_;
@@ -97,11 +116,20 @@
// Timer which fires to handle reconnections.
aos::TimerHandler *connect_timer_;
+ // Timer which fires to handle resending timestamps.
+ aos::TimerHandler *timestamp_retry_buffer_;
+
// ClientConnection statistics message to modify. This will be published
// periodicially.
MessageBridgeClientStatus *client_status_;
int client_index_;
ClientConnection *connection_;
+
+ // Boot UUID for the remote for any timestamps in the buffer.
+ UUID timestamps_uuid_ = UUID::Zero();
+
+ // Any timestamps that we were unable to send back.
+ aos::RingBuffer<SavedTimestamp, 1024> timestamp_buffer_;
};
// This encapsulates the state required to talk to *all* the servers from this
diff --git a/aos/network/message_bridge_client_status.cc b/aos/network/message_bridge_client_status.cc
index 2ba2ae0..272e49e 100644
--- a/aos/network/message_bridge_client_status.cc
+++ b/aos/network/message_bridge_client_status.cc
@@ -37,6 +37,7 @@
connection_builder.add_connected_since_time(
monotonic_clock::min_time.time_since_epoch().count());
connection_builder.add_connection_count(0);
+ connection_builder.add_timestamp_send_failures(0);
connection_offsets.emplace_back(connection_builder.Finish());
}
flatbuffers::Offset<
@@ -144,6 +145,11 @@
client_connection_builder.add_partial_deliveries(
connection->partial_deliveries());
+ if (connection->timestamp_send_failures() != 0) {
+ client_connection_builder.add_timestamp_send_failures(
+ connection->timestamp_send_failures());
+ }
+
if (!uuid_offset.IsNull()) {
client_connection_builder.add_boot_uuid(uuid_offset);
}
diff --git a/aos/network/message_bridge_test.cc b/aos/network/message_bridge_test.cc
index 4eb5e8b..cc46926 100644
--- a/aos/network/message_bridge_test.cc
+++ b/aos/network/message_bridge_test.cc
@@ -733,7 +733,9 @@
{
MakePi2Server();
- RunPi2Server(chrono::milliseconds(3050));
+ // Wait long enough for the client to connect again. It currently takes 3
+ // seconds of connection to estimate the time offset.
+ RunPi2Server(chrono::milliseconds(4050));
// And confirm we are synchronized again.
EXPECT_TRUE(pi1_server_statistics_fetcher.Fetch());
diff --git a/aos/network/message_bridge_test_lib.cc b/aos/network/message_bridge_test_lib.cc
index d5a5f8f..16e4a4b 100644
--- a/aos/network/message_bridge_test_lib.cc
+++ b/aos/network/message_bridge_test_lib.cc
@@ -3,8 +3,6 @@
DECLARE_string(boot_uuid);
namespace aos {
-void SetShmBase(const std::string_view base);
-
namespace message_bridge::testing {
namespace chrono = std::chrono;
@@ -20,7 +18,7 @@
}
void DoSetShmBase(const std::string_view node) {
- aos::SetShmBase(ShmBase(node));
+ aos::testing::SetShmBase(ShmBase(node));
}
ThreadedEventLoopRunner::ThreadedEventLoopRunner(aos::ShmEventLoop *event_loop)
@@ -75,6 +73,7 @@
void MessageBridgeParameterizedTest::MakePi1Server(
std::string server_config_sha256) {
OnPi1();
+ LOG(INFO) << "Making pi1 server";
FLAGS_application_name = "pi1_message_bridge_server";
pi1_server_event_loop =
std::make_unique<aos::ShmEventLoop>(&config.message());
@@ -87,6 +86,7 @@
void MessageBridgeParameterizedTest::RunPi1Server(
chrono::nanoseconds duration) {
+ LOG(INFO) << "Running pi1 server";
// Set up a shutdown callback.
aos::TimerHandler *const quit = pi1_server_event_loop->AddTimer(
[this]() { pi1_server_event_loop->Exit(); });
@@ -99,11 +99,13 @@
}
void MessageBridgeParameterizedTest::StartPi1Server() {
+ LOG(INFO) << "Starting pi1 server";
pi1_server_thread =
std::make_unique<ThreadedEventLoopRunner>(pi1_server_event_loop.get());
}
void MessageBridgeParameterizedTest::StopPi1Server() {
+ LOG(INFO) << "Stopping pi1 server";
pi1_server_thread.reset();
pi1_message_bridge_server.reset();
pi1_server_event_loop.reset();
@@ -111,6 +113,7 @@
void MessageBridgeParameterizedTest::MakePi1Client() {
OnPi1();
+ LOG(INFO) << "Making pi1 client";
FLAGS_application_name = "pi1_message_bridge_client";
pi1_client_event_loop =
std::make_unique<aos::ShmEventLoop>(&config.message());
@@ -120,11 +123,13 @@
}
void MessageBridgeParameterizedTest::StartPi1Client() {
+ LOG(INFO) << "Starting pi1 client";
pi1_client_thread =
std::make_unique<ThreadedEventLoopRunner>(pi1_client_event_loop.get());
}
void MessageBridgeParameterizedTest::StopPi1Client() {
+ LOG(INFO) << "Stopping pi1 client";
pi1_client_thread.reset();
pi1_message_bridge_client.reset();
pi1_client_event_loop.reset();
@@ -132,6 +137,7 @@
void MessageBridgeParameterizedTest::MakePi1Test() {
OnPi1();
+ LOG(INFO) << "Making pi1 test";
FLAGS_application_name = "test1";
pi1_test_event_loop = std::make_unique<aos::ShmEventLoop>(&config.message());
@@ -156,14 +162,19 @@
}
void MessageBridgeParameterizedTest::StartPi1Test() {
+ LOG(INFO) << "Starting pi1 test";
pi1_test_thread =
std::make_unique<ThreadedEventLoopRunner>(pi1_test_event_loop.get());
}
-void MessageBridgeParameterizedTest::StopPi1Test() { pi1_test_thread.reset(); }
+void MessageBridgeParameterizedTest::StopPi1Test() {
+ LOG(INFO) << "Stopping pi1 test";
+ pi1_test_thread.reset();
+}
void MessageBridgeParameterizedTest::MakePi2Server() {
OnPi2();
+ LOG(INFO) << "Making pi2 server";
FLAGS_application_name = "pi2_message_bridge_server";
pi2_server_event_loop =
std::make_unique<aos::ShmEventLoop>(&config.message());
@@ -174,6 +185,7 @@
void MessageBridgeParameterizedTest::RunPi2Server(
chrono::nanoseconds duration) {
+ LOG(INFO) << "Running pi2 server";
// Schedule a shutdown callback.
aos::TimerHandler *const quit = pi2_server_event_loop->AddTimer(
[this]() { pi2_server_event_loop->Exit(); });
@@ -186,11 +198,13 @@
}
void MessageBridgeParameterizedTest::StartPi2Server() {
+ LOG(INFO) << "Starting pi2 server";
pi2_server_thread =
std::make_unique<ThreadedEventLoopRunner>(pi2_server_event_loop.get());
}
void MessageBridgeParameterizedTest::StopPi2Server() {
+ LOG(INFO) << "Stopping pi2 server";
pi2_server_thread.reset();
pi2_message_bridge_server.reset();
pi2_server_event_loop.reset();
@@ -198,6 +212,7 @@
void MessageBridgeParameterizedTest::MakePi2Client() {
OnPi2();
+ LOG(INFO) << "Making pi2 client";
FLAGS_application_name = "pi2_message_bridge_client";
pi2_client_event_loop =
std::make_unique<aos::ShmEventLoop>(&config.message());
@@ -208,6 +223,7 @@
void MessageBridgeParameterizedTest::RunPi2Client(
chrono::nanoseconds duration) {
+ LOG(INFO) << "Running pi2 client";
// Run for 5 seconds to make sure we have time to estimate the offset.
aos::TimerHandler *const quit = pi2_client_event_loop->AddTimer(
[this]() { pi2_client_event_loop->Exit(); });
@@ -221,11 +237,13 @@
}
void MessageBridgeParameterizedTest::StartPi2Client() {
+ LOG(INFO) << "Starting pi2 client";
pi2_client_thread =
std::make_unique<ThreadedEventLoopRunner>(pi2_client_event_loop.get());
}
void MessageBridgeParameterizedTest::StopPi2Client() {
+ LOG(INFO) << "Stopping pi2 client";
pi2_client_thread.reset();
pi2_message_bridge_client.reset();
pi2_client_event_loop.reset();
@@ -233,6 +251,7 @@
void MessageBridgeParameterizedTest::MakePi2Test() {
OnPi2();
+ LOG(INFO) << "Making pi2 test";
FLAGS_application_name = "test2";
pi2_test_event_loop = std::make_unique<aos::ShmEventLoop>(&config.message());
@@ -257,10 +276,15 @@
}
void MessageBridgeParameterizedTest::StartPi2Test() {
+ LOG(INFO) << "Starting pi2 test";
pi2_test_thread =
std::make_unique<ThreadedEventLoopRunner>(pi2_test_event_loop.get());
}
-void MessageBridgeParameterizedTest::StopPi2Test() { pi2_test_thread.reset(); }
+void MessageBridgeParameterizedTest::StopPi2Test() {
+ LOG(INFO) << "Stopping pi2 test";
+ pi2_test_thread.reset();
+}
+
} // namespace message_bridge::testing
} // namespace aos
diff --git a/aos/network/multinode_timestamp_filter.cc b/aos/network/multinode_timestamp_filter.cc
index 417845b..85ee12c 100644
--- a/aos/network/multinode_timestamp_filter.cc
+++ b/aos/network/multinode_timestamp_filter.cc
@@ -877,20 +877,27 @@
return updated;
}
-std::tuple<std::vector<BootTimestamp>, Eigen::VectorXd, size_t, size_t>
+std::optional<
+ std::tuple<std::vector<BootTimestamp>, Eigen::VectorXd, size_t, size_t>>
SolveConstrainedNewton(NewtonSolver *solver, TimestampProblem *problem,
const size_t max_iterations) {
std::vector<size_t> active_constraints;
while (true) {
- Eigen::VectorXd y;
size_t iteration;
size_t solution_node;
- std::vector<size_t> new_active_constraints;
- std::tie(y, solution_node, iteration, new_active_constraints) =
- solver->SolveConstrainedNewton(problem, max_iterations,
- active_constraints);
+ auto solver_result = solver->SolveConstrainedNewton(problem, max_iterations,
+ active_constraints);
+ if (!solver_result.has_value()) {
+ return std::nullopt;
+ }
+
+ std::tie(std::ignore, solution_node, iteration, std::ignore) =
+ *solver_result;
+ Eigen::VectorXd y = std::move(std::get<0>(*solver_result));
+ std::vector<size_t> new_active_constraints =
+ std::move(std::get<3>(*solver_result));
std::vector<BootTimestamp> result =
problem->PackResults(iteration > max_iterations ||
@@ -913,7 +920,7 @@
}
}
-std::tuple<Eigen::VectorXd, size_t, size_t, std::vector<size_t>>
+std::optional<std::tuple<Eigen::VectorXd, size_t, size_t, std::vector<size_t>>>
NewtonSolver::SolveConstrainedNewton(
Problem *problem, const size_t max_iterations,
const absl::Span<const size_t> original_active_constraints) {
@@ -1197,8 +1204,9 @@
}
}
- if (iteration > max_iterations) {
- LOG(FATAL) << "Failed to converge on solve " << my_solve_number_;
+ if (iteration > max_iterations && FLAGS_crash_on_solve_failure) {
+ LOG(ERROR) << "Failed to converge on solve " << my_solve_number_;
+ return std::nullopt;
}
return std::make_tuple(std::move(y), solution_node, iteration,
@@ -1345,45 +1353,63 @@
}
}
-std::optional<const std::tuple<distributed_clock::time_point,
- std::vector<BootTimestamp>> *>
+std::optional<std::optional<const std::tuple<distributed_clock::time_point,
+ std::vector<BootTimestamp>> *>>
InterpolatedTimeConverter::QueueNextTimestamp() {
- std::optional<
- std::tuple<distributed_clock::time_point, std::vector<BootTimestamp>>>
+ std::optional<std::optional<
+ std::tuple<distributed_clock::time_point, std::vector<BootTimestamp>>>>
next_time = NextTimestamp();
- if (!next_time) {
- VLOG(1) << "Last timestamp, calling it quits";
- at_end_ = true;
+ if (!next_time.has_value()) {
+ VLOG(1) << "Error in processing timestamps.";
return std::nullopt;
}
+ if (!next_time.value().has_value()) {
+ VLOG(1) << "Last timestamp, calling it quits";
+ std::optional<std::optional<const std::tuple<distributed_clock::time_point,
+ std::vector<BootTimestamp>> *>>
+ result;
+ result.emplace(std::nullopt);
+ // Check that C++ actually works how we think it does...
+ CHECK(result.has_value());
+ at_end_ = true;
+ return result;
+ }
- VLOG(1) << "Fetched next timestamp while solving: " << std::get<0>(*next_time)
- << " ->";
- for (BootTimestamp t : std::get<1>(*next_time)) {
+ VLOG(1) << "Fetched next timestamp while solving: "
+ << std::get<0>(**next_time) << " ->";
+ for (BootTimestamp t : std::get<1>(**next_time)) {
VLOG(1) << " " << t;
}
- CHECK_EQ(node_count_, std::get<1>(*next_time).size());
+ CHECK_EQ(node_count_, std::get<1>(**next_time).size());
if (times_.empty()) {
- for (BootTimestamp t : std::get<1>(*next_time)) {
+ for (BootTimestamp t : std::get<1>(**next_time)) {
CHECK_EQ(t.boot, 0u);
}
} else {
bool rebooted = false;
for (size_t i = 0; i < node_count_; ++i) {
if (std::get<1>(times_.back())[i].boot !=
- std::get<1>(*next_time)[i].boot) {
+ std::get<1>(**next_time)[i].boot) {
rebooted = true;
break;
}
}
if (rebooted) {
CHECK(reboot_found_);
- reboot_found_(std::get<0>(*next_time), std::get<1>(*next_time));
+ if (VLOG_IS_ON(2)) {
+ VLOG(2) << "Notified reboot of";
+ size_t node_index = 0;
+ for (logger::BootTimestamp t : std::get<1>(**next_time)) {
+ VLOG(2) << " Node " << node_index << " " << t;
+ ++node_index;
+ }
+ }
+ reboot_found_(std::get<0>(**next_time), std::get<1>(**next_time));
}
}
- times_.emplace_back(std::move(*next_time));
+ times_.emplace_back(std::move(**next_time));
return &times_.back();
}
@@ -1685,7 +1711,7 @@
}
}
-void MultiNodeNoncausalOffsetEstimator::FlushAndClose(bool destructor) {
+bool MultiNodeNoncausalOffsetEstimator::FlushAndClose(bool destructor) {
// Write out all the data in our filters.
FlushAllSamples(true);
if (fp_) {
@@ -1718,6 +1744,7 @@
}
}
}
+ filter_fps_.clear();
}
if (sample_fps_.size() != 0) {
for (std::vector<FILE *> &filter_fp : sample_fps_) {
@@ -1727,6 +1754,7 @@
}
}
}
+ sample_fps_.clear();
}
if (all_done_) {
@@ -1736,13 +1764,14 @@
std::optional<std::tuple<BootTimestamp, BootDuration>> next =
filter.filter->Consume();
if (next) {
- skip_order_validation_
- ? LOG(WARNING)
- : LOG(FATAL) << "MultiNodeNoncausalOffsetEstimator reported all "
- "done, but "
- << node_a_index << " -> " << filter.b_index
- << " found more data at time " << std::get<0>(*next)
- << ". Time estimation was silently wrong.";
+ LOG(ERROR) << "MultiNodeNoncausalOffsetEstimator reported all "
+ "done, but "
+ << node_a_index << " -> " << filter.b_index
+ << " found more data at time " << std::get<0>(*next)
+ << ". Time estimation was silently wrong.";
+ if (!skip_order_validation_) {
+ return false;
+ }
}
}
++node_a_index;
@@ -1753,14 +1782,21 @@
if (!node_samples_.empty()) {
for (NodeSamples &node : node_samples_) {
for (SingleNodeSamples &timestamps : node.nodes) {
- CHECK(timestamps.messages.empty());
+ if (!timestamps.messages.empty()) {
+ LOG(ERROR) << "Timestamps still remaining.";
+ return false;
+ }
}
}
}
+ return true;
}
MultiNodeNoncausalOffsetEstimator::~MultiNodeNoncausalOffsetEstimator() {
- FlushAndClose(true);
+ const bool success = FlushAndClose(true);
+ if (!non_fatal_destructor_checks_) {
+ CHECK(success);
+ }
}
UUID MultiNodeNoncausalOffsetEstimator::boot_uuid(size_t node_index,
@@ -2163,12 +2199,15 @@
BitSet64 all_live_nodes(problem.size());
const BitSet64 all_nodes = ~BitSet64(problem.size());
- for (size_t node_index = 0; node_index < timestamp_mappers_.size();
- ++node_index) {
- if (timestamp_mappers_[node_index] != nullptr) {
- // Make sure we have enough data queued such that if we are going to
- // have a timestamp on this filter, we do have a timestamp queued.
- timestamp_mappers_[node_index]->QueueFor(time_estimation_buffer_seconds_);
+ if (time_estimation_buffer_seconds_ != chrono::seconds(0)) {
+ for (size_t node_index = 0; node_index < timestamp_mappers_.size();
+ ++node_index) {
+ if (timestamp_mappers_[node_index] != nullptr) {
+ // Make sure we have enough data queued such that if we are going to
+ // have a timestamp on this filter, we do have a timestamp queued.
+ timestamp_mappers_[node_index]->QueueFor(
+ time_estimation_buffer_seconds_);
+ }
}
}
@@ -2188,6 +2227,10 @@
problem.add_clock_offset_filter(node_a_index, filter.filter,
filter.b_index, filter.b_filter);
+ if (time_estimation_buffer_seconds_ == chrono::seconds(0)) {
+ continue;
+ }
+
if (timestamp_mappers_[node_a_index] != nullptr) {
// Now, we have cases at startup where we have a couple of points
// followed by a long gap, followed by the body of the data. We are
@@ -2423,7 +2466,8 @@
return std::make_tuple(candidate_times, boots_all_match);
}
-std::tuple<NoncausalTimestampFilter *, std::vector<BootTimestamp>, int>
+std::optional<
+ std::tuple<NoncausalTimestampFilter *, std::vector<BootTimestamp>, int>>
MultiNodeNoncausalOffsetEstimator::SimultaneousSolution(
TimestampProblem *problem,
const std::vector<CandidateTimes> candidate_times,
@@ -2482,8 +2526,11 @@
if (iterations > kMaxIterations && FLAGS_crash_on_solve_failure) {
UpdateSolution(std::move(solution));
- FlushAndClose(false);
- LOG(FATAL) << "Failed to converge.";
+ if (!FlushAndClose(false)) {
+ return std::nullopt;
+ }
+ LOG(ERROR) << "Failed to converge.";
+ return std::nullopt;
}
if (!problem->ValidateSolution(solution, true)) {
@@ -2497,13 +2544,16 @@
problem->Debug();
if (!skip_order_validation_) {
UpdateSolution(solution);
- FlushAndClose(false);
- LOG(FATAL) << "Bailing, use --skip_order_validation to continue. "
+ if (!FlushAndClose(false)) {
+ return std::nullopt;
+ }
+ LOG(ERROR) << "Bailing, use --skip_order_validation to continue. "
"Use at your own risk.";
+ return std::nullopt;
}
} else {
// Aw, our initial attempt to solve failed. Now, let's try again with
- // constriants.
+ // constraints.
for (size_t node_index = 0; node_index < base_times.size();
++node_index) {
problem->set_base_clock(node_index, solution[node_index]);
@@ -2515,14 +2565,24 @@
}
problem->set_points(points);
- std::tie(solution, solution_y, solution_index, iterations) =
+ auto solver_result =
SolveConstrainedNewton(&solver, problem, kMaxIterations);
+ if (!solver_result.has_value()) {
+ return std::nullopt;
+ }
+ solution = std::move(std::get<0>(solver_result.value()));
+ solution_y = std::move(std::get<1>(solver_result.value()));
+ std::tie(std::ignore, std::ignore, solution_index, iterations) =
+ *solver_result;
if (iterations > kMaxIterations && FLAGS_crash_on_solve_failure) {
UpdateSolution(std::move(solution));
- FlushAndClose(false);
- LOG(FATAL) << "Failed to converge for problem "
+ if (!FlushAndClose(false)) {
+ return std::nullopt;
+ }
+ LOG(ERROR) << "Failed to converge for problem "
<< solver.my_solve_number();
+ return std::nullopt;
}
if (!problem->ValidateSolution(solution, false)) {
LOG(WARNING) << "Invalid solution, constraints not met for problem "
@@ -2533,9 +2593,12 @@
problem->Debug();
if (!skip_order_validation_) {
UpdateSolution(solution);
- FlushAndClose(false);
- LOG(FATAL) << "Bailing, use --skip_order_validation to continue. "
+ if (!FlushAndClose(false)) {
+ return std::nullopt;
+ }
+ LOG(ERROR) << "Bailing, use --skip_order_validation to continue. "
"Use at your own risk.";
+ return std::nullopt;
}
}
}
@@ -2548,7 +2611,7 @@
return std::make_tuple(next_filter, std::move(result_times), solution_index);
}
-void MultiNodeNoncausalOffsetEstimator::CheckInvalidDistance(
+bool MultiNodeNoncausalOffsetEstimator::CheckInvalidDistance(
const std::vector<BootTimestamp> &result_times,
const std::vector<BootTimestamp> &solution) {
// If times are close enough, drop the invalid time.
@@ -2562,7 +2625,7 @@
<< (result_times[i].time - solution[i].time).count() << "ns";
}
VLOG(1) << "Ignoring because it is close enough.";
- return;
+ return true;
}
// Somehow the new solution is better *and* worse than the old
// solution... This is an internal failure because that means time
@@ -2579,13 +2642,18 @@
LOG(ERROR) << "Skipping because --skip_order_validation";
} else {
UpdateSolution(solution);
- FlushAndClose(false);
- LOG(FATAL) << "Please investigate. Use --max_invalid_distance_ns="
+ if (!FlushAndClose(false)) {
+ return false;
+ }
+ LOG(ERROR) << "Please investigate. Use --max_invalid_distance_ns="
<< invalid_distance.count() << " to ignore this.";
+ return false;
}
+ return true;
}
-std::tuple<NoncausalTimestampFilter *, std::vector<BootTimestamp>, int>
+std::optional<
+ std::tuple<NoncausalTimestampFilter *, std::vector<BootTimestamp>, int>>
MultiNodeNoncausalOffsetEstimator::SequentialSolution(
TimestampProblem *problem,
const std::vector<CandidateTimes> candidate_times,
@@ -2689,9 +2757,12 @@
if (iterations > kMaxIterations && FLAGS_crash_on_solve_failure) {
UpdateSolution(std::move(solution));
- FlushAndClose(false);
- LOG(FATAL) << "Failed to converge for problem "
+ if (!FlushAndClose(false)) {
+ return std::nullopt;
+ }
+ LOG(ERROR) << "Failed to converge for problem "
<< solver.my_solve_number();
+ return std::nullopt;
}
// Bypass checking if order validation is turned off. This lets us dump a
@@ -2710,9 +2781,12 @@
problem->Debug();
if (!skip_order_validation_) {
UpdateSolution(solution);
- FlushAndClose(false);
- LOG(FATAL) << "Bailing, use --skip_order_validation to continue. "
+ if (!FlushAndClose(false)) {
+ return std::nullopt;
+ }
+ LOG(ERROR) << "Bailing, use --skip_order_validation to continue. "
"Use at your own risk.";
+ return std::nullopt;
}
} else {
// Seed with our last solution.
@@ -2722,14 +2796,24 @@
}
// And now resolve constrained.
problem->set_points(points);
- std::tie(solution, solution_y, solution_index, iterations) =
+ auto solver_result =
SolveConstrainedNewton(&solver, problem, kMaxIterations);
+ if (!solver_result.has_value()) {
+ return std::nullopt;
+ }
+ solution = std::move(std::get<0>(solver_result.value()));
+ solution_y = std::move(std::get<1>(solver_result.value()));
+ std::tie(std::ignore, std::ignore, solution_index, iterations) =
+ *solver_result;
if (iterations > kMaxIterations && FLAGS_crash_on_solve_failure) {
UpdateSolution(std::move(solution));
- FlushAndClose(false);
- LOG(FATAL) << "Failed to converge for problem "
+ if (!FlushAndClose(false)) {
+ return std::nullopt;
+ }
+ LOG(ERROR) << "Failed to converge for problem "
<< solver.my_solve_number();
+ return std::nullopt;
}
if (!problem->ValidateSolution(solution, false)) {
LOG(WARNING) << "Invalid solution, constraints not met for problem "
@@ -2740,9 +2824,12 @@
problem->Debug();
if (!skip_order_validation_) {
UpdateSolution(solution);
- FlushAndClose(false);
- LOG(FATAL) << "Bailing, use --skip_order_validation to continue. "
+ if (!FlushAndClose(false)) {
+ return std::nullopt;
+ }
+ LOG(ERROR) << "Bailing, use --skip_order_validation to continue. "
"Use at your own risk.";
+ return std::nullopt;
}
}
}
@@ -2776,7 +2863,9 @@
solution_index = node_a_index;
break;
case TimeComparison::kInvalid: {
- CheckInvalidDistance(result_times, solution);
+ if (!CheckInvalidDistance(result_times, solution)) {
+ return std::nullopt;
+ }
if (next_node_filter) {
std::optional<std::tuple<logger::BootTimestamp, logger::BootDuration>>
result = next_node_filter->Consume();
@@ -2799,18 +2888,18 @@
return std::make_tuple(next_filter, std::move(result_times), solution_index);
}
-std::tuple<NoncausalTimestampFilter *, std::vector<BootTimestamp>, int>
+std::optional<
+ std::tuple<NoncausalTimestampFilter *, std::vector<BootTimestamp>, int>>
MultiNodeNoncausalOffsetEstimator::NextSolution(
TimestampProblem *problem, const std::vector<BootTimestamp> &base_times) {
// Ok, now solve for the minimum time on each channel.
- std::vector<BootTimestamp> result_times;
-
bool boots_all_match = true;
std::vector<CandidateTimes> candidate_times;
std::tie(candidate_times, boots_all_match) = MakeCandidateTimes();
- NoncausalTimestampFilter *next_filter = nullptr;
- size_t solution_index = 0;
+ std::optional<
+ std::tuple<NoncausalTimestampFilter *, std::vector<BootTimestamp>, int>>
+ result;
// We can significantly speed things up if we know that all the boots match by
// solving for everything at once. If the boots don't match, the combined min
@@ -2818,21 +2907,21 @@
// actually using the boot from the candidate times to figure out which
// interpolation function to use under the hood.
if (boots_all_match) {
- std::tie(next_filter, result_times, solution_index) =
+ result =
SimultaneousSolution(problem, std::move(candidate_times), base_times);
} else {
// If all the boots don't match, fall back to the old method of comparing
// all the solutions individually.
- std::tie(next_filter, result_times, solution_index) =
+ result =
SequentialSolution(problem, std::move(candidate_times), base_times);
}
- if (VLOG_IS_ON(1)) {
- VLOG(1) << "Best solution is for node " << solution_index;
- for (size_t i = 0; i < result_times.size(); ++i) {
- VLOG(1) << " " << result_times[i];
+ if (VLOG_IS_ON(1) && result.has_value()) {
+ VLOG(1) << "Best solution is for node " << std::get<2>(*result);
+ for (size_t i = 0; i < std::get<1>(*result).size(); ++i) {
+ VLOG(1) << " " << std::get<1>(*result)[i];
}
}
- return std::make_tuple(next_filter, std::move(result_times), solution_index);
+ return result;
}
void MultiNodeNoncausalOffsetEstimator::UpdateSolution(
@@ -2902,19 +2991,24 @@
}
}
-std::optional<
- std::tuple<distributed_clock::time_point, std::vector<BootTimestamp>>>
+std::optional<std::optional<
+ std::tuple<distributed_clock::time_point, std::vector<BootTimestamp>>>>
MultiNodeNoncausalOffsetEstimator::NextTimestamp() {
// TODO(austin): Detect and handle there being fewer nodes in the log file
// than in replay, or them being in a different order.
TimestampProblem problem = MakeProblem();
// Ok, now solve for the minimum time on each channel.
- std::vector<BootTimestamp> result_times;
+ auto next_solution = NextSolution(&problem, last_monotonics_);
+ if (!next_solution.has_value()) {
+ return std::nullopt;
+ }
+ std::vector<BootTimestamp> result_times =
+ std::move(std::get<1>(next_solution.value()));
NoncausalTimestampFilter *next_filter = nullptr;
int solution_node_index = 0;
- std::tie(next_filter, result_times, solution_node_index) =
- NextSolution(&problem, last_monotonics_);
+ std::tie(next_filter, std::ignore, solution_node_index) =
+ next_solution.value();
CHECK(!all_done_);
@@ -2954,7 +3048,13 @@
if (fp_) {
fflush(fp_);
}
- return std::nullopt;
+ std::optional<std::optional<
+ std::tuple<distributed_clock::time_point, std::vector<BootTimestamp>>>>
+ result;
+ result.emplace(std::nullopt);
+ CHECK(result.has_value());
+ CHECK(!result.value().has_value());
+ return result;
}
{
@@ -3011,11 +3111,13 @@
}
UpdateSolution(std::move(result_times));
WriteFilter(next_filter, sample);
- FlushAndClose(false);
- LOG(FATAL)
+ if (!FlushAndClose(false)) {
+ return std::nullopt;
+ }
+ LOG(ERROR)
<< "Found a solution before the last returned solution on node "
<< solution_node_index;
- break;
+ return std::nullopt;
case TimeComparison::kEq:
WriteFilter(next_filter, sample);
return NextTimestamp();
@@ -3038,9 +3140,12 @@
}
UpdateSolution(std::move(result_times));
WriteFilter(next_filter, sample);
- FlushAndClose(false);
- LOG(FATAL) << "Please investigate. Use --max_invalid_distance_ns="
+ if (!FlushAndClose(false)) {
+ return std::nullopt;
+ }
+ LOG(ERROR) << "Please investigate. Use --max_invalid_distance_ns="
<< invalid_distance.count() << " to ignore this.";
+ return std::nullopt;
} break;
}
}
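
The nested std::optional return types introduced throughout this file encode three distinct states. A self-contained illustration of the convention (outer nullopt = error, engaged outer with empty inner = clean end of data, fully engaged = a value):

#include <iostream>
#include <optional>
#include <utility>

// Tri-state result, as used by NextTimestamp() above.
using Result = std::optional<std::optional<int>>;

Result NextValue(bool error, bool done) {
  if (error) return std::nullopt;   // Error: outer optional empty.
  if (done) {
    Result result;
    result.emplace(std::nullopt);   // End of data: inner optional empty.
    return result;
  }
  return 42;                        // Normal value.
}

int main() {
  for (const auto &[error, done] : {std::pair{false, false},
                                    std::pair{false, true},
                                    std::pair{true, false}}) {
    Result r = NextValue(error, done);
    if (!r.has_value()) {
      std::cout << "error\n";
    } else if (!r->has_value()) {
      std::cout << "end of data\n";
    } else {
      std::cout << "value " << **r << "\n";
    }
  }
}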
diff --git a/aos/network/multinode_timestamp_filter.h b/aos/network/multinode_timestamp_filter.h
index e3bfde2..959fe85 100644
--- a/aos/network/multinode_timestamp_filter.h
+++ b/aos/network/multinode_timestamp_filter.h
@@ -121,7 +121,8 @@
// Returns the solution, the solution_node field from the final derivatives,
// the number of iterations it took, and a list of the constraints actually
// used in the solution.
- std::tuple<Eigen::VectorXd, size_t, size_t, std::vector<size_t>>
+ std::optional<
+ std::tuple<Eigen::VectorXd, size_t, size_t, std::vector<size_t>>>
SolveConstrainedNewton(Problem *problem, size_t max_iterations,
const absl::Span<const size_t> constraint_indices);
@@ -354,8 +355,9 @@
// Queues 1 more timestamp in the interpolation list. This is public for
// timestamp_extractor so it can hammer on the log until everything is queued.
- std::optional<const std::tuple<distributed_clock::time_point,
- std::vector<logger::BootTimestamp>> *>
+ // Return type matches that of NextTimestamp().
+ [[nodiscard]] std::optional<std::optional<const std::tuple<
+ distributed_clock::time_point, std::vector<logger::BootTimestamp>> *>>
QueueNextTimestamp();
private:
@@ -364,8 +366,12 @@
// A timestamp is a sample of the distributed clock and a corresponding point
// on every monotonic clock for all the nodes in the factory that this will be
// hooked up to.
- virtual std::optional<std::tuple<distributed_clock::time_point,
- std::vector<logger::BootTimestamp>>>
+ // If !NextTimestamp().has_value(), then an error occurred. If
+ // !NextTimestamp().value().has_value(), then there is merely no next
+ // timestamp available.
+ // TODO(james): Swap this to std::expected when available.
+ virtual std::optional<std::optional<std::tuple<
+ distributed_clock::time_point, std::vector<logger::BootTimestamp>>>>
NextTimestamp() = 0;
// Queues timestamps until the last time in the queue matches the provided
@@ -373,7 +379,9 @@
template <typename F>
void QueueUntil(F not_done) {
while (!at_end_ && (times_.empty() || not_done(times_.back()))) {
- QueueNextTimestamp();
+ // Check the outer std::optional to watch for errors.
+ CHECK(QueueNextTimestamp().has_value())
+ << ": An error occurred when queueing timestamps.";
}
CHECK(!times_.empty())
@@ -472,10 +480,6 @@
void SetTimestampMappers(
std::vector<logger::TimestampMapper *> timestamp_mappers);
- std::optional<std::tuple<distributed_clock::time_point,
- std::vector<logger::BootTimestamp>>>
- NextTimestamp() override;
-
UUID boot_uuid(size_t node_index, size_t boot_count) override;
// Checks that all the nodes in the graph are connected. Needs all filters to
@@ -504,6 +508,14 @@
// Returns the configuration that we are replaying into.
const aos::Configuration *configuration() const { return configuration_; }
+ // Runs some checks that normally run with fatal CHECK's in the destructor.
+ // Returns false if any checks failed. This is used to allow the
+ // logfile_validator to non-fatally identify certain log sorting issues.
+ [[nodiscard]] bool RunDestructorChecks() {
+ non_fatal_destructor_checks_ = true;
+ return FlushAndClose(false);
+ }
+
private:
static constexpr int kMaxIterations = 400;
@@ -516,35 +528,39 @@
TimestampProblem MakeProblem();
+ std::optional<std::optional<std::tuple<distributed_clock::time_point,
+ std::vector<logger::BootTimestamp>>>>
+ NextTimestamp() override;
+
// Returns the list of candidate times to solve for.
std::tuple<std::vector<CandidateTimes>, bool> MakeCandidateTimes() const;
// Returns the next solution, the filter which has the control point for it
// (or nullptr if a start time triggered this to be returned), and the node
// which triggered it.
- std::tuple<NoncausalTimestampFilter *, std::vector<logger::BootTimestamp>,
- int>
+ std::optional<std::tuple<NoncausalTimestampFilter *,
+ std::vector<logger::BootTimestamp>, int>>
NextSolution(TimestampProblem *problem,
const std::vector<logger::BootTimestamp> &base_times);
// Returns the solution (if there is one) for the list of candidate times by
// solving all the problems simultaneously. They must be from the same boot.
- std::tuple<NoncausalTimestampFilter *, std::vector<logger::BootTimestamp>,
- int>
+ std::optional<std::tuple<NoncausalTimestampFilter *,
+ std::vector<logger::BootTimestamp>, int>>
SimultaneousSolution(TimestampProblem *problem,
const std::vector<CandidateTimes> candidate_times,
const std::vector<logger::BootTimestamp> &base_times);
// Returns the solution (if there is one) for the list of candidate times by
// solving the problems one after another. They can be from any boot.
- std::tuple<NoncausalTimestampFilter *, std::vector<logger::BootTimestamp>,
- int>
+ std::optional<std::tuple<NoncausalTimestampFilter *,
+ std::vector<logger::BootTimestamp>, int>>
SequentialSolution(TimestampProblem *problem,
const std::vector<CandidateTimes> candidate_times,
const std::vector<logger::BootTimestamp> &base_times);
- // Explodes if the invalid distance is too far.
- void CheckInvalidDistance(
+ // Returns false if the invalid distance is too far.
+ [[nodiscard]] bool CheckInvalidDistance(
const std::vector<logger::BootTimestamp> &result_times,
const std::vector<logger::BootTimestamp> &solution);
@@ -560,15 +576,18 @@
// Writes everything to disk and closes it all out in preparation for either
// destruction or crashing.
- void FlushAndClose(bool destructor);
+ // Returns true if everything is healthy; returns false if we discovered
+ // sorting issues when closing things out.
+ [[nodiscard]] bool FlushAndClose(bool destructor);
const Configuration *configuration_;
const Configuration *logged_configuration_;
std::shared_ptr<const logger::Boots> boots_;
- // If true, skip any validation which would trigger if we see evidence that
- // time estimation between nodes was incorrect.
+ // If true, don't fatally die on any order validation failures which would
+ // trigger if we see evidence that time estimation between nodes was
+ // incorrect.
const bool skip_order_validation_;
// List of filters for a connection. The pointer to the first node will be
@@ -598,6 +617,7 @@
bool first_solution_ = true;
bool all_done_ = false;
+ bool non_fatal_destructor_checks_ = false;
// Optional file pointers to save the results of the noncausal filter in. This
// lives here so we can give each sample a distributed clock.
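
The header changes above convert fatal destructor-time checks into [[nodiscard]] bool results that the destructor enforces only by default. A minimal sketch of that shape, assuming glog-style macros and a hypothetical Estimator class:

#include "glog/logging.h"

class Estimator {
 public:
  ~Estimator() {
    const bool success = FlushAndClose();
    if (!non_fatal_destructor_checks_) {
      CHECK(success) << ": Estimator shut down with unflushed errors.";
    }
  }

  // Lets a validator tool observe failures without crashing.
  [[nodiscard]] bool RunDestructorChecks() {
    non_fatal_destructor_checks_ = true;
    return FlushAndClose();
  }

 private:
  [[nodiscard]] bool FlushAndClose() {
    if (leftover_data_) {
      LOG(ERROR) << "Data remaining after close; sorting was likely wrong.";
      return false;
    }
    return true;
  }

  bool non_fatal_destructor_checks_ = false;
  bool leftover_data_ = false;
};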
diff --git a/aos/network/multinode_timestamp_filter_test.cc b/aos/network/multinode_timestamp_filter_test.cc
index 9b37806..9aa76c0 100644
--- a/aos/network/multinode_timestamp_filter_test.cc
+++ b/aos/network/multinode_timestamp_filter_test.cc
@@ -407,7 +407,7 @@
std::vector<size_t> used_constraints;
std::tie(y, solution_node, iterations, used_constraints) =
- solver.SolveConstrainedNewton(&problem, 20, constraints);
+ solver.SolveConstrainedNewton(&problem, 20, constraints).value();
LOG(INFO) << y.transpose();
diff --git a/aos/network/testing_time_converter.cc b/aos/network/testing_time_converter.cc
index c175fe6..9dc5fff 100644
--- a/aos/network/testing_time_converter.cc
+++ b/aos/network/testing_time_converter.cc
@@ -22,7 +22,10 @@
TestingTimeConverter::~TestingTimeConverter() {
if (at_end_) {
- CHECK(!NextTimestamp()) << ": At the end but there is more data.";
+ auto next_timestamp = NextTimestamp();
+ CHECK(next_timestamp.has_value()) << ": Unexpected error";
+ CHECK(!next_timestamp.value().has_value())
+ << ": At the end but there is more data.";
}
}
@@ -105,13 +108,17 @@
ts_.emplace_back(std::make_tuple(time, std::move(times)));
}
-std::optional<std::tuple<distributed_clock::time_point,
- std::vector<logger::BootTimestamp>>>
+std::optional<std::optional<std::tuple<distributed_clock::time_point,
+ std::vector<logger::BootTimestamp>>>>
TestingTimeConverter::NextTimestamp() {
CHECK(!first_) << ": Tried to pull a timestamp before one was added. This "
"is unlikely to be what you want.";
if (ts_.empty()) {
- return std::nullopt;
+ std::optional<std::optional<std::tuple<distributed_clock::time_point,
+ std::vector<logger::BootTimestamp>>>>
+ result;
+ result.emplace(std::nullopt);
+ return result;
}
auto result = ts_.front();
ts_.pop_front();
diff --git a/aos/network/testing_time_converter.h b/aos/network/testing_time_converter.h
index 20b8e16..ae85cee 100644
--- a/aos/network/testing_time_converter.h
+++ b/aos/network/testing_time_converter.h
@@ -41,8 +41,8 @@
void AddNextTimestamp(distributed_clock::time_point time,
std::vector<logger::BootTimestamp> times);
- std::optional<std::tuple<distributed_clock::time_point,
- std::vector<logger::BootTimestamp>>>
+ std::optional<std::optional<std::tuple<distributed_clock::time_point,
+ std::vector<logger::BootTimestamp>>>>
NextTimestamp() override;
void set_boot_uuid(size_t node_index, size_t boot_count, UUID uuid) {
diff --git a/aos/network/timestamp_filter.cc b/aos/network/timestamp_filter.cc
index 77d3726..4a29dfc 100644
--- a/aos/network/timestamp_filter.cc
+++ b/aos/network/timestamp_filter.cc
@@ -1279,7 +1279,8 @@
<< " seconds after the last sample at " << std::get<0>(timestamps_[0])
<< ". Increase --time_estimation_buffer_seconds to greater than "
<< chrono::duration<double>(monotonic_now - std::get<0>(timestamps_[0]))
- .count();
+ .count()
+ << ", or set --force_timestamp_loading";
return;
}
CHECK_GT(monotonic_now, frozen_time_)
@@ -1287,7 +1288,8 @@
<< " before the frozen time of " << frozen_time_
<< ". Increase "
"--time_estimation_buffer_seconds to greater than "
- << chrono::duration<double>(frozen_time_ - monotonic_now).count();
+ << chrono::duration<double>(frozen_time_ - monotonic_now).count()
+ << ", or set --force_timestamp_loading";
// Future samples get quite a bit harder. We want the line to track the
// highest point without violating the slope constraint.
@@ -1333,7 +1335,8 @@
<< " seconds after the last sample at " << std::get<0>(timestamps_[0])
<< ". Increase --time_estimation_buffer_seconds to greater than "
<< chrono::duration<double>(monotonic_now - std::get<0>(timestamps_[0]))
- .count();
+ .count()
+ << ", or set --force_timestamp_loading";
// Back propagate the max velocity and remove any elements violating the
// velocity constraint. This is to handle the case where the offsets were
@@ -1357,8 +1360,8 @@
<< chrono::duration<double>(monotonic_now - std::get<0>(back)).count()
<< " seconds in the past. Increase --time_estimation_buffer_seconds "
"to greater than "
- << chrono::duration<double>(monotonic_now - std::get<0>(back))
- .count();
+ << chrono::duration<double>(monotonic_now - std::get<0>(back)).count()
+ << ", or set --force_timestamp_loading";
VLOG(1) << node_names_
<< " Removing now invalid sample during back propegation of "
<< TimeString(back);
@@ -1554,7 +1557,8 @@
<< ": " << node_names_
<< " Can't pop an already frozen sample. Increase "
"--time_estimation_buffer_seconds to greater than "
- << chrono::duration<double>(prior_dt).count();
+ << chrono::duration<double>(prior_dt).count()
+ << ", or set --force_timestamp_loading";
VLOG(1) << "Prior slope is too positive, removing prior point "
<< TimeString(*prior_it);
diff --git a/aos/sha256.cc b/aos/sha256.cc
index ae83792..cfeccde 100644
--- a/aos/sha256.cc
+++ b/aos/sha256.cc
@@ -7,6 +7,8 @@
#include "absl/types/span.h"
#include "openssl/sha.h"
+#include "aos/util/file.h"
+
namespace aos {
std::string Sha256(const absl::Span<const uint8_t> str) {
@@ -23,4 +25,13 @@
return ss.str();
}
+std::string Sha256(std::string_view str) {
+ return Sha256({reinterpret_cast<const uint8_t *>(str.data()), str.size()});
+}
+
+std::string Sha256OfFile(std::filesystem::path file) {
+ const std::string contents = aos::util::ReadFileToStringOrDie(file.string());
+ return Sha256(contents);
+}
+
} // namespace aos
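
The diff above only adds the string_view and file overloads; for reference, a hex-encoded one-shot digest with the OpenSSL/BoringSSL SHA256() call (which sha256.cc links against) can be sketched as follows. The shape of the real Sha256() body is an assumption here:

#include <cstddef>
#include <cstdint>
#include <iomanip>
#include <sstream>
#include <string>

#include "openssl/sha.h"

// One-shot SHA-256, hex-encoded lowercase, matching the helper's output
// format (64 hex characters for a 32-byte digest).
std::string Sha256Hex(const uint8_t *data, size_t size) {
  unsigned char digest[SHA256_DIGEST_LENGTH];
  SHA256(data, size, digest);
  std::stringstream out;
  for (unsigned char byte : digest) {
    out << std::hex << std::setw(2) << std::setfill('0')
        << static_cast<int>(byte);
  }
  return out.str();
}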
diff --git a/aos/sha256.h b/aos/sha256.h
index 7fb9b2f..117bc14 100644
--- a/aos/sha256.h
+++ b/aos/sha256.h
@@ -1,6 +1,7 @@
#ifndef AOS_SHA256_H_
#define AOS_SHA256_H_
+#include <filesystem>
#include <string>
#include "absl/types/span.h"
@@ -9,6 +10,10 @@
// Returns the sha256 of a span.
std::string Sha256(const absl::Span<const uint8_t> str);
+std::string Sha256(std::string_view str);
+
+// Returns the Sha256 of the specified file. Dies on failure to read the file.
+std::string Sha256OfFile(std::filesystem::path file);
} // namespace aos
diff --git a/aos/sha256_test.cc b/aos/sha256_test.cc
new file mode 100644
index 0000000..33ab5ec
--- /dev/null
+++ b/aos/sha256_test.cc
@@ -0,0 +1,27 @@
+#include "aos/sha256.h"
+
+#include "gtest/gtest.h"
+
+#include "aos/testing/tmpdir.h"
+#include "aos/util/file.h"
+
+namespace aos::testing {
+
+constexpr const char *kTestString = "Test String";
+constexpr const char *kTestStringSha =
+ "30c6ff7a44f7035af933babaea771bf177fc38f06482ad06434cbcc04de7ac14";
+
+TEST(Sha256Test, ChecksumString) {
+ EXPECT_EQ(kTestStringSha, Sha256(kTestString));
+ EXPECT_EQ("2b4da12a4bfe66a061c24440521a9e5b994e4f0bcc47a17436275f5283bb6852",
+ Sha256("Test String 2"));
+}
+
+TEST(Sha256Test, ChecksumFile) {
+ const std::filesystem::path test_file =
+ aos::testing::TestTmpDir() + "/test.txt";
+ util::WriteStringToFileOrDie(test_file.string(), kTestString);
+ EXPECT_EQ(kTestStringSha, Sha256OfFile(test_file));
+}
+
+} // namespace aos::testing
diff --git a/aos/starter/BUILD b/aos/starter/BUILD
index 7ef3777..732cfe3 100644
--- a/aos/starter/BUILD
+++ b/aos/starter/BUILD
@@ -201,11 +201,13 @@
"$(rootpath :aos_starter)",
"$(rootpath //aos:aos_dump)",
"$(rootpath //aos/events/logging:logger_main)",
+ "$(rootpath //aos/events:aos_timing_report_streamer)",
],
data = [
":aos_starter",
":starterd",
"//aos:aos_dump",
+ "//aos/events:aos_timing_report_streamer",
"//aos/events:ping",
"//aos/events:pingpong_config",
"//aos/events:pong",
diff --git a/aos/starter/starter.fbs b/aos/starter/starter.fbs
index 7285281..d2cd573 100644
--- a/aos/starter/starter.fbs
+++ b/aos/starter/starter.fbs
@@ -82,6 +82,12 @@
// actually stopping and the parent process receiving the signal indicating that the application
// finished stopping.
process_info: util.ProcessInfo (id: 7);
+
+ // Indicates whether we have observed a recent AOS timing report from
+ // the application. Staleness is calculated based on the timing report period
+ // specified for the starterd application (defaults to 1 Hz, can be overridden
+ // by --timing_report_ms).
+ has_active_timing_report: bool (id: 8);
}
root_type Status;
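
A sketch of the staleness rule described in the new field's comment, using std::chrono. The function name is hypothetical; the 3x margin matches the PopulateStatus() change to subprocess.cc later in this diff:

#include <chrono>

// A report is "active" if it arrived within a few timing-report periods of
// now, leaving margin for scheduling jitter.
bool HasActiveTimingReport(
    std::chrono::steady_clock::time_point last_report,
    std::chrono::steady_clock::time_point now,
    std::chrono::milliseconds timing_report_period) {
  return last_report + 3 * timing_report_period > now;
}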
diff --git a/aos/starter/starter_demo.py b/aos/starter/starter_demo.py
index 5a50890..89d06a0 100755
--- a/aos/starter/starter_demo.py
+++ b/aos/starter/starter_demo.py
@@ -7,9 +7,9 @@
DESCRIPTION = """
This script provides a convenient way to experiment with the starter
-and starter_cmd in particular. To run this, run:
+and aos_starter in particular. To run this, run:
$ bazel run //aos/starter:starter_demo
-This will then print out instructions for running starter_cmd.
+This will then print out instructions for running aos_starter.
If running via bazel, you should not need to specify the positional
arguments.
@@ -21,11 +21,7 @@
formatter_class=argparse.RawDescriptionHelpFormatter)
parser.add_argument("starterd", help="Location of starterd")
parser.add_argument("configs", help="Location of the config files")
- parser.add_argument("ping", help="Location of ping")
- parser.add_argument("pong", help="Location of pong")
- parser.add_argument("starter_cmd", help="Location of starter_cmd")
- parser.add_argument("aos_dump", help="Location of aos_dump")
- parser.add_argument("logger_main", help="Location of logger_main")
+ parser.add_argument("binaries", nargs='+', help="Binaries to provide")
args = parser.parse_args()
# Copy all the interesting files into a temporary directory and run
@@ -37,14 +33,11 @@
# can take a new --shm_base to allow cleaner running on shared systems.
with tempfile.TemporaryDirectory() as tmpdir:
shutil.copy(args.starterd, tmpdir + "/starterd")
- shutil.copy(args.ping, tmpdir + "/ping")
- shutil.copy(args.pong, tmpdir + "/pong")
- shutil.copy(args.starter_cmd, tmpdir + "/starter_cmd")
- shutil.copy(args.aos_dump, tmpdir + "/aos_dump")
- shutil.copy(args.logger_main, tmpdir + "/logger_main")
+ for binary in args.binaries:
+ shutil.copy(binary, tmpdir + "/" + os.path.basename(binary))
print(f"Running starter from {tmpdir}")
- print(f"\n\nTo run starter_cmd, do:\ncd {tmpdir}\n./starter_cmd\n\n")
+ print(f"\n\nTo run aos_starter, do:\ncd {tmpdir}\n./aos_starter\n\n")
for config in args.configs.split(' '):
basename = os.path.basename(config)
diff --git a/aos/starter/starter_test.cc b/aos/starter/starter_test.cc
index 3070f8e..cbb83f6 100644
--- a/aos/starter/starter_test.cc
+++ b/aos/starter/starter_test.cc
@@ -358,6 +358,8 @@
ASSERT_EQ(pong_app_status->pid(), pong_app_status->process_info()->pid());
ASSERT_TRUE(pong_app_status->process_info()->has_cpu_usage());
ASSERT_LE(0.0, pong_app_status->process_info()->cpu_usage());
+ ASSERT_TRUE(pong_app_status->has_has_active_timing_report());
+ ASSERT_TRUE(pong_app_status->has_active_timing_report());
watcher_loop.Exit();
SUCCEED();
}
diff --git a/aos/starter/starterd_lib.cc b/aos/starter/starterd_lib.cc
index f08ecc2..bf0fc31 100644
--- a/aos/starter/starterd_lib.cc
+++ b/aos/starter/starterd_lib.cc
@@ -36,10 +36,13 @@
: config_msg_(event_loop_config),
event_loop_(event_loop_config),
status_sender_(event_loop_.MakeSender<aos::starter::Status>("/aos")),
- status_timer_(event_loop_.AddTimer([this] {
- SendStatus();
- status_count_ = 0;
- })),
+ status_timer_(event_loop_.AddPhasedLoop(
+ [this](int elapsed_cycles) {
+ ServiceTimingReportFetcher(elapsed_cycles);
+ SendStatus();
+ status_count_ = 0;
+ },
+ std::chrono::milliseconds(1000))),
cleanup_timer_(event_loop_.AddTimer([this] {
event_loop_.Exit();
LOG(INFO) << "Starter event loop exit finished.";
@@ -47,17 +50,14 @@
max_status_count_(
event_loop_.GetChannel<aos::starter::Status>("/aos")->frequency() -
1),
+ timing_report_fetcher_(
+ event_loop_.MakeFetcher<aos::timing::Report>("/aos")),
shm_base_(FLAGS_shm_base),
listener_(&event_loop_,
[this](signalfd_siginfo signal) { OnSignal(signal); }),
top_(&event_loop_) {
event_loop_.SkipAosLog();
- event_loop_.OnRun([this] {
- status_timer_->Schedule(event_loop_.monotonic_now(),
- std::chrono::milliseconds(1000));
- });
-
if (!aos::configuration::MultiNode(config_msg_)) {
event_loop_.MakeWatcher(
"/aos",
@@ -266,6 +266,23 @@
event_loop_.Run();
}
+void Starter::ServiceTimingReportFetcher(int elapsed_cycles) {
+ // If there is any chance that it has been longer than one cycle since we last
+ // serviced the fetcher, call Fetch(). This reduces the chances that the
+ // fetcher falls behind when the system is under heavy load. Dropping a few
+ // timing report messages when the system is under stress is fine.
+ if (timing_report_fetcher_.get() == nullptr || elapsed_cycles > 1) {
+ timing_report_fetcher_.Fetch();
+ }
+ while (timing_report_fetcher_.FetchNext()) {
+ for (auto &application : applications_) {
+ application.second.ObserveTimingReport(
+ timing_report_fetcher_.context().monotonic_event_time,
+ timing_report_fetcher_.get());
+ }
+ }
+}
+
void Starter::SendStatus() {
aos::Sender<aos::starter::Status>::Builder builder =
status_sender_.MakeBuilder();
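
Restating the catch-up pattern from ServiceTimingReportFetcher() in isolation: Fetch() skips to the latest message, while FetchNext() walks forward one message at a time. DrainFetcher below is a hypothetical generic wrapper around the AOS Fetcher interface used above, not AOS API:

// If we may have fallen more than one phased-loop cycle behind, jump to the
// newest message rather than replaying a backlog under load; then consume
// anything that arrives after the point we skipped to.
template <typename Fetcher, typename Handler>
void DrainFetcher(Fetcher &fetcher, int elapsed_cycles, Handler &&handle) {
  if (fetcher.get() == nullptr || elapsed_cycles > 1) {
    fetcher.Fetch();
  }
  while (fetcher.FetchNext()) {
    handle(fetcher.context().monotonic_event_time, fetcher.get());
  }
}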
diff --git a/aos/starter/starterd_lib.h b/aos/starter/starterd_lib.h
index 775cdbc..92b20fa 100644
--- a/aos/starter/starterd_lib.h
+++ b/aos/starter/starterd_lib.h
@@ -59,6 +59,10 @@
// limit.
void HandleStateChange();
+ // Called periodically to run through the timing report fetcher and alert all
// the Applications to the new messages.
+ void ServiceTimingReportFetcher(int elapsed_cycles);
+
void SendStatus();
// Creates a MemoryMappedQueue for the given channel, to pre-allocate shared
@@ -71,12 +75,14 @@
aos::ShmEventLoop event_loop_;
aos::Sender<aos::starter::Status> status_sender_;
- aos::TimerHandler *status_timer_;
+ aos::PhasedLoopHandler *status_timer_;
aos::TimerHandler *cleanup_timer_;
int status_count_ = 0;
const int max_status_count_;
+ aos::Fetcher<aos::timing::Report> timing_report_fetcher_;
+
std::unordered_map<std::string, Application> applications_;
// Lock and list of all the queues. This makes it so we can initialize the
diff --git a/aos/starter/subprocess.cc b/aos/starter/subprocess.cc
index b1320f4..7b5fce6 100644
--- a/aos/starter/subprocess.cc
+++ b/aos/starter/subprocess.cc
@@ -8,6 +8,8 @@
#include "glog/logging.h"
+#include "aos/flatbuffer_merge.h"
+
namespace aos::starter {
// RAII class to become root and restore back to the original user and group
@@ -36,22 +38,25 @@
gid_t rgid_, egid_, sgid_;
};
-MemoryCGroup::MemoryCGroup(std::string_view name)
- : cgroup_(absl::StrCat("/sys/fs/cgroup/memory/aos_", name)) {
- Sudo sudo;
- int ret = mkdir(cgroup_.c_str(), 0755);
+MemoryCGroup::MemoryCGroup(std::string_view name, Create should_create)
+ : cgroup_(absl::StrCat("/sys/fs/cgroup/memory/aos_", name)),
+ should_create_(should_create) {
+ if (should_create_ == Create::kDoCreate) {
+ Sudo sudo;
+ int ret = mkdir(cgroup_.c_str(), 0755);
- if (ret != 0) {
- if (errno == EEXIST) {
- PCHECK(rmdir(cgroup_.c_str()) == 0)
- << ": Failed to remove previous cgroup " << cgroup_;
- ret = mkdir(cgroup_.c_str(), 0755);
+ if (ret != 0) {
+ if (errno == EEXIST) {
+ PCHECK(rmdir(cgroup_.c_str()) == 0)
+ << ": Failed to remove previous cgroup " << cgroup_;
+ ret = mkdir(cgroup_.c_str(), 0755);
+ }
}
- }
- if (ret != 0) {
- PLOG(FATAL) << ": Failed to create cgroup aos_" << cgroup_
- << ", do you have permission?";
+ if (ret != 0) {
+ PLOG(FATAL) << ": Failed to create cgroup aos_" << cgroup_
+ << ", do you have permission?";
+ }
}
}
@@ -59,20 +64,32 @@
if (pid == 0) {
pid = getpid();
}
- Sudo sudo;
- util::WriteStringToFileOrDie(absl::StrCat(cgroup_, "/tasks").c_str(),
- std::to_string(pid));
+ if (should_create_ == Create::kDoCreate) {
+ Sudo sudo;
+ util::WriteStringToFileOrDie(absl::StrCat(cgroup_, "/tasks").c_str(),
+ std::to_string(pid));
+ } else {
+ util::WriteStringToFileOrDie(absl::StrCat(cgroup_, "/tasks").c_str(),
+ std::to_string(pid));
+ }
}
void MemoryCGroup::SetLimit(std::string_view limit_name, uint64_t limit_value) {
- Sudo sudo;
- util::WriteStringToFileOrDie(absl::StrCat(cgroup_, "/", limit_name).c_str(),
- std::to_string(limit_value));
+ if (should_create_ == Create::kDoCreate) {
+ Sudo sudo;
+ util::WriteStringToFileOrDie(absl::StrCat(cgroup_, "/", limit_name).c_str(),
+ std::to_string(limit_value));
+ } else {
+ util::WriteStringToFileOrDie(absl::StrCat(cgroup_, "/", limit_name).c_str(),
+ std::to_string(limit_value));
+ }
}
MemoryCGroup::~MemoryCGroup() {
- Sudo sudo;
- PCHECK(rmdir(absl::StrCat(cgroup_).c_str()) == 0);
+ if (should_create_ == Create::kDoCreate) {
+ Sudo sudo;
+ PCHECK(rmdir(absl::StrCat(cgroup_).c_str()) == 0);
+ }
}
SignalListener::SignalListener(aos::ShmEventLoop *loop,
@@ -127,6 +144,11 @@
LOG_IF(WARNING, quiet_flag_ == QuietLogging::kNo)
<< "Failed to stop, sending SIGKILL to '" << name_
<< "' pid: " << pid_;
+ } else {
+ PLOG_IF(WARNING, quiet_flag_ == QuietLogging::kNo)
+ << "Failed to send SIGKILL to '" << name_ << "' pid: " << pid_;
+ stop_timer_->Schedule(event_loop_->monotonic_now() +
+ std::chrono::seconds(1));
}
})),
pipe_timer_(event_loop_->AddTimer([this]() { FetchOutputs(); })),
@@ -134,13 +156,11 @@
event_loop_->AddTimer([this]() { MaybeHandleSignal(); })),
on_change_({on_change}),
quiet_flag_(quiet_flag) {
- event_loop_->OnRun([this]() {
- // Every second poll to check if the child is dead. This is used as a
- // default for the case where the user is not directly catching SIGCHLD and
- // calling MaybeHandleSignal for us.
- child_status_handler_->Schedule(event_loop_->monotonic_now(),
- std::chrono::seconds(1));
- });
+ // Every second poll to check if the child is dead. This is used as a
+ // default for the case where the user is not directly catching SIGCHLD and
+ // calling MaybeHandleSignal for us.
+ child_status_handler_->Schedule(event_loop_->monotonic_now(),
+ std::chrono::seconds(1));
}
Application::Application(const aos::Application *application,
@@ -202,6 +222,7 @@
id_ = next_id_++;
start_time_ = event_loop_->monotonic_now();
status_ = aos::starter::State::STARTING;
+ latest_timing_report_version_.reset();
LOG_IF(INFO, quiet_flag_ == QuietLogging::kNo)
<< "Starting '" << name_ << "' pid " << pid_;
@@ -308,6 +329,16 @@
_exit(EXIT_FAILURE);
}
+void Application::ObserveTimingReport(
+ const aos::monotonic_clock::time_point send_time,
+ const aos::timing::Report *msg) {
+ if (msg->name()->string_view() == name_ && msg->pid() == pid_ &&
+ msg->has_version()) {
+ latest_timing_report_version_ = msg->version()->str();
+ last_timing_report_ = send_time;
+ }
+}
+
void Application::FetchOutputs() {
if (capture_stdout_) {
stdout_pipes_.read->Read(&stdout_);
@@ -462,6 +493,12 @@
if (exit_code_.has_value()) {
status_builder.add_last_exit_code(exit_code_.value());
}
+ status_builder.add_has_active_timing_report(
+ last_timing_report_ +
+ // Leave a bit of margin on the timing report receipt time, to allow
+ // for timing errors.
+ 3 * std::chrono::milliseconds(FLAGS_timing_report_ms) >
+ event_loop_->monotonic_now());
status_builder.add_last_stop_reason(stop_reason_);
if (pid_ != -1) {
status_builder.add_pid(pid_);
@@ -572,9 +609,17 @@
<< "Application '" << name_ << "' pid " << pid_
<< " exited with status " << exit_code_.value();
} else {
- LOG_IF(WARNING, quiet_flag_ == QuietLogging::kNo)
- << "Application '" << name_ << "' pid " << pid_
- << " exited unexpectedly with status " << exit_code_.value();
+ if (quiet_flag_ == QuietLogging::kNo) {
+ std::string version_string =
+ latest_timing_report_version_.has_value()
+ ? absl::StrCat("'", latest_timing_report_version_.value(),
+ "'")
+ : "unknown";
+ LOG_IF(WARNING, quiet_flag_ == QuietLogging::kNo)
+ << "Application '" << name_ << "' pid " << pid_ << " version "
+ << version_string << " exited unexpectedly with status "
+ << exit_code_.value();
+ }
}
if (autorestart()) {
QueueStart();
@@ -621,4 +666,12 @@
}
}
+Application::~Application() {
+ start_timer_->Disable();
+ restart_timer_->Disable();
+ stop_timer_->Disable();
+ pipe_timer_->Disable();
+ child_status_handler_->Disable();
+}
+
} // namespace aos::starter
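
The new else branch above retries SIGKILL delivery instead of silently giving up. A self-contained sketch of the escalation pattern with plain POSIX signals (TryStop and the bool flag are hypothetical; the real code re-arms an AOS TimerHandler roughly every second):

#include <signal.h>
#include <sys/types.h>

#include "glog/logging.h"

// Returns true once SIGKILL was delivered; otherwise the caller should
// re-arm its timer and call this again.
bool TryStop(pid_t pid, bool &sent_sigint) {
  if (!sent_sigint) {
    sent_sigint = true;
    kill(pid, SIGINT);   // Ask politely first.
    return false;        // Check again in ~1s; escalate if still alive.
  }
  if (kill(pid, SIGKILL) != 0) {
    PLOG(WARNING) << "Failed to send SIGKILL to pid " << pid;
    return false;        // Re-arm the timer and retry SIGKILL.
  }
  return true;
}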
diff --git a/aos/starter/subprocess.h b/aos/starter/subprocess.h
index ff62117..784c544 100644
--- a/aos/starter/subprocess.h
+++ b/aos/starter/subprocess.h
@@ -43,7 +43,14 @@
// Class to use the V1 cgroup API to limit memory usage.
class MemoryCGroup {
public:
- MemoryCGroup(std::string_view name);
+ // Enum to control if MemoryCGroup should create the cgroup and remove it on
+ // its own, or if it should assume it already exists and just use it.
+ enum class Create {
+ kDoCreate,
+ kDoNotCreate,
+ };
+
+ MemoryCGroup(std::string_view name, Create should_create = Create::kDoCreate);
~MemoryCGroup();
// Adds a thread ID to be managed by the cgroup.
@@ -54,6 +61,7 @@
private:
std::string cgroup_;
+ Create should_create_;
};
// Manages a running process, allowing starting and stopping, and restarting
@@ -73,6 +81,8 @@
aos::EventLoop *event_loop, std::function<void()> on_change,
QuietLogging quiet_flag = QuietLogging::kNo);
+ ~Application();
+
flatbuffers::Offset<aos::starter::ApplicationStatus> PopulateStatus(
flatbuffers::FlatBufferBuilder *builder, util::Top *top);
aos::starter::State status() const { return status_; };
@@ -92,8 +102,12 @@
void Start() { HandleCommand(aos::starter::Command::START); }
+ // Stops the command by sending a SIGINT first, followed by a SIGKILL if it's
+ // still alive in 1s.
void Stop() { HandleCommand(aos::starter::Command::STOP); }
+ // Stops the command the same way as Stop() does, but updates internal state
+ // to reflect that the application was terminated.
void Terminate();
// Adds a callback which gets notified when the application changes state.
@@ -111,6 +125,7 @@
bool autostart() const { return autostart_; }
bool autorestart() const { return autorestart_; }
+ void set_autorestart(bool autorestart) { autorestart_ = autorestart; }
const std::string &GetStdout();
const std::string &GetStderr();
@@ -124,6 +139,23 @@
memory_cgroup_->SetLimit("memory.limit_in_bytes", limit);
}
+ // Sets the cgroup and memory limit to a pre-existing cgroup which is
+ // externally managed. This lets us configure the cgroup of an application
+ // without root access.
+ void SetExistingCgroupMemoryLimit(std::string_view name, size_t limit) {
+ if (!memory_cgroup_) {
+ memory_cgroup_ = std::make_unique<MemoryCGroup>(
+ name, MemoryCGroup::Create::kDoNotCreate);
+ }
+ memory_cgroup_->SetLimit("memory.limit_in_bytes", limit);
+ }
+
+ // Observe a timing report message, and save it if it is relevant to us.
+ // It is the responsibility of the caller to manage this, because the lifetime
+ // of the Application itself is such that it cannot own Fetchers readily.
+ void ObserveTimingReport(const aos::monotonic_clock::time_point send_time,
+ const aos::timing::Report *msg);
+
private:
typedef aos::util::ScopedPipe::PipePair PipePair;
@@ -192,6 +224,12 @@
aos::TimerHandler *start_timer_, *restart_timer_, *stop_timer_, *pipe_timer_,
*child_status_handler_;
+ // Version string from the most recent valid timing report for this
+ // application. Cleared when the application restarts.
+ std::optional<std::string> latest_timing_report_version_;
+ aos::monotonic_clock::time_point last_timing_report_ =
+ aos::monotonic_clock::min_time;
+
std::vector<std::function<void()>> on_change_;
std::unique_ptr<MemoryCGroup> memory_cgroup_;
diff --git a/aos/starter/subprocess_test.cc b/aos/starter/subprocess_test.cc
index 4aa371b..cab22b4 100644
--- a/aos/starter/subprocess_test.cc
+++ b/aos/starter/subprocess_test.cc
@@ -1,5 +1,8 @@
#include "aos/starter/subprocess.h"
+#include <signal.h>
+#include <sys/types.h>
+
#include "gtest/gtest.h"
#include "aos/events/shm_event_loop.h"
@@ -179,4 +182,47 @@
EXPECT_EQ(aos::starter::State::STOPPED, error_out.status());
}
+// Tests that Nothing Bad™ happens if the event loop outlives the Application.
+//
+// Note that this is a bit of a hope test, as there is no guarantee that we
+// will trigger a crash even if the resources tied to the event loop in the
+// aos::Application aren't properly released.
+TEST_F(SubprocessTest, ShortLivedApp) {
+ const std::string config_file =
+ ::aos::testing::ArtifactPath("aos/events/pingpong_config.json");
+
+ aos::FlatbufferDetachedBuffer<aos::Configuration> config =
+ aos::configuration::ReadConfig(config_file);
+ aos::ShmEventLoop event_loop(&config.message());
+
+ auto application =
+ std::make_unique<Application>("sleep", "sleep", &event_loop, []() {});
+ application->set_args({"10"});
+ application->Start();
+ pid_t pid = application->get_pid();
+
+ int ticks = 0;
+ aos::TimerHandler *exit_timer = event_loop.AddTimer([&event_loop, &ticks,
+ &application, pid]() {
+ ticks++;
+ if (application && application->status() == aos::starter::State::RUNNING) {
+ // Kill the application, it will autorestart.
+ kill(pid, SIGTERM);
+ application.reset();
+ }
+
+ // event loop lives for longer.
+ if (ticks >= 5) {
+ // Now we exit.
+ event_loop.Exit();
+ }
+ });
+
+ event_loop.OnRun([&event_loop, exit_timer]() {
+ exit_timer->Schedule(event_loop.monotonic_now(),
+ std::chrono::milliseconds(1000));
+ });
+
+ event_loop.Run();
+}
} // namespace aos::starter::testing
diff --git a/aos/test_init.rs b/aos/test_init.rs
new file mode 100644
index 0000000..a8380e2
--- /dev/null
+++ b/aos/test_init.rs
@@ -0,0 +1,21 @@
+use aos_init::init;
+
+autocxx::include_cpp! (
+#include "aos/testing/tmpdir.h"
+
+safety!(unsafe)
+
+generate!("aos::testing::SetTestShmBase")
+);
+
+// TODO(Brian): Should we provide a proc macro attribute that handles calling this?
+/// Initializes things for a test.
+///
+/// # Panics
+///
+/// Panics if non-test initialization has already been performed.
+pub fn test_init() {
+ init();
+ ffi::aos::testing::SetTestShmBase();
+ // TODO(Brian): Do we want any of the other stuff that `:gtest_main` has?
+}
diff --git a/aos/testdata/BUILD b/aos/testdata/BUILD
index 8b87682..076e6c9 100644
--- a/aos/testdata/BUILD
+++ b/aos/testdata/BUILD
@@ -27,7 +27,7 @@
"invalid_source_node.json",
"self_forward.json",
],
- visibility = ["//aos:__pkg__"],
+ visibility = ["//visibility:public"],
)
genrule(
diff --git a/aos/testing/BUILD b/aos/testing/BUILD
index 787fdda..02459e4 100644
--- a/aos/testing/BUILD
+++ b/aos/testing/BUILD
@@ -8,6 +8,7 @@
visibility = ["//visibility:public"],
deps = [
"//aos:init",
+ "//aos/testing:tmpdir",
"@com_github_gflags_gflags//:gflags",
"@com_github_google_glog//:glog",
"@com_google_googletest//:gtest",
@@ -100,6 +101,9 @@
hdrs = ["tmpdir.h"],
target_compatible_with = ["@platforms//os:linux"],
visibility = ["//visibility:public"],
+ deps = [
+ "//aos/ipc_lib:shm_base",
+ ],
)
cc_library(
diff --git a/aos/testing/gtest_main.cc b/aos/testing/gtest_main.cc
index 21f141f..2fb77fe 100644
--- a/aos/testing/gtest_main.cc
+++ b/aos/testing/gtest_main.cc
@@ -7,6 +7,7 @@
#include "gtest/gtest.h"
#include "aos/init.h"
+#include "aos/testing/tmpdir.h"
DEFINE_bool(print_logs, false,
"Print the log messages as they are being generated.");
@@ -14,7 +15,6 @@
"Print all log messages to FILE instead of standard output.");
namespace aos {
-void SetShmBase(const std::string_view base) __attribute__((weak));
namespace testing {
@@ -48,12 +48,7 @@
// Point shared memory away from /dev/shm if we are testing. We don't care
// about RT in this case, so if it is backed by disk, we are fine.
- if (::aos::SetShmBase) {
- const char *tmpdir_c_str = getenv("TEST_TMPDIR");
- if (tmpdir_c_str != nullptr) {
- aos::SetShmBase(tmpdir_c_str);
- }
- }
+ aos::testing::SetTestShmBase();
return RUN_ALL_TESTS();
}
diff --git a/aos/testing/test_shm.cc b/aos/testing/test_shm.cc
index 0787e5a..8dc189a 100644
--- a/aos/testing/test_shm.cc
+++ b/aos/testing/test_shm.cc
@@ -2,6 +2,8 @@
#include <sys/mman.h>
+#include "gflags/gflags.h"
+
#include "aos/logging/logging.h"
#include "aos/testing/test_logging.h"
diff --git a/aos/testing/tmpdir.cc b/aos/testing/tmpdir.cc
index 15e3c13..7287603 100644
--- a/aos/testing/tmpdir.cc
+++ b/aos/testing/tmpdir.cc
@@ -3,16 +3,24 @@
#include <cstdlib>
#include <string>
+#include "aos/ipc_lib/shm_base.h"
+
namespace aos {
namespace testing {
-std::string TestTmpDir() {
+namespace {
+std::string TestTmpDirOr(std::string fallback) {
const char *tmp_dir = std::getenv("TEST_TMPDIR");
if (tmp_dir != nullptr) {
return tmp_dir;
}
- return "/tmp";
+ return fallback;
}
+} // namespace
+
+std::string TestTmpDir() { return TestTmpDirOr("/tmp"); }
+
+void SetTestShmBase() { SetShmBase(TestTmpDirOr(FLAGS_shm_base)); }
} // namespace testing
} // namespace aos
diff --git a/aos/testing/tmpdir.h b/aos/testing/tmpdir.h
index 7e64342..8eabf86 100644
--- a/aos/testing/tmpdir.h
+++ b/aos/testing/tmpdir.h
@@ -9,6 +9,10 @@
// Returns a usable temporary directory.
std::string TestTmpDir();
+// Sets shm_base to a folder inside of TEST_TMPDIR if set, or --shm_base
+// otherwise.
+void SetTestShmBase();
+
} // namespace testing
} // namespace aos
diff --git a/aos/util/file.cc b/aos/util/file.cc
index 52657a9..973ab3c 100644
--- a/aos/util/file.cc
+++ b/aos/util/file.cc
@@ -200,11 +200,19 @@
PCHECK(file_.get() != -1) << ": opening " << filename;
}
-absl::Span<char> FileReader::ReadContents(absl::Span<char> buffer) {
+std::optional<absl::Span<char>> FileReader::ReadContents(
+ absl::Span<char> buffer) {
PCHECK(0 == lseek(file_.get(), 0, SEEK_SET));
const ssize_t result = read(file_.get(), buffer.data(), buffer.size());
+ if (result < 0) {
+ // Read timeout for an i2c request returns this.
+ if (errno == EREMOTEIO) {
+ return std::nullopt;
+ }
+ }
+
PCHECK(result >= 0);
- return {buffer.data(), static_cast<size_t>(result)};
+ return absl::Span<char>{buffer.data(), static_cast<size_t>(result)};
}
FileWriter::FileWriter(std::string_view filename, mode_t permissions)
@@ -216,21 +224,25 @@
// absl::SimpleAtoi doesn't interpret a leading 0x as hex, which we need here.
// Instead, we use the flatbuffers API, which unfortunately relies on NUL
// termination.
-int32_t FileReader::ReadInt32() {
+std::optional<int32_t> FileReader::ReadInt32() {
// Maximum characters for a 32-bit integer, +1 for the NUL.
// Hex is the same size with the leading 0x.
std::array<char, 11> buffer;
int32_t result;
- const auto string_span =
+ const std::optional<absl::Span<char>> string_span =
ReadContents(absl::Span<char>(buffer.data(), buffer.size())
.subspan(0, buffer.size() - 1));
+ if (!string_span.has_value()) {
+ return std::nullopt;
+ }
+
// Verify we found the newline.
- CHECK_EQ(buffer[string_span.size() - 1], '\n');
+ CHECK_EQ(buffer[string_span->size() - 1], '\n');
// Truncate the newline.
- buffer[string_span.size() - 1] = '\0';
+ buffer[string_span->size() - 1] = '\0';
CHECK(flatbuffers::StringToNumber(buffer.data(), &result))
<< ": Error parsing string to integer: "
- << std::string_view(string_span.data(), string_span.size());
+ << std::string_view(string_span->data(), string_span->size());
return result;
}
diff --git a/aos/util/file.h b/aos/util/file.h
index da5b29f..e825e4c 100644
--- a/aos/util/file.h
+++ b/aos/util/file.h
@@ -64,18 +64,22 @@
public:
FileReader(std::string_view filename);
// Reads the entire contents of the file into the internal buffer and returns
- // a string_view of it.
- // Note: The result may not be null-terminated.
- absl::Span<char> ReadContents(absl::Span<char> buffer);
+ // a string_view of it. Returns nullopt if we failed to read the contents
+ // (currently only on EREMOTEIO). Note: The result may not be null-terminated.
+ std::optional<absl::Span<char>> ReadContents(absl::Span<char> buffer);
// Returns the value of the file as a string, for a fixed-length file.
// Returns nullopt if the result is smaller than kSize. Ignores any
// bytes beyond kSize.
template <int kSize>
std::optional<std::array<char, kSize>> ReadString() {
std::array<char, kSize> result;
- const absl::Span<char> used_span =
+ const std::optional<absl::Span<char>> used_span =
ReadContents(absl::Span<char>(result.data(), result.size()));
- if (used_span.size() == kSize) {
+ if (!used_span.has_value()) {
+ return std::nullopt;
+ }
+
+ if (used_span->size() == kSize) {
return result;
} else {
return std::nullopt;
@@ -83,8 +87,8 @@
}
// Returns the value of the file as an integer. Crashes if it doesn't fit in a
// 32-bit integer. The value may start with 0x for a hex value, otherwise it
- // must be base 10.
- int32_t ReadInt32();
+ // must be base 10. Returns nullopt if we failed to read the file.
+ std::optional<int32_t> ReadInt32();
private:
aos::ScopedFD file_;
diff --git a/aos/util/file_test.cc b/aos/util/file_test.cc
index ec4bfe4..905a2cd 100644
--- a/aos/util/file_test.cc
+++ b/aos/util/file_test.cc
@@ -91,10 +91,10 @@
aos::ScopedRealtime realtime;
{
std::array<char, 20> contents;
- absl::Span<char> read_result =
+ std::optional<absl::Span<char>> read_result =
reader.ReadContents({contents.data(), contents.size()});
EXPECT_EQ("123456789\n",
- std::string_view(read_result.data(), read_result.size()));
+ std::string_view(read_result->data(), read_result->size()));
}
{
std::optional<std::array<char, 10>> read_result = reader.ReadString<10>();
diff --git a/aos/util/scoped_pipe.cc b/aos/util/scoped_pipe.cc
index e2e0ca5..d64fad1 100644
--- a/aos/util/scoped_pipe.cc
+++ b/aos/util/scoped_pipe.cc
@@ -55,8 +55,8 @@
read(fd(), buffer->data() + buffer->size() - kBufferSize, kBufferSize);
if (result == -1) {
if (errno == EAGAIN || errno == EWOULDBLOCK) {
- buffer->resize(original_size);
- return 0;
+ buffer->resize(original_size + read_bytes);
+ return read_bytes;
}
PLOG(FATAL) << "Error on reading pipe.";
} else if (result < kBufferSize) {
diff --git a/aos/util/scoped_pipe_test.cc b/aos/util/scoped_pipe_test.cc
index c71e272..0e024c2 100644
--- a/aos/util/scoped_pipe_test.cc
+++ b/aos/util/scoped_pipe_test.cc
@@ -44,6 +44,21 @@
}
}
+// Tests using string read/write methods on the ScopedPipe objects.
+TEST(ScopedPipeTest, StringPipe2048) {
+ ScopedPipe::PipePair pipe = ScopedPipe::MakePipe();
+ std::string buffer;
+ ASSERT_EQ(0u, pipe.read->Read(&buffer))
+ << "Shouldn't get anything on empty read.";
+ ASSERT_TRUE(buffer.empty());
+
+ std::string a(2048, 'a');
+ pipe.write->Write(absl::Span<const uint8_t>(
+ reinterpret_cast<const uint8_t *>(a.data()), a.size()));
+ ASSERT_EQ(2048u, pipe.read->Read(&buffer));
+ ASSERT_EQ(a, buffer);
+}
+
// Tests that calling SetCloexec succeeds and does indeed set FD_CLOEXEC.
TEST(ScopedPipeTest, SetCloexec) {
ScopedPipe::PipePair pipe = ScopedPipe::MakePipe();
diff --git a/debian/BUILD b/debian/BUILD
index e2a8541..d8f1753 100644
--- a/debian/BUILD
+++ b/debian/BUILD
@@ -112,6 +112,27 @@
)
download_packages(
+ name = "download_rsync_packages",
+ excludes = [
+ ],
+ packages = [
+ "rsync",
+ ],
+)
+
+download_packages(
+ name = "download_ssh_packages",
+ excludes = [
+ "libcbor0.8",
+ "libsemanage2",
+ "libsepol2",
+ ],
+ packages = [
+ "ssh",
+ ],
+)
+
+download_packages(
name = "download_gtk_runtime",
excludes = [
"libstdc++6",
@@ -425,6 +446,7 @@
exports_files([
"ssh_wrapper.sh",
+ "rsync_wrapper.sh",
"curl.BUILD",
"BUILD.zlib.bazel",
])
diff --git a/debian/packages.bzl b/debian/packages.bzl
index e6e34a6..babc0c0 100644
--- a/debian/packages.bzl
+++ b/debian/packages.bzl
@@ -24,7 +24,7 @@
# 6. Add a new "new_http_archive" entry to the WORKSPACE file for the tarball
# you just uploaded.
-def download_packages(name, packages, excludes = [], force_includes = [], force_excludes = [], target_compatible_with = None):
+def download_packages(name, packages, excludes = [], force_includes = [], force_excludes = [], target_compatible_with = None, release = "bullseye"):
"""Downloads a set of packages as well as their dependencies.
You can also specify excludes in case some of the dependencies are meta
@@ -65,8 +65,8 @@
# --- end runfiles.bash initialization v2 ---
-exec "$$(rlocation org_frc971/debian/download_packages)" %s %s %s %s "$$@"
-END""" % (force_includes, force_excludes, excludes_list, package_list),
+exec "$$(rlocation org_frc971/debian/download_packages)" %s %s %s %s --release=%s "$$@"
+END""" % (force_includes, force_excludes, excludes_list, package_list, release),
target_compatible_with = target_compatible_with,
)
native.sh_binary(
diff --git a/debian/rsync.BUILD b/debian/rsync.BUILD
index 4fe45b1..d761ace 100644
--- a/debian/rsync.BUILD
+++ b/debian/rsync.BUILD
@@ -1,5 +1,26 @@
-filegroup(
+genrule(
+ name = "copy_rsync_wrapper",
+ srcs = ["@//debian:rsync_wrapper.sh"],
+ outs = ["rsync_wrapper.sh"],
+ cmd = "cp $< $@",
+)
+
+sh_binary(
name = "rsync",
- srcs = ["usr/bin/rsync"],
+ srcs = [
+ "rsync_wrapper.sh",
+ ],
+ data = [
+ "usr/bin/rsync",
+ ":libs",
+ "@bazel_tools//tools/bash/runfiles",
+ ],
visibility = ["//visibility:public"],
)
+
+filegroup(
+ name = "libs",
+ srcs = glob([
+ "usr/lib/x86_64-linux-gnu/**",
+ ]),
+)
diff --git a/debian/rsync.bzl b/debian/rsync.bzl
index 6be610d..768a98c 100644
--- a/debian/rsync.bzl
+++ b/debian/rsync.bzl
@@ -1,3 +1,9 @@
files = {
- "rsync_3.1.2-1+deb9u2_amd64.deb": "f2987623a6e5b5aedf56e679bedd2e6f7d54cdb4815ac4149c4d135da16ff9c2",
+ "init-system-helpers_1.60_all.deb": "43420922c5e3aa747f8854236bf381a35179bba3885b242edb104751dad20644",
+ "liblz4-1_1.9.3-2_amd64.deb": "79ac6e9ca19c483f2e8effcc3401d723dd9dbb3a4ae324714de802adb21a8117",
+ "libpopt0_1.18-2_amd64.deb": "2f9fca8afbc5a18211ad46fa5f5df323662ba4d5212fa36bfc30dd551ae86b28",
+ "libssl1.1_1.1.1n-0+deb11u5_amd64.deb": "08be73a6a5454a8978c5a71ea5ca4b3a6909ce6cc927890729ebd6f9af12d9d8",
+ "libxxhash0_0.8.0-2_amd64.deb": "3fb82550a71d27d05672472508548576dfb34486847bc860d3066cda5aaf186f",
+ "lsb-base_11.1.0_all.deb": "89ed6332074d827a65305f9a51e591dff20641d61ff5e11f4e1822a9987e96fe",
+ "rsync_3.2.3-4+deb11u1_amd64.deb": "2ba613ac761266dcfc8518b06c257c790c4d02ea311163829871dc38fbf08ba0",
}
diff --git a/debian/rsync_wrapper.sh b/debian/rsync_wrapper.sh
new file mode 100755
index 0000000..9a8d4ba
--- /dev/null
+++ b/debian/rsync_wrapper.sh
@@ -0,0 +1,21 @@
+#!/bin/bash
+
+# --- begin runfiles.bash initialization v2 ---
+# Copy-pasted from the Bazel Bash runfiles library v2.
+set -uo pipefail; f=bazel_tools/tools/bash/runfiles/runfiles.bash
+source "${RUNFILES_DIR:-/dev/null}/$f" 2>/dev/null || \
+ source "$(grep -sm1 "^$f " "${RUNFILES_MANIFEST_FILE:-/dev/null}" | cut -f2- -d' ')" 2>/dev/null || \
+ source "$0.runfiles/$f" 2>/dev/null || \
+ source "$(grep -sm1 "^$f " "$0.runfiles_manifest" | cut -f2- -d' ')" 2>/dev/null || \
+ source "$(grep -sm1 "^$f " "$0.exe.runfiles_manifest" | cut -f2- -d' ')" 2>/dev/null || \
+ { echo>&2 "ERROR: cannot find $f"; exit 1; }; f=; set -e
+# --- end runfiles.bash initialization v2 ---
+
+LIB_PATH="$(rlocation rsync/usr/lib/x86_64-linux-gnu/libcrypto.so.1.1)"
+LIB_PATH="${LIB_PATH%/libcrypto.so.1.1}"
+export LD_LIBRARY_PATH="${LIB_PATH}"
+
+TOOL_PATH="$(rlocation rsync/usr/bin/rsync)"
+
+exec "${TOOL_PATH}" "$@"
diff --git a/debian/ssh.bzl b/debian/ssh.bzl
index 77b913c..9150540 100644
--- a/debian/ssh.bzl
+++ b/debian/ssh.bzl
@@ -1,4 +1,42 @@
files = {
- "openssh-client_7.9p1-10+deb10u1_amd64.deb": "1c30bcaf37dafe198783cf691096fe557e8eacbc9435631f51af62b3f705ee12",
- "libssl1.1_1.1.1d-0+deb10u2_amd64.deb": "31c15130e0e4b2c907ef7cd92e50be23320a22c0c3b54e130b5258fe6bd8df2d",
+ "adduser_3.118_all.deb": "bd71dd1ab8dcd6005390708f23741d07f1913877affb7604dfd55f85d009aa2b",
+ "cdebconf_0.260_amd64.deb": "f54b3308b019b3d4246f5ae114d7d8a489e26dcdc80db85fc8eec089fc31acb6",
+ "coreutils_8.32-4+b1_amd64.deb": "3558a412ab51eee4b60641327cb145bb91415f127769823b68f9335585b308d4",
+ "init-system-helpers_1.60_all.deb": "43420922c5e3aa747f8854236bf381a35179bba3885b242edb104751dad20644",
+ "libattr1_2.4.48-6_amd64.deb": "af3c3562eb2802481a2b9558df1b389f3c6d9b1bf3b4219e000e05131372ebaf",
+ "libbsd0_0.11.3-1_amd64.deb": "284a7b8dcfcad74770f57360721365317448b38ab773db542bf630e94e60c13e",
+ "libdb5.3_5.3.28+dfsg1-0.8_amd64.deb": "00b9e63e287f45300d4a4f59b6b88e25918443c932ae3e5845d5761ae193c530",
+ "libdebian-installer4_0.121_amd64.deb": "b97d270ea3588bcff281b2595ff89752e0f829e54c8c7a4cc9a46492692cd0fd",
+ "libedit2_3.1-20191231-2+b1_amd64.deb": "ac545f6ad10ba791aca24b09255ad1d6d943e6bc7c5511d5998e104aee51c943",
+ "libfido2-1_1.6.0-2_amd64.deb": "e9efa10b4e45e8c941883b4bea29bc68dc5a298b8619bd786a7ca1675ca0c197",
+ "libgcrypt20_1.8.7-6_amd64.deb": "7a2e0eef8e0c37f03f3a5fcf7102a2e3dc70ba987f696ab71949f9abf36f35ef",
+ "libgmp10_6.2.1+dfsg-1+deb11u1_amd64.deb": "fc117ccb084a98d25021f7e01e4dfedd414fa2118fdd1e27d2d801d7248aebbc",
+ "libgpg-error0_1.38-2_amd64.deb": "16a507fb20cc58b5a524a0dc254a9cb1df02e1ce758a2d8abde0bc4a3c9b7c26",
+ "liblz4-1_1.9.3-2_amd64.deb": "79ac6e9ca19c483f2e8effcc3401d723dd9dbb3a4ae324714de802adb21a8117",
+ "libmd0_1.0.3-3_amd64.deb": "9e425b3c128b69126d95e61998e1b5ef74e862dd1fc953d91eebcc315aea62ea",
+ "libncurses6_6.2+20201114-2+deb11u1_amd64.deb": "030173bda939906c849a022823b8b90a1984ccc249a8ee9b54ea1f08acc24e9e",
+ "libncursesw6_6.2+20201114-2+deb11u1_amd64.deb": "4a53efece402caaa798649f74bfb63fe21ac2fbcacf302554e46fbd17e5b30fc",
+ "libnewt0.52_0.52.21-4+b3_amd64.deb": "5a59623010dd29b006335c1bb989303031f1148080b90e76e857cd6298aa546e",
+ "libpam-modules-bin_1.4.0-9+deb11u1_amd64.deb": "abbbd181329c236676222d3e912df13f8d1d90a117559edd997d90006369e5c8",
+ "libpam-modules_1.4.0-9+deb11u1_amd64.deb": "ca1e121700bf4b3eb33e30e0774d3e63e1adae9d4b6a940ea3501225db3cc287",
+ "libpam-runtime_1.4.0-9+deb11u1_all.deb": "d98a68a56386d0992446417a4ee9fa685ebe841e81303a7d4f45cdd4c133c3f6",
+ "libpam0g_1.4.0-9+deb11u1_amd64.deb": "496771218fb585bb716fdae6ef8824dbfb5d544b4fa2f3cd4d0e4d7158ae2220",
+ "libprocps8_3.3.17-5_amd64.deb": "0a60017f0229cd4eec95b9f354c68312cc4ca4770ba8c01f545fd9c02b34e8a0",
+ "libslang2_2.3.2-5_amd64.deb": "107ad70aba3dc4dab2bc0fe11c3dd7c2afe9549dd45f4da3f4cf9d360e171eba",
+ "libssl1.1_1.1.1n-0+deb11u5_amd64.deb": "08be73a6a5454a8978c5a71ea5ca4b3a6909ce6cc927890729ebd6f9af12d9d8",
+ "libsystemd0_247.3-7+deb11u4_amd64.deb": "e6f3e65e388196a399c1a36564c38ad987337350358732056227db1b6e708878",
+ "libtextwrap1_0.1-14.2_amd64.deb": "6626eee49a3ad10c596955f1180bee6c937f5e9ea1404085516a29010ab8bd23",
+ "libtinfo6_6.2+20201114-2+deb11u1_amd64.deb": "92ac8a8c12f02a6fe08c47f33ea6fb313e1b9480484e26eab34e2058ea59fdb4",
+ "libudev1_247.3-7+deb11u4_amd64.deb": "9274ca1aa37fcdf5895dad1de0895162351099ef8dff8a62f2f4c9eb181a8fce",
+ "libwrap0_7.6.q-31_amd64.deb": "c6aa9c653857d807cff31682b5158722e8b16eeb3cec443d34d6eba52312e701",
+ "lsb-base_11.1.0_all.deb": "89ed6332074d827a65305f9a51e591dff20641d61ff5e11f4e1822a9987e96fe",
+ "openssh-client_8.4p1-5+deb11u1_amd64.deb": "5b908fa946a425c6f2b4e4b234f4e42ae61c35c6655dc06b9746145e09b6cb2f",
+ "openssh-server_8.4p1-5+deb11u1_amd64.deb": "7118fe69fee753a08480c97d9d873dfca7c7a6972366515ae26d8428301a0dea",
+ "openssh-sftp-server_8.4p1-5+deb11u1_amd64.deb": "b77715c95ef748aad458d1291df307bbe4ed2e6099d02f2c47dae961bd175cb7",
+ "passwd_4.8.1-1_amd64.deb": "542593f26502e87b4276fa778e6e3ae52e66b973979986fff77803d9fcb2c2e8",
+ "procps_3.3.17-5_amd64.deb": "ac8edf0517abe09637c36651cb6a59e10948b2879f3af9003b9145b2128a7a08",
+ "runit-helper_2.10.3_all.deb": "fb8add1955628b2ad896318553ac1e3bc7cfa2a7058e9c9dbaa23baa21fc53a7",
+ "sensible-utils_0.0.14_all.deb": "b9a447dc4ec8714196b037e20a2209e62cd669f5450222952f259bda4416b71f",
+ "ssh_8.4p1-5+deb11u1_all.deb": "3ec5c5a0ea62d534bed2b339a669920d90cf4777899fb90d3a9a45f747c107eb",
+ "ucf_3.0043_all.deb": "ebef6bcd777b5c0cc2699926f2159db08433aed07c50cb321fd828b28c5e8d53",
}
diff --git a/debian/ssh_wrapper.sh b/debian/ssh_wrapper.sh
index 687ed87..3343887 100755
--- a/debian/ssh_wrapper.sh
+++ b/debian/ssh_wrapper.sh
@@ -11,8 +11,8 @@
{ echo>&2 "ERROR: cannot find $f"; exit 1; }; f=; set -e
# --- end runfiles.bash initialization v2 ---
-LIB_PATH="$(rlocation ssh/usr/lib/x86_64-linux-gnu/libcrypto.so.1.0.0)"
-LIB_PATH="${LIB_PATH%/libcrypto.so.1.0.0}"
+LIB_PATH="$(rlocation ssh/usr/lib/x86_64-linux-gnu/libcrypto.so.1.1)"
+LIB_PATH="${LIB_PATH%/libcrypto.so.1.1}"
export LD_LIBRARY_PATH="${LIB_PATH}"
TOOL_PATH="$(rlocation ssh/%(TOOL))"
diff --git a/documentation/aos/docs/reference.md b/documentation/aos/docs/reference.md
index 05099c7..917b410 100644
--- a/documentation/aos/docs/reference.md
+++ b/documentation/aos/docs/reference.md
@@ -158,7 +158,23 @@
// Pointer to the data.
const void *data;
- // buffer_index and source_boot_uuid members omitted.
+ // Index of the message buffer. This will be in [0, NumberBuffers) on
+ // read_method=PIN channels, and -1 for other channels.
+ //
+ // This only tells you about the underlying storage for this message, not
+ // anything about its position in the queue. This is only useful for advanced
+ // zero-copy use cases, on read_method=PIN channels.
+ //
+ // This will uniquely identify a message on this channel at a point in time.
+ // For senders, this point in time is while the sender has the message. With
+ // read_method==PIN, this point in time includes while the caller has access
+ // to this context. For other read_methods, this point in time may be before
+ // the caller has access to this context, which makes this pretty useless.
+ int buffer_index;
+
+ // UUID of the remote node which sent this message, or this node in the case
+ // of events which are local to this node.
+ UUID source_boot_uuid = UUID::Zero();
// Efficiently copies the flatbuffer into a FlatbufferVector, allocating
// memory in the process. It is vital that T matches the type of the
@@ -781,14 +797,18 @@
3. You need to access the most-recently-sent message that may have been sent
before your process started.
-There are two main options for fetching data with a Fetcher:
+There are four main options for fetching data with a Fetcher:
1. `Fetch()`: Allows you to fetch the latest message on a channel.
2. `FetchNext()`: Allows you to fetch the message immediately after the
previously fetched message. This lets you use a fetcher to observe
every single message on the channel.
+3. `FetchIf()` and `FetchNextIf()`: Identical to the above, but take a
+   predicate, and will only fetch if the next message causes the predicate
+   to return true (e.g., if you only want to fetch
+ messages up until a certain point in time).
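+
+As a minimal sketch (assuming a `fetcher` made with `MakeFetcher`, and that
+the predicate receives the candidate message's `Context`), using
+`FetchNextIf()` to consume only messages sent before a cutoff time might look
+like:
+
+```cpp
+// Drain all messages sent before `cutoff`, leaving newer messages queued.
+const aos::monotonic_clock::time_point cutoff = event_loop->monotonic_now();
+while (fetcher.FetchNextIf([cutoff](const aos::Context &context) {
+  return context.monotonic_event_time < cutoff;
+})) {
+  // Process *fetcher here; every message seen predates `cutoff`.
+}
+```
+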
-Both `Fetch*()` calls return true if they got a new message and false otherwise,
+All `Fetch*()` calls return true if they got a new message and false otherwise,
making
```cpp
if (fetcher_.Fetch()) {
@@ -1050,6 +1070,10 @@
schedulers, as well as for managing the malloc hook that intercepts attempted
memory allocations.
+Additionally, the ROS2 design docs website has a [reasonable
+introduction](https://design.ros2.org/articles/realtime_background.html)
+to working with realtime code.
+
### Timing Reports
Timing reports are a FlatBuffer message sent out periodically by an
@@ -1252,14 +1276,523 @@
## ShmEventLoop
-## Realtime Code
+The `ShmEventLoop` is an implementation of the `EventLoop` API that is intended
+to run in realtime on a Linux system and uses shared memory to communicate
+between processes. This is currently the only `EventLoop` implementation for
+actually running realtime processes (the `SimulatedEventLoop` being the only
+other implementation of the `EventLoop` API, and it is only meant for
+simulation).
-The ROS2 design docs website has a [reasonable
-introduction](https://design.ros2.org/articles/realtime_background.html)
-to working with realtime code.
+Additionally, there are a set of common utilities & applications that go with
+the `ShmEventLoop` to form a complete realtime AOS system:
+
+* The `message_bridge_server` and `message_bridge_client` processes use the
+ `ShmEventLoop` to read data from shared memory channels, forward it to other
+ node(s) over SCTP, and then republish the data onto the shared memory channels
+ on the other node(s).
+* The `starterd` manages the applications running on a node, and allows
+ dynamically starting, stopping, and viewing the status of applications by
+ way of AOS channels.
+* The logger (which is sometimes configured in specialized ways, but generally
+ uses the `LogWriter` class).
+* `aos_dump`, `aos_send`, `aos_starter`, `aos_timing_report_streamer`, and other
+ utilities allow engineers to debug the system at the command line by exposing
+ AOS channels in various ways (see [Command-line Utilities](#command-line-utilities)).
+
+The `ShmEventLoop` does a few key things beyond just implementing what is
+required by the `EventLoop` API:
+
+* Has `Run()` and `Exit()` calls to directly start/stop execution of the event
+ loop.
+* Does signal handling to capture `SIGINT` and automatically exit.
+* Allows controlling the scheduling of the process (realtime priority, CPU
+ affinity)---technically these are actually part of the `EventLoop` API,
+ but are meaningless for any other `EventLoop` implementations.
+* Exposes the `EPoll` implementation used to back the `EventLoop` (this
+ becomes useful when wanting to interact with sockets, files, and sometimes
+ even other event loops/schedulers).
+
+### Design of the `ShmEventLoop`
+
+The goal of the `ShmEventLoop` is to provide an `EventLoop` implementation that
+can run on most Linux systems, and which has good enough performance to support
+soft/firm real-time systems where large amounts of data (e.g., raw images) may
+need to be moved over the pub-sub channels. In order to implement the core
+`EventLoop` API, we use two things:
+
+1. An IPC (InterProcess Communication) library using a lockless shared-memory
+ queue for managing the pub-sub channels.
+2. `epoll` for managing event scheduling.
+
+For each `EventLoop` feature, this means:
+
+* `Watcher`s are implemented by having the sending event loop generate
+  signals; each `Watcher` then has an associated `signalfd` that we use
+  `epoll` to watch and wake up on when new messages arrive.
+* `Timer`s and `PhasedLoop`s are implemented using `timerfd` and `epoll` to
+  wake up when the timers expire (see the sketch after this list).
+* `Sender`s and `Fetcher`s do not have to directly interact with the event
+ scheduling (beyond generating signals on sends, in the case of `Sender`s),
+ but do use the IPC library.
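+
+To make this concrete, here is a minimal, self-contained sketch of the
+`timerfd` + `epoll` pattern that backs `Timer`s. This is plain Linux API
+usage shown for illustration; it is not AOS code:
+
+```cpp
+#include <sys/epoll.h>
+#include <sys/timerfd.h>
+#include <unistd.h>
+
+#include <cstdint>
+#include <cstdio>
+
+int main() {
+  // Create a timer that first fires after 1 second, then every 1 second.
+  const int tfd = timerfd_create(CLOCK_MONOTONIC, 0);
+  itimerspec spec{};
+  spec.it_value.tv_sec = 1;
+  spec.it_interval.tv_sec = 1;
+  timerfd_settime(tfd, 0, &spec, nullptr);
+
+  const int epfd = epoll_create1(0);
+  epoll_event ev{};
+  ev.events = EPOLLIN;
+  ev.data.fd = tfd;
+  epoll_ctl(epfd, EPOLL_CTL_ADD, tfd, &ev);
+
+  for (int i = 0; i < 3; ++i) {
+    epoll_event out;
+    epoll_wait(epfd, &out, 1, -1);  // Block until the timer expires.
+    uint64_t expirations;
+    read(tfd, &expirations, sizeof(expirations));  // Acknowledge the wakeup.
+    printf("timer fired %llu time(s)\n",
+           static_cast<unsigned long long>(expirations));
+  }
+  close(epfd);
+  close(tfd);
+  return 0;
+}
+```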
+
+Outside of the "core" `EventLoop` API, the `ShmEventLoop` is also responsible
+for setting the current process priority, pinning it to the requested CPU
+core(s), and doing things like preventing calls to `malloc` while the
+`ShmEventLoop` is [Running](#running).
+
+For additional detail on the underlying IPC design, reference the [Design
+Doc](https://docs.google.com/document/d/10xulameLtEqjBFkm54UcN-5N-w5Q_XFNILvNf1Jl1Y4/edit#heading=h.y9blqcmsacou)
+and the code at `//aos/ipc_lib:lockless_queue`.
+
+### IPC Performance Considerations
+
+This section provides a discussion of how to use the `EventLoop` API in
+situations where you may have strong performance constraints (e.g.,
+you are processing large numbers of camera images that require transferring
+large amounts of data).
+
+Some discussion in this section can theoretically apply to any `EventLoop`
+implementation---everything discussed here just uses the generic `EventLoop`
+API and does not actually require doing anything that is specific to the
+`ShmEventLoop`. However, it is useful to discuss how the actual implementation
+works to understand the performance implications of different options.
+
+At a high level, the most common issues which we have observed regarding
+performance are:
+
+1. Copying messages can be very expensive, particularly for raw camera images;
+ for this reason, the `EventLoop` API provides zero-copy constructs that
+ allow the sender to construct messages in-place in shared memory and for
+ readers to read the message directly from shared memory without copying
+ it first. There are a variety of places where the "normal" APIs will do
+   copies by default, as well as some convenience APIs that are more rarely
+   used but which perform extraneous copies.
+2. For processes which must consume large numbers of individual messages (e.g.,
+ a logger), using `Watcher`s may get expensive due to the cost of waking
+ up for every signal. Using `Fetcher`s + a polling timer can mitigate this.
+3. Be aware that querying the clocks on the `ShmEventLoop` with
+ `monotonic_now()` and `realtime_now()` calls will actually query the system
+ clocks---if you want the time of the current event, use the [Event Loop
+ Context](#event-loop-context) (note that "doing too many clock queries"
+ is not typically a major issue).
+
+#### Avoiding Copies
+
+Copying around large chunks of memory can be expensive. As such, the `EventLoop`
+API is designed to let you avoid extraneous copies when you do not want them.
+This does mean paying attention to what you are doing at each step of the
+process.
+
+*Sending*: When sending a message, you should make sure to use the
+`aos::Sender::MakeBuilder()` call, which provides a `FlatBufferBuilder` that
+constructs your message in-place in shared-memory (the `Sender` will acquire
+ownership of a single slot in the shared memory queue, and leave the message
+unpublished until you call `Send`). If you need to fill your message with
+a large blob (e.g., an image), you can use the
+[CreateUninitializedVector](https://flatbuffers.dev/classflatbuffers_1_1_flat_buffer_builder.html#a2305b63d367845972b51669dd995cc50)
+method to get a pointer to a fixed-length buffer where you can fill in your
+data. Be aware that the `aos::Sender::Send()` method which takes a
+`NonSizePrefixedFlatbuffer` will do a copy, because it takes a FlatBuffer
+which has been constructed outside of its shared memory buffer. This is distinct
+from the `aos::Sender::Builder::Send()` calls, which assume that you have built
+your flatbuffer up using the provided `Builder` and so don't need to do extra
+copies---the `aos::Sender::Builder::Send()` calls are what almost all existing
+code uses. As an aside,
+because `Sender`s must construct messages in-place, there is a configurable
+limit on the maximum number of senders per-channel. If we allowed arbitrarily
+many senders per channel, then they could consume all of the slots in the
+shared memory queue and prevent any messages from actually flowing.
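+
+As a minimal sketch of the in-place construction pattern (using the
+`SensorData` example message from later in this document; the `/input`
+channel name is a placeholder and error handling is elided):
+
+```cpp
+// Build the message directly in a shared memory slot and send it; no copy
+// of the flatbuffer happens on Send().
+aos::Sender<examples::SensorData> sender =
+    event_loop->MakeSender<examples::SensorData>("/input");
+
+aos::Sender<examples::SensorData>::Builder builder = sender.MakeBuilder();
+examples::SensorData::Builder sensor_builder =
+    builder.MakeBuilder<examples::SensorData>();
+sensor_builder.add_hardware_capture_time_ns(12345);
+builder.CheckOk(builder.Send(sensor_builder.Finish()));
+```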
+
+*Receiving*: By default, `Fetcher`s and `Watcher`s will copy a message on
+receipt. This allows arbitrarily many processes to be fetching on a
+given channel by default. However, if the `read_method` for a channel is set to
+`PIN` in the [configuration](#configurations), then each reader will acquire a
+slot in the shared memory queue (this causes there to be a limit on the maximum
+number of allowed readers). If `PIN`d, then when you read the message you are
+reading directly from shared memory. Note that there is an independent
+`num_watchers` setting in the configuration for a channel; this maximum exists
+because the shared memory queue must have a fixed-length list of processes to
+notify when a message is sent.
+
+*`NoArgWatcher`s*: Regardless of whether you want zero-copy performance or not,
+being aware of `NoArgWatcher`s is useful as they allow you to receive a
+callback when a message is received without needing to actually copy the message
+out. This is typically paired with a `Fetcher` for that message, although there
+may also be situations where you just need to trigger some work or event
+whenever a channel is sent on, without caring about the message contents. See
+[Watchers](#watchers) for more discussion.
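+
+A short sketch of the `NoArgWatcher` + `Fetcher` pairing (the channel and
+message type are placeholders):
+
+```cpp
+// Wake up on every send without copying the message out; only fetch the
+// latest message when we actually need its contents.
+aos::Fetcher<examples::SensorData> fetcher =
+    event_loop->MakeFetcher<examples::SensorData>("/input");
+event_loop->MakeNoArgWatcher<examples::SensorData>("/input", [&fetcher]() {
+  if (fetcher.Fetch()) {
+    LOG(INFO) << "Capture time: " << fetcher->hardware_capture_time_ns();
+  }
+});
+```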
+
+#### High-rate Messages
+
+When dealing with high-rate (usually starting at ~1 kHz) messages, the context
+switches associated with using a watcher to wake up your process on every single
+message can become non-trivial. In applications where you really do want to wake
+up for every single message, this may still be appropriate, but if you instead
+just need to ensure that you are handling every message in a reasonably timely
+manner, it can be much cheaper to set up a periodic timer at a lower frequency
+(e.g., the logger generally polls at ~10 Hz, but even just getting below a
+kilohertz is typically good). In order to ensure that you read every message,
+you can then use a [Fetcher](#fetchers) with `FetchNext()` to iterate over
+all the messages received in the last polling period.
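+
+A hedged sketch of that poll-and-drain pattern (`HandleSensorData` and the
+channel are placeholders; the 100 ms period is illustrative):
+
+```cpp
+// Drain every queued message at ~10 Hz instead of waking once per message.
+aos::Fetcher<examples::SensorData> fetcher =
+    event_loop->MakeFetcher<examples::SensorData>("/input");
+aos::TimerHandler *poll_timer = event_loop->AddTimer([&fetcher]() {
+  // FetchNext() returns true until every queued message has been consumed.
+  while (fetcher.FetchNext()) {
+    HandleSensorData(*fetcher);
+  }
+});
+event_loop->OnRun([event_loop, poll_timer]() {
+  poll_timer->Schedule(event_loop->monotonic_now(),
+                       std::chrono::milliseconds(100));
+});
+```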
+
+### Typical `ShmEventLoop` usage
+
+There is a good sample `main()` in the [Getting Started](/getting_started/#writing-the-ping-main);
+the key pattern that is typically followed is:
+
+```cpp
+// We pull in a config that is stored somewhere on disk (as pointed to by
+// a --config flag).
+aos::FlatbufferDetachedBuffer<aos::Configuration> config =
+ aos::configuration::ReadConfig(FLAGS_config);
+
+aos::ShmEventLoop event_loop(&config.message());
+
+// The application being run is provided with an EventLoop*, which enables it to
+// access the EventLoop API without knowing whether it is on a realtime system
+// or just running in simulation.
+aos::Ping ping(&event_loop);
+
+// Actually run the EventLoop. This will block until event_loop.Exit() is
+// called or we receive a signal to exit (e.g., a Ctrl-C on the command line).
+event_loop.Run();
+```
+
+### Running (and Exiting) the `ShmEventLoop`
+
+In the vast majority of cases, your code will simply call `Run()` on the
+`ShmEventLoop` and not have to think about it any more. However, there are
+situations where you need to worry about more complexity.
+
+A few things to be aware of:
+
+* When the `ShmEventLoop` is [Running](#running), the process priority will be
+  elevated, and if the priority is real-time the process will die on any
+  attempted `malloc`. I.e., "Running" == Real-time.
+* If you wish to programmatically exit your process (e.g., it is a one-shot
+ process that just does some work and exits), use the `ExitHandle` API.
+ This way you can use `ShmEventLoop::MakeExitHandle` to provide your
+ application with a way to exit the process at runtime, and then use a
+ different `ExitHandle` implementation for testing.
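+
+A minimal sketch of that pattern, assuming a hypothetical one-shot
+application class that takes an `ExitHandle`:
+
+```cpp
+// A one-shot application that exits the event loop when its work is done.
+class OneShotWorker {
+ public:
+  OneShotWorker(aos::EventLoop *event_loop,
+                std::unique_ptr<aos::ExitHandle> exit_handle)
+      : exit_handle_(std::move(exit_handle)) {
+    event_loop->OnRun([this]() {
+      DoWork();  // Placeholder for the actual work.
+      exit_handle_->Exit();
+    });
+  }
+
+ private:
+  void DoWork() {}
+
+  std::unique_ptr<aos::ExitHandle> exit_handle_;
+};
+
+// At runtime (in tests, a different ExitHandle implementation can be
+// injected instead):
+aos::ShmEventLoop event_loop(&config.message());
+OneShotWorker worker(&event_loop, event_loop.MakeExitHandle());
+event_loop.Run();  // Returns once Exit() has been called.
+```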
+
+### `EPoll` interface
+
+As previously mentioned, the `ShmEventLoop` uses `epoll` under the hood to
+trigger wakeups on new events. Typically, you do not need to care about this;
+however, the `EPoll` class is exposed from the `ShmEventLoop` in order to enable
+you to interact with the system outside of the standard `EventLoop` API while
+still having your callbacks and events occur within the main thread of the
+`EventLoop`. In order to enable testing & simulation, most applications that do
+this will take an abstract `EventLoop*` and an `EPoll*` in their constructor;
+however, for cases where the application truly only makes sense to run against
+a `ShmEventLoop`, it may take a `ShmEventLoop` directly. By breaking out of
+the normal `EventLoop` API, your application will become harder to simulate,
+replay, etc. However, this is useful in some situations, e.g.:
+
+* An application that must provide a web server of some sort to external
+ applications, such as for a live debugging webpage.
+* Interacting with a CAN bus, UDP/TCP socket, etc. in order to interface with
+ and control some external system.
+* Building a camera driver that must read/send out camera images when they
+ become available.
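+
+For example, a hedged sketch of servicing an external socket from the event
+loop thread via the exposed `EPoll` object (`OpenExternalSocket()` is a
+hypothetical helper):
+
+```cpp
+aos::ShmEventLoop event_loop(&config.message());
+const int socket_fd = OpenExternalSocket();
+// Run a callback on the event loop thread whenever socket_fd is readable.
+event_loop.epoll()->OnReadable(socket_fd, [socket_fd]() {
+  char buffer[1024];
+  const ssize_t bytes = read(socket_fd, buffer, sizeof(buffer));
+  if (bytes > 0) {
+    // Handle the external data alongside the normal EventLoop callbacks.
+  }
+});
+event_loop.Run();
+event_loop.epoll()->DeleteFd(socket_fd);
+```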
+
+## Multi-Node
+
+AOS is built to support distributed systems where applications are running on
+separate devices which communicate with one another. The API is designed on the
+theory that typical applications should not need to care about which nodes
+a channel they send or listen on is forwarded to or from. However,
+it is expected that some applications will care and that you will need to care
+about the data flow for the system as a whole.
+
+There are two key things that change when you begin working with a multi-node
+AOS system as compared to a single-node system (or when working within a single
+node on a multi-node system):
+
+1. Clocks & time may not be fully synchronized across nodes. By default, AOS
+ also allows individual nodes on a system to reboot, complicating clock
+ management further.
+2. Message delivery between nodes can be unreliable, and will generally have
+ significantly worse latency than message delivery within a node. Note that
+   there are options to, e.g., allow for reliable message delivery, but any
+ network-based messaging will be constrained by the networking setup of your
+ system.
+
+See [Configuring Multi-Node Systems](#configuring-multi-node-systems) for
+information on how to set up an AOS configuration to be multi-node.
+
+Note: Currently, while "single-node" is technically a distinct mode from
+"multi-node" in AOS configurations, it is possible that we will deprecate the
+single-node mode in the future in favor of simply requiring that any
+single-node systems be multi-node systems with one node.
+
+### Message Forwarding & Reliability
+
+Any given channel can be configured to be forwarded between nodes. That channel
+may only be sent on from one node (the `source_node`), and can be forwarded to
+arbitrarily many other nodes. Each individual forwarding connection can be
+configured separately, such that forwarding channel `/foo` from node A to node B
+may be at a higher priority or reliability than from node A to node C. For each
+forwarding connection, you may configure:
+
+* Where & how to log timestamp information associated with the forwarding.
+* The SCTP priority with which to forward the message.
+* Whether to guarantee delivery of the message, akin to TCP ("reliable forwarding").
+* The time to live to set for messages with unreliable forwarding.
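+
+As a rough sketch (the node and channel names are illustrative; note that in
+the AOS configuration schema a `time_to_live` of 0 denotes reliable
+forwarding, and is otherwise a duration in nanoseconds), a forwarded channel
+might be configured like:
+
+```json
+{
+  "name": "/input",
+  "type": "examples.SensorData",
+  "source_node": "sensor",
+  "destination_nodes": [
+    {
+      "name": "compute",
+      "priority": 1,
+      "time_to_live": 5000000,
+      "timestamp_logger": "NOT_LOGGED"
+    }
+  ]
+}
+```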
+
+As alluded to, message forwarding is currently assumed to be implemented by
+[SCTP](https://en.wikipedia.org/wiki/Stream_Control_Transmission_Protocol) at
+runtime. In simulation, forwarding is entirely deterministic and reliable
+unless the user configures the `SimulatedNetworkBridge` otherwise. Should
+any additional `EventLoop` implementations be added in the future that do
+_not_ use SCTP, they would be expected to provide similar options for
+configuration, although one could reasonably create an implementation that did
+not implement all the possible configurability. For that matter, some SCTP
+implementations may not implement the priority option.
+
+Ordering is guaranteed within a channel---an observer on a channel will never
+observe a newer message before an older message. Ordering between channels is
+not guaranteed. Consider, for instance, two channels:
+
+* Channel `/big_and_slow` sends infrequent but large messages. At t=0sec it
+ sends a 100MB message which takes ~1 sec to transfer across the network.
+* Channel `/small_and_fast` sends frequent but small and high priority messages.
+ At t=0.01sec it sends a 100 byte message which crosses the network in <1ms.
+
+Depending on the SCTP implementation, it is entirely possible that the
+`/small_and_fast` message will arrive at the destination node before the
+`/big_and_slow` message. This is desired behavior (we wouldn't want a
+low-priority channel to be able to block the delivery of high-priority messages).
+
+Once the messages have arrived on the destination node, ordering will again be
+~guaranteed[^ordering-guarantee] across channels, but with respect to the time
+at which the message was sent in shared memory by the message bridge on
+the current node.
+
+[^ordering-guarantee]: There is actually an outstanding bug where in some
+ corner-cases, ordering is not actually guaranteed. See
+ https://github.com/frc971/971-Robot-Code/issues/29
+
+#### Forwarded Message Metadata
+
+Forwarded messages have additional metadata populated in the [Event Loop
+Context](#event-loop-context). The following fields become relevant for
+forwarded messages:
+
+```cpp
+// For a single-node configuration, these two are identical to *_event_time.
+// In a multinode configuration, these are the times that the message was
+// sent on the original node.
+monotonic_clock::time_point monotonic_remote_time;
+realtime_clock::time_point realtime_remote_time;
+
+// Index into the remote queue. Useful to determine if data was lost. In a
+// single-node configuration, this will match queue_index.
+uint32_t remote_queue_index;
+
+// UUID of the remote node which sent this message, or this node in the case
+// of events which are local to this node.
+UUID source_boot_uuid = UUID::Zero();
+```
+
+The remote times tell you the time at which the message was sent on the original
+node, and represent the clocks _on that node_. As such, in order to meaningfully
+compare the remote times to local times (e.g., if you want to figure out how
+long ago the message was actually sent) you must either (a) trust the realtime
+clocks; or (b) [compensate for the monotonic clock
+offsets](#correlating-monotonic-clocks-across-nodes). If you want the "event"
+time for when your current watcher or the like got triggered, use the regular
+`monotonic_event_time`. The only real use of the `remote_queue_index` would be
+to detect when messages were dropped over the network.
+
+The `source_boot_uuid` can be used to determine if the source node rebooted
+between observing messages. Arbitrarily many messages may have been dropped by
+the forwarding during a reboot, as we cannot guarantee that every single
+message sent by a node while it is rebooting gets forwarded.
+
+#### Reliable vs. Unreliable Forwarding
+
+"Unreliable" forwarding is essentially what it sounds like---messages will be
+forwarded; if they do not make it within the `time_to_live`, then they will not
+get delivered to the receiving node. Unreliable messages that were sent
+before the recipient node connected to the sending node are simply dropped.
+Unreliable forwarding is generally the default state
+for most channels.
+
+"Reliable" forwarding, on the other hand, carries two primary guarantees beyond
+the normal unreliable messages:
+
+* As long as two nodes are connected, all messages on reliable channels will be
+ forwarded, regardless of how much time must be spent retrying (this generally
+ makes reliable forwarding a poor choice for high-rate, latency-critical
+ messages).
+* When two nodes connect, the most recently sent message (if any) on each
+ reliable channel will get forwarded to the receiving node. This makes it so
+ that, e.g., if you have a central node that sends out a configuration message
+ once on startup then you can make it so that whenever the edge nodes connect
+ they will get the configuration message forwarded to them, even if they were
+ not online when the configuration message was originally published.
+
+Note that if the message bridges on two nodes disconnect and then reconnect in
+the future (without a node reboot occurring), then the semantics of reliable
+messages are similar to what happens on boot. Namely:
+
+* If no reliable messages were sent during the disconnect, then nothing happens.
+* If 1 or more reliable messages were sent during the disconnect, then the
+ latest message will get forwarded to the destination node.
+
+#### Message Bridge
+
+The applications that manage the forwarding of messages between nodes are
+collectively known as the "message bridge." In simulation, this is managed by
+the `SimulatedMessageBridge`, which at its core is a bunch of
+`std::deque`'s that are used to introduce simulated network latency (and/or
+unreliability) to the simulation. At runtime, the `message_bridge_client` and
+`message_bridge_server` handle forwarding messages from shared memory channels
+to SCTP. The server is responsible for listening to messages sent on the
+`source_node` and forwarding them to the network, while the client is
+responsible for subscribing to the server, receiving messages, and republishing
+them onto the shared memory channels locally.
+
+The message bridge processes themselves are "just" regular AOS processes. They
+use a `ShmEventLoop` to interface with AOS, and do need to break out of the
+normal abstractions to interact with SCTP. The main ways in which they break
+out of the abstractions, and which most users should be aware of, are:
+
+* The `message_bridge_client` uses `RawSender`s to allow itself to send on
+ channels which other applications cannot (since you shouldn't be able to send
+ on channels where you are not the `source_node`). Additionally, when sending
+ these messages, the client manually populates the various remote message
+ metadata.
+* The `message_bridge_server` sends special `RemoteMessage` messages on the
+ remote timestamp channels (the sending of these does not actually require
+ anything special, but the logger treats these messages specially).
+
+Typically, the operation of message bridge should be reasonably transparent to
+most users; however, there are times when it is useful to watch the message
+bridge status messages. The status messages contain a variety of information,
+but the main pieces of information that you are likely to care about are:
+
+* The connection state. Unless the state for a given node is `CONNECTED`,
+ messages will not be flowing. Note that states for the message bridge client
+ and server are tracked separately, so it is possible that you may be connected
+ to a node such that you are receiving messages from it but not successfully
+ sending to it, or vice-versa.
+* The current estimates of the monotonic clock offsets between the nodes. See
+ [Correlating Monotonic Clocks Across
+  Nodes](#correlating-monotonic-clocks-across-nodes).
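+
+For example, a hedged sketch of checking whether the connection to a node
+named "sensor" is currently `CONNECTED` (using the same
+`aos.message_bridge.ServerStatistics` message as the longer example below):
+
+```cpp
+aos::Fetcher<aos::message_bridge::ServerStatistics> status_fetcher =
+    event_loop->MakeFetcher<aos::message_bridge::ServerStatistics>("/aos");
+status_fetcher.Fetch();
+if (status_fetcher.get() != nullptr) {
+  for (const auto connection : *status_fetcher->connections()) {
+    if (connection->has_node() && connection->node()->has_name() &&
+        connection->node()->name()->string_view() == "sensor") {
+      // State is the connection-state enum from the message bridge
+      // flatbuffers.
+      if (connection->state() == aos::message_bridge::State::CONNECTED) {
+        LOG(INFO) << "Messages to the sensor node are flowing.";
+      }
+    }
+  }
+}
+```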
+
+### Cross-node Clocks
+
+Dealing with time & clocks across nodes can be tricky. While we do require that
+the monotonic clock on each device (generally corresponding to `CLOCK_MONOTONIC`
+on the system, and virtually always a time since boot) be monotonic, the
+realtime clock (i.e., the clock providing the time since the Unix Epoch) may not
+be monotonic, or even if it is monotonic, it may jump or speed up/slow down
+significantly. If you have NTP (or some similar protocol) set up on your system,
+then you may be able to control the behavior of the clocks more tightly than we
+guarantee, but AOS itself does not provide any guarantees around the realtime
+clock.
+
+Additionally, the log reading code currently makes assumptions about how quickly
+the monotonic clocks on different nodes can drift apart from one another. This
+is currently a [hard-coded value of 1ms /
+sec](https://github.com/frc971/971-Robot-Code/blob/790cb54590e4f28f61e2f1bcd2e6e12ca47d7713/aos/network/timestamp_filter.h#L21-L26)
+(i.e., we assume that over one second of time on one node's clock, the other
+node's clock will have advanced by somewhere between 999 and 1001 milliseconds).
+This number could plausibly be changed, but we have not yet encountered clocks
+actually drifting by enough to require that.
+
+#### Correlating Monotonic Clocks Across Nodes
+
+When dealing with time across nodes, we rely on the ongoing flow of messages to
+try and estimate the offsets between different clocks. The results of this
+estimation are published in the `aos.message_bridge.ServerStatistics` and
+`aos.message_bridge.ClientStatistics` messages (note that the offsets provided
+in these messages may be slightly different, as they are estimated independently).
+These estimates should be reasonable, but are currently not well validated in
+all possible corner cases. If you discover a situation where they are too
+unreliable for your use-case, that would be something we would want to fix.
+
+As an example, consider a situation where you are receiving sensor data from a
+node named `sensor`. This sensor data is on the channel `/input` with a type of
+`SensorData`. You wish to determine how old the sensor data is, but do not have
+accurate realtime clocks set up on your machine. As such, you would have
+something like:
+
+```cpp
+class SensorAgeReader {
+ public:
+ SensorAgeReader(aos::EventLoop *event_loop)
+ : event_loop_(event_loop),
+ clock_offset_fetcher_(
+ event_loop->MakeFetcher<aos::message_bridge::ServerStatistics>(
+ "/aos")) {
+ event_loop_->MakeWatcher(
+ "/input", [this](const SensorData &msg) { HandleSensorData(msg); });
+ }
+
+ void HandleSensorData(const SensorData &msg) {
+ std::chrono::nanoseconds monotonic_offset{0};
+ clock_offset_fetcher_.Fetch();
+ if (clock_offset_fetcher_.get() != nullptr) {
+ for (const auto connection : *clock_offset_fetcher_->connections()) {
+ if (connection->has_node() && connection->node()->has_name() &&
+ connection->node()->name()->string_view() == "sensor") {
+ if (connection->has_monotonic_offset()) {
+ monotonic_offset =
+ std::chrono::nanoseconds(connection->monotonic_offset());
+ } else {
+ // If we don't have a monotonic offset, that means we aren't
+ // connected, in which case we should just exit early.
+ // The ServerStatistics message will always populate statuses for
+ // every node, so we don't have to worry about missing the "sensor"
+ // node (although it can be good practice to check that the node you
+ // are looking for actually exists, to protect against programming
+ // errors).
+ LOG(WARNING) << "Message bridge disconnected.";
+ return;
+ }
+ break;
+ }
+ }
+ } else {
+ LOG(WARNING) << "No message bridge status available.";
+ return;
+ }
+ const aos::monotonic_clock::time_point now = event_loop_->monotonic_now();
+ // The monotonic_remote_time will be the time that the message was sent on
+ // the source node; by offsetting it by the monotonic_offset, we should get
+ // a reasonable estimate of when it was sent. This does not account for any
+ // delays between the sensor reading and when it actually got sent.
+ const aos::monotonic_clock::time_point send_time(
+ event_loop_->context().monotonic_remote_time - monotonic_offset);
+ // Many sensors may include some sort of hardware timestamp indicating when
+ // the measurement was taken, which is likely before the sent time. This can
+ // be populated as a data field inside of the message, and if it is using
+ // the same monotonic clock as AOS is then we can do the same offset
+ // computation, but get a timestamp for when the data was actually captured.
+ const aos::monotonic_clock::time_point capture_time(
+ std::chrono::nanoseconds(msg.hardware_capture_time_ns()) -
+ monotonic_offset);
+ LOG(INFO) << "The sensor data was sent "
+ << aos::time::DurationInSeconds(now - send_time)
+ << " seconds ago.";
+ LOG(INFO) << "The sensor data was read off of the hardware "
+ << aos::time::DurationInSeconds(now - capture_time)
+ << " seconds ago.";
+ }
+
+ aos::EventLoop *event_loop_;
+ aos::Fetcher<aos::message_bridge::ServerStatistics> clock_offset_fetcher_;
+};
+```
## Configurations
+### Configuring Multi-Node Systems
+
## FlatBuffers
See [FlatBuffers](/flatbuffers).
@@ -1272,12 +1805,6 @@
### Sent Too Fast
-## Multi-Node
-
-### Message Bridge
-
-### Correlating Monotonic Clocks Across Nodes
-
## Simulation
## Logging
diff --git a/documentation/aos/examples/BUILD b/documentation/aos/examples/BUILD
new file mode 100644
index 0000000..f224409
--- /dev/null
+++ b/documentation/aos/examples/BUILD
@@ -0,0 +1,18 @@
+load("@com_github_google_flatbuffers//:build_defs.bzl", "flatbuffer_cc_library")
+
+flatbuffer_cc_library(
+ name = "sensor_data_fbs",
+ srcs = ["sensor_data.fbs"],
+ gen_reflections = True,
+)
+
+cc_library(
+ name = "clock_offset_reader",
+ srcs = ["clock_offset_reader.cc"],
+ hdrs = ["clock_offset_reader.h"],
+ deps = [
+ ":sensor_data_fbs",
+ "//aos/events:event_loop",
+ "//aos/network:message_bridge_server_fbs",
+ ],
+)
diff --git a/documentation/aos/examples/clock_offset_reader.cc b/documentation/aos/examples/clock_offset_reader.cc
new file mode 100644
index 0000000..7859a65
--- /dev/null
+++ b/documentation/aos/examples/clock_offset_reader.cc
@@ -0,0 +1 @@
+#include "documentation/aos/examples/clock_offset_reader.h"
diff --git a/documentation/aos/examples/clock_offset_reader.h b/documentation/aos/examples/clock_offset_reader.h
new file mode 100644
index 0000000..330da4f
--- /dev/null
+++ b/documentation/aos/examples/clock_offset_reader.h
@@ -0,0 +1,80 @@
+#ifndef DOCUMENTATION_AOS_EXAMPLES_CLOCK_OFFSET_READER_H_
+#define DOCUMENTATION_AOS_EXAMPLES_CLOCK_OFFSET_READER_H_
+#include "aos/events/event_loop.h"
+#include "aos/network/message_bridge_server_generated.h"
+#include "documentation/aos/examples/sensor_data_generated.h"
+
+namespace examples {
+
+// This class is a sample that is shown in the markdown documentation.
+// If it needs to get updated, the sample should get updated as well.
+// TODO(james): Get a convenient way to directly include portions of files in
+// markdown so that we don't just manually copy-and-paste the code between
+// spots.
+class SensorAgeReader {
+ public:
+ SensorAgeReader(aos::EventLoop *event_loop)
+ : event_loop_(event_loop),
+ clock_offset_fetcher_(
+ event_loop->MakeFetcher<aos::message_bridge::ServerStatistics>(
+ "/aos")) {
+ event_loop_->MakeWatcher(
+ "/input", [this](const SensorData &msg) { HandleSensorData(msg); });
+ }
+
+ void HandleSensorData(const SensorData &msg) {
+ std::chrono::nanoseconds monotonic_offset{0};
+ clock_offset_fetcher_.Fetch();
+ if (clock_offset_fetcher_.get() != nullptr) {
+ for (const auto connection : *clock_offset_fetcher_->connections()) {
+ if (connection->has_node() && connection->node()->has_name() &&
+ connection->node()->name()->string_view() == "sensor") {
+ if (connection->has_monotonic_offset()) {
+ monotonic_offset =
+ std::chrono::nanoseconds(connection->monotonic_offset());
+ } else {
+ // If we don't have a monotonic offset, that means we aren't
+ // connected, in which case we should just exit early.
+ // The ServerStatistics message will always populate statuses for
+ // every node, so we don't have to worry about missing the "sensor"
+ // node (although it can be good practice to check that the node you
+ // are looking for actually exists, to protect against programming
+ // errors).
+ LOG(WARNING) << "Message bridge disconnected.";
+ return;
+ }
+ break;
+ }
+ }
+ } else {
+ LOG(WARNING) << "No message bridge status available.";
+ return;
+ }
+ const aos::monotonic_clock::time_point now = event_loop_->monotonic_now();
+ // The monotonic_remote_time will be the time that the message was sent on
+ // the source node; by offsetting it by the monotonic_offset, we should get
+ // a reasonable estimate of when it was sent. This does not account for any
+ // delays between the sensor reading and when it actually got sent.
+ const aos::monotonic_clock::time_point send_time(
+ event_loop_->context().monotonic_remote_time - monotonic_offset);
+ // Many sensors may include some sort of hardware timestamp indicating when
+ // the measurement was taken, which is likely before the sent time. This can
+ // be populated as a data field inside of the message, and if it is using
+ // the same monotonic clock as AOS is then we can do the same offset
+ // computation, but get a timestamp for when the data was actually captured.
+ const aos::monotonic_clock::time_point capture_time(
+ std::chrono::nanoseconds(msg.hardware_capture_time_ns()) -
+ monotonic_offset);
+ LOG(INFO) << "The sensor data was sent "
+ << aos::time::DurationInSeconds(now - send_time)
+ << " seconds ago.";
+ LOG(INFO) << "The sensor data was read off of the hardware "
+ << aos::time::DurationInSeconds(now - capture_time)
+ << " seconds ago.";
+ }
+
+ aos::EventLoop *event_loop_;
+ aos::Fetcher<aos::message_bridge::ServerStatistics> clock_offset_fetcher_;
+};
+} // namespace examples
+#endif // DOCUMENTATION_AOS_EXAMPLES_CLOCK_OFFSET_READER_H_
diff --git a/documentation/aos/examples/sensor_data.fbs b/documentation/aos/examples/sensor_data.fbs
new file mode 100644
index 0000000..c90472c
--- /dev/null
+++ b/documentation/aos/examples/sensor_data.fbs
@@ -0,0 +1,7 @@
+namespace examples;
+
+table SensorData {
+ hardware_capture_time_ns:uint64 (id: 0);
+}
+
+root_type SensorData;
diff --git a/frc971/downloader/downloader.py b/frc971/downloader/downloader.py
index d21d8bd..a962d5b 100644
--- a/frc971/downloader/downloader.py
+++ b/frc971/downloader/downloader.py
@@ -126,7 +126,7 @@
os.chmod(os.path.join(temp_dir, "starterd"), 0o775 | stat.S_ISUID)
rsync_cmd = ([
- "external/rsync/usr/bin/rsync",
+ "external/rsync/rsync",
"-e",
ssh_path,
"-c",
diff --git a/y2023/constants.cc b/y2023/constants.cc
index 3d49cf8..c17ffd0 100644
--- a/y2023/constants.cc
+++ b/y2023/constants.cc
@@ -102,7 +102,7 @@
arm_distal->zeroing.one_revolution_distance =
M_PI * 2.0 * constants::Values::kDistalEncoderRatio();
- roll_joint->zeroing.measured_absolute_position = 0.424187348328397;
+ roll_joint->zeroing.measured_absolute_position = 0.919380272775513;
roll_joint->potentiometer_offset =
-(3.87038557084874 - 0.0241774522172967 + 0.0711345168020632 -
0.866186131631967 - 0.0256788357596952 + 0.18101759154572017 -
@@ -111,7 +111,7 @@
0.11972765117321 - 0.318724743041507) +
0.0201047336425017 - 1.0173426655158 - 0.186085272847293 -
0.0317706563397807 - 2.6357823523782 + 0.871932806570122 +
- 1.09682107821155;
+ 1.09682107821155 - 0.193945964842277;
wrist->subsystem_params.zeroing_constants.measured_absolute_position =
0.886183343417664;
diff --git a/y2023/vision/image_logger.cc b/y2023/vision/image_logger.cc
index 1b457f9..45e25f6 100644
--- a/y2023/vision/image_logger.cc
+++ b/y2023/vision/image_logger.cc
@@ -57,7 +57,7 @@
aos::logger::Logger logger(&event_loop);
if (FLAGS_rotate_every != 0.0) {
- logger.set_on_logged_period([&] {
+ logger.set_on_logged_period([&](aos::monotonic_clock::time_point) {
const auto now = event_loop.monotonic_now();
if (logging && now > last_rotation_time + std::chrono::duration<double>(
FLAGS_rotate_every)) {