Merge changes If4369697,I4390f59e

* changes:
  Add Turret BUILD and turret.py
  Turret Messages
diff --git a/aos/BUILD b/aos/BUILD
index f0c12b9..ac5c3e9 100644
--- a/aos/BUILD
+++ b/aos/BUILD
@@ -1,13 +1,11 @@
 load("//tools:environments.bzl", "mcu_cpus")
-load("@com_github_google_flatbuffers//:build_defs.bzl", "flatbuffer_cc_library", "flatbuffer_python_library")
+load("@com_github_google_flatbuffers//:build_defs.bzl", "flatbuffer_cc_library", "flatbuffer_py_library")
 
 filegroup(
     name = "prime_binaries",
     srcs = [
         "//aos:aos_dump",
         "//aos:core",
-        "//aos/logging:log_displayer",
-        "//aos/logging:log_streamer",
         "//aos/starter",
     ],
     visibility = ["//visibility:public"],
@@ -17,7 +15,6 @@
     name = "prime_start_binaries",
     srcs = [
         "//aos/events/logging:logger_main",
-        "//aos/logging:binary_log_writer",
     ],
     visibility = ["//visibility:public"],
 )
@@ -27,8 +24,6 @@
     srcs = [
         # starter is hard coded to look for a non-stripped core...
         "//aos:core",
-        "//aos/logging:log_streamer.stripped",
-        "//aos/logging:log_displayer.stripped",
         "//aos:aos_dump.stripped",
         "//aos/starter",
     ],
@@ -39,7 +34,6 @@
     name = "prime_start_binaries_stripped",
     srcs = [
         "//aos/events/logging:logger_main.stripped",
-        "//aos/logging:binary_log_writer.stripped",
     ],
     visibility = ["//visibility:public"],
 )
@@ -159,7 +153,6 @@
         "//aos/testing:prevent_exit",
         "//aos/testing:test_shm",
         "//aos/time",
-        "//aos/util:thread",
     ],
 )
 
@@ -254,7 +247,6 @@
         ":complex_thread_local",
         "//aos/logging",
         "//aos/testing:googletest",
-        "//aos/util:thread",
     ],
 )
 
@@ -295,7 +287,7 @@
     visibility = ["//visibility:public"],
 )
 
-flatbuffer_python_library(
+flatbuffer_py_library(
     name = "configuration_fbs_python",
     srcs = ["configuration.fbs"],
     namespace = "aos",
@@ -459,11 +451,11 @@
     ],
     visibility = ["//visibility:public"],
     deps = [
+        "//aos:macros",
         "@com_github_google_flatbuffers//:flatbuffers",
         "@com_github_google_glog//:glog",
         "@com_google_absl//absl/strings",
         "@com_google_absl//absl/types:span",
-        "//aos:macros",
     ],
 )
 
diff --git a/aos/actions/action_test.cc b/aos/actions/action_test.cc
index fa1a347..124b3e4 100644
--- a/aos/actions/action_test.cc
+++ b/aos/actions/action_test.cc
@@ -108,6 +108,11 @@
     ::aos::testing::EnableTestLogging();
   }
 
+  void RunAt(monotonic_clock::time_point exec_time, std::function<void()> fn) {
+    TimerHandler *timer = test_event_loop_->AddTimer(fn);
+    test_event_loop_->OnRun([timer, exec_time]() { timer->Setup(exec_time); });
+  }
+
   FlatbufferDetachedBuffer<Configuration> configuration_;
 
   // Bring up and down Core.
@@ -158,32 +163,35 @@
 
   ASSERT_FALSE(status_fetcher.Fetch());
 
-  event_loop_factory_.RunFor(chrono::seconds(1));
+  RunAt(monotonic_clock::time_point(chrono::seconds(1)), [&]() {
+    ASSERT_TRUE(status_fetcher.Fetch());
+    EXPECT_EQ(0u, status_fetcher->running());
+    EXPECT_EQ(0u, status_fetcher->last_running());
 
-  ASSERT_TRUE(status_fetcher.Fetch());
-  EXPECT_EQ(0u, status_fetcher->running());
-  EXPECT_EQ(0u, status_fetcher->last_running());
+    {
+      UIntT uint;
+      uint.val = 0;
+      action_queue.EnqueueAction(nop_actor_factory.Make(uint));
+    }
 
-  {
-    UIntT uint;
-    uint.val = 0;
-    action_queue.EnqueueAction(nop_actor_factory.Make(uint));
-  }
+    // We started an action and it should be running.
+    EXPECT_TRUE(action_queue.Running());
 
-  // We started an action and it should be running.
-  EXPECT_TRUE(action_queue.Running());
+    action_queue.CancelAllActions();
+    action_queue.Tick();
 
-  action_queue.CancelAllActions();
-  action_queue.Tick();
-
-  EXPECT_TRUE(action_queue.Running());
+    EXPECT_TRUE(action_queue.Running());
+  });
 
   // Run the action so it can signal completion.
-  event_loop_factory_.RunFor(chrono::seconds(1));
-  action_queue.Tick();
+  RunAt(monotonic_clock::time_point(chrono::seconds(2)), [&]() {
+    action_queue.Tick();
 
-  // Make sure it stopped.
-  EXPECT_FALSE(action_queue.Running());
+    // Make sure it stopped.
+    EXPECT_FALSE(action_queue.Running());
+  });
+
+  event_loop_factory_.RunFor(chrono::seconds(3));
 }
 
 // Tests that an action starts and stops.
@@ -196,29 +204,33 @@
   ActionQueue action_queue;
 
   // Tick an empty queue and make sure it was not running.
-  event_loop_factory_.RunFor(chrono::seconds(1));
-  action_queue.Tick();
-  EXPECT_FALSE(action_queue.Running());
+  RunAt(monotonic_clock::time_point(chrono::seconds(1)), [&]() {
+    action_queue.Tick();
+    EXPECT_FALSE(action_queue.Running());
 
-  {
-    UIntT uint;
-    uint.val = 0;
-    action_queue.EnqueueAction(nop_actor_factory.Make(uint));
-  }
+    {
+      UIntT uint;
+      uint.val = 0;
+      action_queue.EnqueueAction(nop_actor_factory.Make(uint));
+    }
 
-  // We started an action and it should be running.
-  EXPECT_TRUE(action_queue.Running());
+    // We started an action and it should be running.
+    EXPECT_TRUE(action_queue.Running());
 
-  // Tick it and make sure it is still running.
-  action_queue.Tick();
-  EXPECT_TRUE(action_queue.Running());
+    // Tick it and make sure it is still running.
+    action_queue.Tick();
+    EXPECT_TRUE(action_queue.Running());
+  });
 
   // Run the action so it can signal completion.
-  event_loop_factory_.RunFor(chrono::seconds(1));
-  action_queue.Tick();
+  RunAt(monotonic_clock::time_point(chrono::seconds(2)), [&]() {
+    action_queue.Tick();
 
-  // Make sure it stopped.
-  EXPECT_FALSE(action_queue.Running());
+    // Make sure it stopped.
+    EXPECT_FALSE(action_queue.Running());
+  });
+
+  event_loop_factory_.RunFor(chrono::seconds(3));
 }
 
 // Tests that we can cancel two actions and have them both stop.
@@ -231,52 +243,55 @@
   ActionQueue action_queue;
 
   // Let the actor and action queue start up and confirm nothing is running.
-  event_loop_factory_.RunFor(chrono::seconds(1));
-  action_queue.Tick();
+  RunAt(monotonic_clock::time_point(chrono::seconds(1)), [&]() {
+    action_queue.Tick();
 
-  EXPECT_FALSE(action_queue.Running());
+    EXPECT_FALSE(action_queue.Running());
 
-  // Enqueue two actions to test both cancel. We can have an action and a next
-  // action so we want to test that.
-  {
-    UIntT uint;
-    uint.val = 0;
-    action_queue.EnqueueAction(nop_actor_factory.Make(uint));
-    action_queue.EnqueueAction(nop_actor_factory.Make(uint));
-  }
+    // Enqueue two actions to test both cancel. We can have an action and a next
+    // action so we want to test that.
+    {
+      UIntT uint;
+      uint.val = 0;
+      action_queue.EnqueueAction(nop_actor_factory.Make(uint));
+      action_queue.EnqueueAction(nop_actor_factory.Make(uint));
+    }
 
-  action_queue.Tick();
+    action_queue.Tick();
 
-  // Check that current and next exist.
-  EXPECT_TRUE(action_queue.GetCurrentActionState(nullptr, nullptr, nullptr,
+    // Check that current and next exist.
+    EXPECT_TRUE(action_queue.GetCurrentActionState(nullptr, nullptr, nullptr,
+                                                   nullptr, nullptr, nullptr));
+    EXPECT_TRUE(action_queue.GetNextActionState(nullptr, nullptr, nullptr,
+                                                nullptr, nullptr, nullptr));
+
+    action_queue.CancelAllActions();
+    action_queue.Tick();
+
+    // It should still be running as the actor could not have signaled.
+    EXPECT_TRUE(action_queue.Running());
+
+    bool sent_started, sent_cancel, interrupted;
+    EXPECT_TRUE(action_queue.GetCurrentActionState(
+        nullptr, &sent_started, &sent_cancel, &interrupted, nullptr, nullptr));
+    EXPECT_TRUE(sent_started);
+    EXPECT_TRUE(sent_cancel);
+    EXPECT_FALSE(interrupted);
+
+    EXPECT_FALSE(action_queue.GetNextActionState(nullptr, nullptr, nullptr,
                                                  nullptr, nullptr, nullptr));
-  EXPECT_TRUE(action_queue.GetNextActionState(nullptr, nullptr, nullptr,
-                                              nullptr, nullptr, nullptr));
-
-  action_queue.CancelAllActions();
-  action_queue.Tick();
-
-  // It should still be running as the actor could not have signaled.
-  EXPECT_TRUE(action_queue.Running());
-
-  bool sent_started, sent_cancel, interrupted;
-  EXPECT_TRUE(action_queue.GetCurrentActionState(
-      nullptr, &sent_started, &sent_cancel, &interrupted, nullptr, nullptr));
-  EXPECT_TRUE(sent_started);
-  EXPECT_TRUE(sent_cancel);
-  EXPECT_FALSE(interrupted);
-
-  EXPECT_FALSE(action_queue.GetNextActionState(nullptr, nullptr, nullptr,
-                                               nullptr, nullptr, nullptr));
+  });
 
   // Run the action so it can signal completion.
-  event_loop_factory_.RunFor(chrono::seconds(1));
+  RunAt(monotonic_clock::time_point(chrono::seconds(2)), [&]() {
+    action_queue.Tick();
 
-  action_queue.Tick();
+    // Make sure it stopped.
+    EXPECT_FALSE(action_queue.Running());
+    EXPECT_EQ(1, nop_act.running_count());
+  });
 
-  // Make sure it stopped.
-  EXPECT_FALSE(action_queue.Running());
-  EXPECT_EQ(1, nop_act.running_count());
+  event_loop_factory_.RunFor(chrono::seconds(3));
 }
 
 // Tests that an action that would block forever stops when canceled.
@@ -289,30 +304,34 @@
   ActionQueue action_queue;
 
   // Let the actor and action queue start up.
-  event_loop_factory_.RunFor(chrono::seconds(1));
-  action_queue.Tick();
+  RunAt(monotonic_clock::time_point(chrono::seconds(1)), [&]() {
+    action_queue.Tick();
 
-  // Enqueue blocking action.
-  {
-    UIntT uint;
-    uint.val = 0;
-    action_queue.EnqueueAction(cancel_action_factory.Make(uint));
-  }
+    // Enqueue blocking action.
+    {
+      UIntT uint;
+      uint.val = 0;
+      action_queue.EnqueueAction(cancel_action_factory.Make(uint));
+    }
 
-  action_queue.Tick();
-  EXPECT_TRUE(action_queue.Running());
+    action_queue.Tick();
+    EXPECT_TRUE(action_queue.Running());
 
-  // Tell action to cancel.
-  action_queue.CancelCurrentAction();
-  action_queue.Tick();
+    // Tell action to cancel.
+    action_queue.CancelCurrentAction();
+    action_queue.Tick();
+  });
 
   // This will block forever on failure.
   // TODO(ben): prolly a bad way to fail
-  event_loop_factory_.RunFor(chrono::seconds(1));
-  action_queue.Tick();
+  RunAt(monotonic_clock::time_point(chrono::seconds(2)), [&]() {
+    action_queue.Tick();
 
-  // It should still be running as the actor could not have signalled.
-  EXPECT_FALSE(action_queue.Running());
+    // It should still be running as the actor could not have signalled.
+    EXPECT_FALSE(action_queue.Running());
+  });
+
+  event_loop_factory_.RunFor(chrono::seconds(3));
 }
 
 // Tests that 2 actions in a row causes the second one to cancel the first one.
@@ -323,78 +342,84 @@
       TestActorNOP::MakeFactory(test_event_loop_.get());
 
   ActionQueue action_queue;
-  // Tick an empty queue and make sure it was not running.
-  event_loop_factory_.RunFor(chrono::seconds(1));
-  action_queue.Tick();
-  EXPECT_FALSE(action_queue.Running());
-
-  // Enqueue action to be canceled.
-  {
-    UIntT uint;
-    uint.val = 0;
-    action_queue.EnqueueAction(nop_actor_factory.Make(uint));
-  }
-  action_queue.Tick();
-
-  // Should still be running as the actor could not have signalled.
-  EXPECT_TRUE(action_queue.Running());
 
   // id for the first time run.
   uint32_t nop_actor_id = 0;
   // Check the internal state and write down id for later use.
   bool sent_started, sent_cancel, interrupted;
-  EXPECT_TRUE(action_queue.GetCurrentActionState(nullptr, &sent_started,
-                                                 &sent_cancel, &interrupted,
-                                                 &nop_actor_id, nullptr));
-  EXPECT_TRUE(sent_started);
-  EXPECT_FALSE(sent_cancel);
-  EXPECT_FALSE(interrupted);
-  ASSERT_NE(0u, nop_actor_id);
-
-  // Add the next action which should ensure the first stopped.
-  {
-    UIntT uint;
-    uint.val = 0;
-    action_queue.EnqueueAction(nop_actor_factory.Make(uint));
-  }
-
   // id for the second run.
   uint32_t nop_actor2_id = 0;
-  // Check the internal state and write down id for later use.
-  EXPECT_TRUE(action_queue.GetNextActionState(nullptr, &sent_started,
-                                              &sent_cancel, &interrupted,
-                                              &nop_actor2_id, nullptr));
-  EXPECT_NE(nop_actor_id, nop_actor2_id);
-  EXPECT_FALSE(sent_started);
-  EXPECT_FALSE(sent_cancel);
-  EXPECT_FALSE(interrupted);
-  ASSERT_NE(0u, nop_actor2_id);
 
-  action_queue.Tick();
+  // Tick an empty queue and make sure it was not running.
+  RunAt(monotonic_clock::time_point(chrono::seconds(1)), [&]() {
+    action_queue.Tick();
+    EXPECT_FALSE(action_queue.Running());
+
+    // Enqueue action to be canceled.
+    {
+      UIntT uint;
+      uint.val = 0;
+      action_queue.EnqueueAction(nop_actor_factory.Make(uint));
+    }
+    action_queue.Tick();
+
+    // Should still be running as the actor could not have signalled.
+    EXPECT_TRUE(action_queue.Running());
+
+    EXPECT_TRUE(action_queue.GetCurrentActionState(nullptr, &sent_started,
+                                                   &sent_cancel, &interrupted,
+                                                   &nop_actor_id, nullptr));
+    EXPECT_TRUE(sent_started);
+    EXPECT_FALSE(sent_cancel);
+    EXPECT_FALSE(interrupted);
+    ASSERT_NE(0u, nop_actor_id);
+
+    // Add the next action which should ensure the first stopped.
+    {
+      UIntT uint;
+      uint.val = 0;
+      action_queue.EnqueueAction(nop_actor_factory.Make(uint));
+    }
+
+    // Check the internal state and write down id for later use.
+    EXPECT_TRUE(action_queue.GetNextActionState(nullptr, &sent_started,
+                                                &sent_cancel, &interrupted,
+                                                &nop_actor2_id, nullptr));
+    EXPECT_NE(nop_actor_id, nop_actor2_id);
+    EXPECT_FALSE(sent_started);
+    EXPECT_FALSE(sent_cancel);
+    EXPECT_FALSE(interrupted);
+    ASSERT_NE(0u, nop_actor2_id);
+
+    action_queue.Tick();
+  });
 
   // Run the action so it can signal completion.
-  event_loop_factory_.RunFor(chrono::seconds(1));
+  RunAt(monotonic_clock::time_point(chrono::seconds(2)), [&]() {
+    action_queue.Tick();
 
-  action_queue.Tick();
+    // Check the new action is the right one.
+    uint32_t test_id = 0;
+    EXPECT_TRUE(action_queue.GetCurrentActionState(
+        nullptr, &sent_started, &sent_cancel, &interrupted, &test_id, nullptr));
+    EXPECT_TRUE(sent_started);
+    EXPECT_FALSE(sent_cancel);
+    EXPECT_FALSE(interrupted);
+    EXPECT_EQ(nop_actor2_id, test_id);
 
-  // Check the new action is the right one.
-  uint32_t test_id = 0;
-  EXPECT_TRUE(action_queue.GetCurrentActionState(
-      nullptr, &sent_started, &sent_cancel, &interrupted, &test_id, nullptr));
-  EXPECT_TRUE(sent_started);
-  EXPECT_FALSE(sent_cancel);
-  EXPECT_FALSE(interrupted);
-  EXPECT_EQ(nop_actor2_id, test_id);
-
-  // Make sure it is still going.
-  EXPECT_TRUE(action_queue.Running());
+    // Make sure it is still going.
+    EXPECT_TRUE(action_queue.Running());
+  });
 
   // Now let everything finish.
-  event_loop_factory_.RunFor(chrono::seconds(1));
-  action_queue.Tick();
+  RunAt(monotonic_clock::time_point(chrono::seconds(3)), [&]() {
+    action_queue.Tick();
 
-  // Make sure it stopped.
-  EXPECT_FALSE(action_queue.Running());
+    // Make sure it stopped.
+    EXPECT_FALSE(action_queue.Running());
+  });
+
+  event_loop_factory_.RunFor(chrono::seconds(4));
 }
 
 // Tests that we do get an index with our goal
@@ -403,51 +428,53 @@
 
   TestActorIndex::Factory test_actor_index_factory =
       TestActorIndex::MakeFactory(test_event_loop_.get());
+  ::aos::Fetcher<actions::TestActionGoal> goal_fetcher_ =
+      test_event_loop_->MakeFetcher<actions::TestActionGoal>("/test_action");
 
   ActionQueue action_queue;
   // Tick an empty queue and make sure it was not running.  Also tick the
   // factory to allow it to send out the initial cancel message.
-  event_loop_factory_.RunFor(chrono::seconds(1));
-  action_queue.Tick();
+  RunAt(monotonic_clock::time_point(chrono::seconds(1)), [&]() {
+    action_queue.Tick();
 
-  EXPECT_FALSE(action_queue.Running());
+    EXPECT_FALSE(action_queue.Running());
 
-  // Enqueue action to post index.
-  {
-    UIntT uint;
-    uint.val = 5;
-    action_queue.EnqueueAction(test_actor_index_factory.Make(uint));
-  }
-  ::aos::Fetcher<actions::TestActionGoal> goal_fetcher_ =
-      test_event_loop_->MakeFetcher<actions::TestActionGoal>(
-          "/test_action");
+    // Enqueue action to post index.
+    {
+      UIntT uint;
+      uint.val = 5;
+      action_queue.EnqueueAction(test_actor_index_factory.Make(uint));
+    }
 
-  ASSERT_TRUE(goal_fetcher_.Fetch());
-  EXPECT_EQ(5u, goal_fetcher_->params()->val());
-  EXPECT_EQ(0u, idx_actor.index);
+    ASSERT_TRUE(goal_fetcher_.Fetch());
+    EXPECT_EQ(5u, goal_fetcher_->params()->val());
+    EXPECT_EQ(0u, idx_actor.index);
 
-  action_queue.Tick();
+    action_queue.Tick();
+  });
 
   // Run the next action so it can accomplish signal completion.
-  event_loop_factory_.RunFor(chrono::seconds(1));
+  RunAt(monotonic_clock::time_point(chrono::seconds(2)), [&]() {
+    action_queue.Tick();
+    EXPECT_EQ(5u, idx_actor.index);
 
-  action_queue.Tick();
-  EXPECT_EQ(5u, idx_actor.index);
-
-  // Enqueue action to post index.
-  {
-    UIntT uint;
-    uint.val = 3;
-    action_queue.EnqueueAction(test_actor_index_factory.Make(uint));
-  }
-  ASSERT_TRUE(goal_fetcher_.Fetch());
-  EXPECT_EQ(3u, goal_fetcher_->params()->val());
+    // Enqueue action to post index.
+    {
+      UIntT uint;
+      uint.val = 3;
+      action_queue.EnqueueAction(test_actor_index_factory.Make(uint));
+    }
+    ASSERT_TRUE(goal_fetcher_.Fetch());
+    EXPECT_EQ(3u, goal_fetcher_->params()->val());
+  });
 
   // Run the next action so it can accomplish signal completion.
-  event_loop_factory_.RunFor(chrono::seconds(1));
+  RunAt(monotonic_clock::time_point(chrono::seconds(3)), [&]() {
+    action_queue.Tick();
+    EXPECT_EQ(3u, idx_actor.index);
+  });
 
-  action_queue.Tick();
-  EXPECT_EQ(3u, idx_actor.index);
+  event_loop_factory_.RunFor(chrono::seconds(3));
 }
 
 // Tests that an action with a structure params works.
@@ -458,32 +485,37 @@
       TestActor2Nop::MakeFactory(test_event_loop_.get());
 
   ActionQueue action_queue;
-  // Tick an empty queue and make sure it was not running.
-  action_queue.Tick();
-  EXPECT_FALSE(action_queue.Running());
 
-  actions::MyParamsT p;
-  p.param1 = 5.0;
-  p.param2 = 7;
+  RunAt(monotonic_clock::time_point(chrono::seconds(0)), [&]() {
+    // Tick an empty queue and make sure it was not running.
+    action_queue.Tick();
+    EXPECT_FALSE(action_queue.Running());
 
-  action_queue.EnqueueAction(test_action_2_nop_factory.Make(p));
+    actions::MyParamsT p;
+    p.param1 = 5.0;
+    p.param2 = 7;
 
-  // We started an action and it should be running.
-  EXPECT_TRUE(action_queue.Running());
+    action_queue.EnqueueAction(test_action_2_nop_factory.Make(p));
 
-  // Tick it and make sure it is still running.
-  action_queue.Tick();
-  EXPECT_TRUE(action_queue.Running());
+    // We started an action and it should be running.
+    EXPECT_TRUE(action_queue.Running());
+
+    // Tick it and make sure it is still running.
+    action_queue.Tick();
+    EXPECT_TRUE(action_queue.Running());
+  });
 
   // Run the action so it can signal completion.
   // The actor takes no time, but running for a second is the best way to get it
   // to go.
-  event_loop_factory_.RunFor(chrono::seconds(1));
+  RunAt(monotonic_clock::time_point(chrono::seconds(1)), [&]() {
+    action_queue.Tick();
 
-  action_queue.Tick();
+    // Make sure it stopped.
+    EXPECT_FALSE(action_queue.Running());
+  });
 
-  // Make sure it stopped.
-  EXPECT_FALSE(action_queue.Running());
+  event_loop_factory_.RunFor(chrono::seconds(2));
 }
 
 }  // namespace testing
diff --git a/aos/complex_thread_local_test.cc b/aos/complex_thread_local_test.cc
index b8eaa2c..5323dea 100644
--- a/aos/complex_thread_local_test.cc
+++ b/aos/complex_thread_local_test.cc
@@ -1,11 +1,10 @@
 #include "aos/complex_thread_local.h"
 
 #include <atomic>
+#include <thread>
 
 #include "gtest/gtest.h"
 
-#include "aos/util/thread.h"
-
 namespace aos {
 namespace testing {
 
@@ -65,7 +64,7 @@
 
 TEST_F(ComplexThreadLocalTest, AnotherThread) {
   EXPECT_FALSE(local.created());
-  util::FunctionThread::RunInOtherThread([this]() {
+  std::thread t1([this]() {
     EXPECT_FALSE(local.created());
     local.Create(971);
     EXPECT_TRUE(local.created());
@@ -73,21 +72,21 @@
     EXPECT_EQ(1, TraceableObject::constructions);
     EXPECT_EQ(0, TraceableObject::destructions);
   });
+  t1.join();
   EXPECT_EQ(1, TraceableObject::constructions);
   EXPECT_EQ(1, TraceableObject::destructions);
   EXPECT_FALSE(local.created());
 }
 
 TEST_F(ComplexThreadLocalTest, TwoThreads) {
-  util::FunctionThread thread([this](util::FunctionThread *) {
+  std::thread thread([this]() {
     local.Create(971);
     EXPECT_EQ(971, local->data);
     EXPECT_EQ(0, TraceableObject::destructions);
   });
-  thread.Start();
   local.Create(973);
   EXPECT_EQ(973, local->data);
-  thread.Join();
+  thread.join();
   EXPECT_TRUE(local.created());
   EXPECT_EQ(2, TraceableObject::constructions);
   EXPECT_EQ(1, TraceableObject::destructions);
diff --git a/aos/condition_test.cc b/aos/condition_test.cc
index 1e8d06c..b49f27c 100644
--- a/aos/condition_test.cc
+++ b/aos/condition_test.cc
@@ -19,7 +19,6 @@
 #include "aos/macros.h"
 #include "aos/ipc_lib/aos_sync.h"
 #include "aos/die.h"
-#include "aos/util/thread.h"
 #include "aos/testing/prevent_exit.h"
 
 namespace aos {
@@ -58,20 +57,18 @@
   ::std::atomic_bool child_finished(false);
   Condition child_ready(&mutex_);
   ASSERT_FALSE(mutex_.Lock());
-  util::FunctionThread child([this, &child_finished, &child_ready](
-      util::FunctionThread *) {
+  std::thread child([this, &child_finished, &child_ready] {
     ASSERT_FALSE(mutex_.Lock());
     child_ready.Broadcast();
     ASSERT_FALSE(condition_.Wait());
     child_finished.store(true);
     mutex_.Unlock();
   });
-  child.Start();
   ASSERT_FALSE(child_ready.Wait());
   EXPECT_FALSE(child_finished.load());
   condition_.Signal();
   mutex_.Unlock();
-  child.Join();
+  child.join();
   EXPECT_TRUE(child_finished.load());
 }
 
diff --git a/aos/init.cc b/aos/init.cc
index 957899c7..5d62868 100644
--- a/aos/init.cc
+++ b/aos/init.cc
@@ -58,25 +58,18 @@
   google::InstallFailureSignalHandler();
 }
 
-void InitNRT(bool for_realtime) {
+void InitNRT() {
   InitStart();
-  aos_core_create_shared_mem(false, for_realtime && ShouldBeRealtime());
-  logging::RegisterQueueImplementation();
   ExpandStackSize();
-  AOS_LOG(INFO, "%s initialized non-realtime\n", program_invocation_short_name);
 }
 
 void InitCreate() {
   InitStart();
-  aos_core_create_shared_mem(true, false);
-  logging::RegisterQueueImplementation();
   AOS_LOG(INFO, "%s created shm\n", program_invocation_short_name);
 }
 
 void Init(int relative_priority) {
   InitStart();
-  aos_core_create_shared_mem(false, ShouldBeRealtime());
-  logging::RegisterQueueImplementation();
   GoRT(relative_priority);
 }
 
diff --git a/aos/init.h b/aos/init.h
index 85d5da7..3ff56b8 100644
--- a/aos/init.h
+++ b/aos/init.h
@@ -5,6 +5,8 @@
 
 namespace aos {
 
+// TODO(james): Clean up/unify the various init functions.
+
 // Initializes glog and gflags.
 void InitGoogle(int *argc, char ***argv);
 
@@ -13,8 +15,7 @@
 // them again after fork(2)ing.
 
 // Does the non-realtime parts of the initialization process.
-// If for_realtime is true, this sets up to call GoRT later.
-void InitNRT(bool for_realtime = false);
+void InitNRT();
 // Initializes everything, including the realtime stuff.
 // relative_priority adjusts the priority of this process relative to all of the
 // other ones (positive for higher priority).
@@ -26,7 +27,7 @@
 // exit gracefully).
 void Cleanup();
 
-// Performs the realtime parts of initialization after InitNRT(true) has been called.
+// Performs the realtime parts of initialization after InitNRT() has been called.
 void GoRT(int relative_priority = 0);
 
 // Pins the current thread to CPU #number.
diff --git a/aos/ipc_lib/BUILD b/aos/ipc_lib/BUILD
index 91b050c..1989f06 100644
--- a/aos/ipc_lib/BUILD
+++ b/aos/ipc_lib/BUILD
@@ -101,7 +101,6 @@
         "//aos/testing:test_shm",
         "//aos/time",
         "//aos/util:death_test_log_implementation",
-        "//aos/util:thread",
     ],
 )
 
diff --git a/aos/ipc_lib/raw_queue_test.cc b/aos/ipc_lib/raw_queue_test.cc
index 1c02c53..3684029 100644
--- a/aos/ipc_lib/raw_queue_test.cc
+++ b/aos/ipc_lib/raw_queue_test.cc
@@ -8,6 +8,7 @@
 #include <ostream>
 #include <memory>
 #include <map>
+#include <thread>
 
 #include "gtest/gtest.h"
 
@@ -17,7 +18,6 @@
 #include "aos/time/time.h"
 #include "aos/logging/logging.h"
 #include "aos/die.h"
-#include "aos/util/thread.h"
 #include "aos/util/options.h"
 #include "aos/util/death_test_log_implementation.h"
 #include "aos/testing/prevent_exit.h"
@@ -976,16 +976,14 @@
   ASSERT_NE(nullptr, message1);
   ASSERT_NE(nullptr, message2);
   EXPECT_EQ(free_before, queue->FreeMessages());
-  util::FunctionThread t1([message1, queue](util::Thread *) {
+  std::thread t1([message1, queue] {
     queue->FreeMessage(message1);
   });
-  util::FunctionThread t2([message2, queue](util::Thread *) {
+  std::thread t2([message2, queue] {
     queue->FreeMessage(message2);
   });
-  t1.Start();
-  t2.Start();
-  t1.WaitUntilDone();
-  t2.WaitUntilDone();
+  t1.join();
+  t2.join();
   EXPECT_EQ(free_before, queue->FreeMessages());
 }
 
diff --git a/aos/mutex/BUILD b/aos/mutex/BUILD
index e698f62..bdfb8fc 100644
--- a/aos/mutex/BUILD
+++ b/aos/mutex/BUILD
@@ -27,7 +27,6 @@
         "//aos/testing:test_shm",
         "//aos/time",
         "//aos/util:death_test_log_implementation",
-        "//aos/util:thread",
         "@com_github_google_glog//:glog",
     ],
 )
diff --git a/aos/mutex/mutex_test.cc b/aos/mutex/mutex_test.cc
index e7bd617..bd47ae0 100644
--- a/aos/mutex/mutex_test.cc
+++ b/aos/mutex/mutex_test.cc
@@ -16,7 +16,6 @@
 #include "aos/testing/test_shm.h"
 #include "aos/time/time.h"
 #include "aos/util/death_test_log_implementation.h"
-#include "aos/util/thread.h"
 
 namespace aos {
 namespace testing {
@@ -108,9 +107,10 @@
       static_cast<Mutex *>(shm_malloc_aligned(sizeof(Mutex), alignof(Mutex)));
   new (mutex) Mutex();
 
-  util::FunctionThread::RunInOtherThread([&]() {
+  std::thread thread([&]() {
     ASSERT_FALSE(mutex->Lock());
   });
+  thread.join();
   EXPECT_TRUE(mutex->Lock());
 
   mutex->Unlock();
@@ -124,9 +124,10 @@
       static_cast<Mutex *>(shm_malloc_aligned(sizeof(Mutex), alignof(Mutex)));
   new (mutex) Mutex();
 
-  util::FunctionThread::RunInOtherThread([&]() {
+  std::thread thread([&]() {
     ASSERT_FALSE(mutex->Lock());
   });
+  thread.join();
   EXPECT_EQ(Mutex::State::kOwnerDied, mutex->TryLock());
 
   mutex->Unlock();
@@ -145,11 +146,12 @@
   // in the original failure.
   Mutex mutex2;
 
-  util::FunctionThread::RunInOtherThread([&]() {
+  std::thread thread([&]() {
     ASSERT_FALSE(mutex1.Lock());
     ASSERT_FALSE(mutex2.Lock());
     mutex1.Unlock();
   });
+  thread.join();
 
   EXPECT_EQ(Mutex::State::kLocked, mutex1.TryLock());
   EXPECT_EQ(Mutex::State::kOwnerDied, mutex2.TryLock());
@@ -158,49 +160,21 @@
   mutex2.Unlock();
 }
 
-namespace {
-
-class AdderThread : public ::aos::util::Thread {
- public:
-  AdderThread(int *counter, Mutex *mutex,
-              monotonic_clock::duration sleep_before_time,
-              monotonic_clock::duration sleep_after_time)
-      : counter_(counter),
-        mutex_(mutex),
-        sleep_before_time_(sleep_before_time),
-        sleep_after_time_(sleep_after_time) {}
-
- private:
-  virtual void Run() override {
-    this_thread::sleep_for(sleep_before_time_);
-    MutexLocker locker(mutex_);
-    ++(*counter_);
-    this_thread::sleep_for(sleep_after_time_);
-  }
-
-  int *const counter_;
-  Mutex *const mutex_;
-  const monotonic_clock::duration sleep_before_time_, sleep_after_time_;
-};
-
-}  // namespace
-
 // Verifies that ThreadSanitizer understands that a contended mutex establishes
 // a happens-before relationship.
 TEST_F(MutexTest, ThreadSanitizerContended) {
   int counter = 0;
-  AdderThread threads[2]{
-      {&counter, &test_mutex_, chrono::milliseconds(200),
-       chrono::milliseconds(0)},
-      {&counter, &test_mutex_, chrono::milliseconds(0),
-       chrono::milliseconds(0)},
-  };
-  for (auto &c : threads) {
-    c.Start();
-  }
-  for (auto &c : threads) {
-    c.WaitUntilDone();
-  }
+  std::thread thread1([this, &counter]() {
+    std::this_thread::sleep_for(std::chrono::milliseconds(200));
+    MutexLocker locker(&test_mutex_);
+    ++counter;
+  });
+  std::thread thread2([this, &counter]() {
+    MutexLocker locker(&test_mutex_);
+    ++counter;
+  });
+  thread1.join();
+  thread2.join();
   EXPECT_EQ(2, counter);
 }
 
@@ -229,50 +203,29 @@
 // establishes a happens-before relationship.
 TEST_F(MutexTest, ThreadSanitizerUncontended) {
   int counter = 0;
-  AdderThread threads[2]{
-      {&counter, &test_mutex_, chrono::milliseconds(0),
-       chrono::milliseconds(0)},
-      {&counter, &test_mutex_, chrono::milliseconds(200),
-       chrono::milliseconds(0)}, };
-  for (auto &c : threads) {
-    c.Start();
-  }
-  for (auto &c : threads) {
-    c.WaitUntilDone();
-  }
+  std::thread thread1([this, &counter]() {
+    MutexLocker locker(&test_mutex_);
+    ++counter;
+  });
+  std::thread thread2([this, &counter]() {
+    std::this_thread::sleep_for(std::chrono::milliseconds(200));
+    MutexLocker locker(&test_mutex_);
+    ++counter;
+  });
+  thread1.join();
+  thread2.join();
   EXPECT_EQ(2, counter);
 }
 
-namespace {
-
-class LockerThread : public util::Thread {
- public:
-  LockerThread(Mutex *mutex, bool lock, bool unlock)
-      : mutex_(mutex), lock_(lock), unlock_(unlock) {}
-
- private:
-  virtual void Run() override {
-    if (lock_) {
-      ASSERT_FALSE(mutex_->Lock());
-    }
-    if (unlock_) {
-      mutex_->Unlock();
-    }
-  }
-
-  Mutex *const mutex_;
-  const bool lock_, unlock_;
-};
-
-}  // namespace
-
 // Makes sure that we don't SIGSEGV or something with multiple threads.
 TEST_F(MutexTest, MultiThreadedLock) {
-  LockerThread t(&test_mutex_, true, true);
-  t.Start();
+  std::thread thread([this] {
+    ASSERT_FALSE(test_mutex_.Lock());
+    test_mutex_.Unlock();
+  });
   ASSERT_FALSE(test_mutex_.Lock());
   test_mutex_.Unlock();
-  t.Join();
+  thread.join();
 }
 
 TEST_F(MutexLockerTest, Basic) {
@@ -292,9 +245,10 @@
       static_cast<Mutex *>(shm_malloc_aligned(sizeof(Mutex), alignof(Mutex)));
   new (mutex) Mutex();
 
-  util::FunctionThread::RunInOtherThread([&]() {
+  std::thread thread([&]() {
     ASSERT_FALSE(mutex->Lock());
   });
+  thread.join();
   EXPECT_DEATH(
       {
         logging::SetImplementation(new util::DeathTestLogImplementation());
@@ -359,9 +313,10 @@
       static_cast<Mutex *>(shm_malloc_aligned(sizeof(Mutex), alignof(Mutex)));
   new (mutex) Mutex();
 
-  util::FunctionThread::RunInOtherThread([&]() {
+  std::thread thread([&]() {
     ASSERT_FALSE(mutex->Lock());
   });
+  thread.join();
   {
     aos::IPCMutexLocker locker(mutex);
     EXPECT_EQ(Mutex::State::kLockFailed, mutex->TryLock());
diff --git a/aos/robot_state/joystick_state.fbs b/aos/robot_state/joystick_state.fbs
index f21b211..1deaa06 100644
--- a/aos/robot_state/joystick_state.fbs
+++ b/aos/robot_state/joystick_state.fbs
@@ -12,6 +12,8 @@
   pov:int;
 }
 
+enum Alliance : byte { kRed, kBlue, kInvalid }
+
 // This message is checked by all control loops to make sure that the
 // joystick code hasn't died.  It is published on "/aos"
 table JoystickState {
@@ -36,6 +38,12 @@
   // useful for testing. The only difference in behavior should be motors not
   // actually turning on.
   fake:bool;
+
+  // Color of our current alliance.
+  alliance:Alliance;
+
+  // String corresponding to the game data string
+  game_data:string;
 }
 
 root_type JoystickState;
diff --git a/aos/stl_mutex/BUILD b/aos/stl_mutex/BUILD
index f187542..06599e3 100644
--- a/aos/stl_mutex/BUILD
+++ b/aos/stl_mutex/BUILD
@@ -21,6 +21,5 @@
         "//aos:die",
         "//aos/testing:googletest",
         "//aos/testing:test_logging",
-        "//aos/util:thread",
     ],
 )
diff --git a/aos/stl_mutex/stl_mutex_test.cc b/aos/stl_mutex/stl_mutex_test.cc
index 36d7067..0fd052a 100644
--- a/aos/stl_mutex/stl_mutex_test.cc
+++ b/aos/stl_mutex/stl_mutex_test.cc
@@ -3,7 +3,6 @@
 #include "gtest/gtest.h"
 
 #include "aos/testing/test_logging.h"
-#include "aos/util/thread.h"
 #include "aos/die.h"
 
 namespace aos {
diff --git a/aos/util/BUILD b/aos/util/BUILD
index 4d7a3d1..f487892 100644
--- a/aos/util/BUILD
+++ b/aos/util/BUILD
@@ -55,7 +55,6 @@
     ],
     deps = [
         ":run_command",
-        ":thread",
         "//aos/logging",
         "//aos/testing:googletest",
     ],
@@ -146,20 +145,6 @@
 )
 
 cc_library(
-    name = "thread",
-    srcs = [
-        "thread.cc",
-    ],
-    hdrs = [
-        "thread.h",
-    ],
-    deps = [
-        "//aos:macros",
-        "@com_github_google_glog//:glog",
-    ],
-)
-
-cc_library(
     name = "trapezoid_profile",
     srcs = [
         "trapezoid_profile.cc",
diff --git a/aos/util/run_command_test.cc b/aos/util/run_command_test.cc
index 775b716..400d2f9 100644
--- a/aos/util/run_command_test.cc
+++ b/aos/util/run_command_test.cc
@@ -1,8 +1,6 @@
 #include "aos/util/run_command.h"
-
 #include "gtest/gtest.h"
-
-#include "aos/util/thread.h"
+#include <thread>
 
 namespace aos {
 namespace util {
@@ -38,16 +36,14 @@
 
 TEST(RunCommandTest, MultipleThreads) {
   int result1, result2;
-  util::FunctionThread t1([&result1](util::Thread *) {
+  std::thread t1([&result1]() {
     result1 = RunCommand("true");
   });
-  util::FunctionThread t2([&result2](util::Thread *) {
+  std::thread t2([&result2]() {
     result2 = RunCommand("true");
   });
-  t1.Start();
-  t2.Start();
-  t1.WaitUntilDone();
-  t2.WaitUntilDone();
+  t1.join();
+  t2.join();
   ASSERT_NE(-1, result1);
   ASSERT_NE(-1, result2);
   ASSERT_TRUE(WIFEXITED(result1));
diff --git a/aos/util/thread.cc b/aos/util/thread.cc
deleted file mode 100644
index bb5999f..0000000
--- a/aos/util/thread.cc
+++ /dev/null
@@ -1,79 +0,0 @@
-#include "aos/util/thread.h"
-
-#include <pthread.h>
-#include <signal.h>
-
-#include "glog/logging.h"
-
-namespace aos {
-namespace util {
-
-Thread::Thread() : started_(false), joined_(false), should_terminate_(false) {}
-
-Thread::~Thread() {
-  CHECK(!(started_ && !joined_));
-}
-
-void Thread::Start() {
-  CHECK(!started_);
-  started_ = true;
-  CHECK(pthread_create(&thread_, NULL, &Thread::StaticRun, this) == 0);
-}
-
-void Thread::Join() {
-  CHECK(!joined_ && started_);
-  joined_ = true;
-  should_terminate_.store(true);
-  CHECK(pthread_join(thread_, NULL) == 0);
-}
-
-bool Thread::TryJoin() {
-  CHECK(!joined_ && started_);
-#ifdef AOS_SANITIZER_thread
-  // ThreadSanitizer misses the tryjoin and then complains about leaking the
-  // thread. Instead, we'll just check if the thread is still around and then
-  // do a regular Join() iff it isn't.
-  // TODO(brians): Remove this once tsan learns about pthread_tryjoin_np.
-  const int kill_ret = pthread_kill(thread_, 0);
-  // If it's still around.
-  if (kill_ret == 0) return false;
-  // If it died, we'll get ESRCH. Otherwise, something went wrong.
-  if (kill_ret != ESRCH) {
-    errno = kill_ret;
-    PLOG(FATAL) << "pthread_kill(thread_, 0) failed";
-  }
-  Join();
-  return true;
-#else
-  const int ret = pthread_tryjoin_np(thread_, nullptr);
-  if (ret == 0) {
-    joined_ = true;
-    return true;
-  } else if (ret == EBUSY) {
-    return false;
-  } else {
-    errno = ret;
-    PLOG(FATAL) << "pthread_tryjoin_np(thread_, nullptr) failed";
-    return false;
-  }
-#endif
-}
-
-void Thread::RequestStop() {
-  CHECK(!joined_ && started_);
-  should_terminate_.store(true);
-}
-
-void Thread::WaitUntilDone() {
-  CHECK(!joined_ && started_);
-  joined_ = true;
-  CHECK(pthread_join(thread_, NULL) == 0);
-}
-
-void *Thread::StaticRun(void *self) {
-  static_cast<Thread *>(self)->Run();
-  return NULL;
-}
-
-}  // namespace util
-}  // namespace aos
diff --git a/aos/util/thread.h b/aos/util/thread.h
deleted file mode 100644
index 337ea48..0000000
--- a/aos/util/thread.h
+++ /dev/null
@@ -1,90 +0,0 @@
-#ifndef AOS_UTIL_THREAD_H_
-#define AOS_UTIL_THREAD_H_
-
-#include <functional>
-#include <atomic>
-
-#include <pthread.h>
-
-#include "aos/macros.h"
-
-namespace aos {
-namespace util {
-
-// A nice wrapper around a pthreads thread.
-//
-// TODO(aschuh): replace this with std::thread
-class Thread {
- public:
-  Thread();
-  virtual ~Thread();
-
-  // Actually creates the thread.
-  void Start();
-
-  // Asks the code to stop and then waits until it has done so.
-  // This or TryJoin() (returning true) must be called exactly once for every
-  // instance.
-  void Join();
-
-  // If the code has already finished, returns true. Does not block waiting if
-  // it isn't.
-  // Join() must not be called on this instance if this returns true.
-  // This must return true or Join() must be called exactly once for every
-  // instance.
-  bool TryJoin();
-
-  // Asks the code to stop (in preparation for a Join()).
-  void RequestStop();
-
-  // Waits until the code has stopped. Does not ask it to do so.
-  void WaitUntilDone();
-
- protected:
-  // Subclasses need to call this periodically if they are going to loop to
-  // check whether they have been asked to stop.
-  bool should_continue() {
-    return !should_terminate_.load();
-  }
-
- private:
-  // Where subclasses actually do something.
-  //
-  // They should not block for long periods of time without checking
-  // should_continue().
-  virtual void Run() = 0;
-
-  static void *StaticRun(void *self);
-
-  pthread_t thread_;
-  bool started_;
-  bool joined_;
-  ::std::atomic_bool should_terminate_;
-
-  DISALLOW_COPY_AND_ASSIGN(Thread);
-};
-
-class FunctionThread : public Thread {
- public:
-  FunctionThread(::std::function<void(FunctionThread *)> function)
-      : function_(function) {}
-
-  // Runs function in a new thread and waits for it to return.
-  static void RunInOtherThread(::std::function<void()> function) {
-    FunctionThread t([&function](FunctionThread *) { function(); });
-    t.Start();
-    t.Join();
-  }
-
- private:
-  virtual void Run() override {
-    function_(this);
-  }
-
-  const ::std::function<void(FunctionThread *)> function_;
-};
-
-}  // namespace util
-}  // namespace aos
-
-#endif  // AOS_UTIL_THREAD_H_
diff --git a/build_tests/BUILD b/build_tests/BUILD
index 0e3b8f7..28dc677 100644
--- a/build_tests/BUILD
+++ b/build_tests/BUILD
@@ -1,6 +1,6 @@
 load("@com_google_protobuf//:protobuf.bzl", "cc_proto_library")
 load("//tools/cpp/emscripten:defs.bzl", "emcc_binary")
-load("@com_github_google_flatbuffers//:build_defs.bzl", "flatbuffer_python_library")
+load("@com_github_google_flatbuffers//:build_defs.bzl", "flatbuffer_py_library")
 
 emcc_binary(
     name = "helloworld.html",
@@ -99,7 +99,7 @@
     ],
 )
 
-flatbuffer_python_library(
+flatbuffer_py_library(
     name = "test_python_fbs",
     srcs = ["test.fbs"],
     namespace = "aos.examples",
diff --git a/frc971/control_loops/drivetrain/BUILD b/frc971/control_loops/drivetrain/BUILD
index a120c0c..5350240 100644
--- a/frc971/control_loops/drivetrain/BUILD
+++ b/frc971/control_loops/drivetrain/BUILD
@@ -678,3 +678,22 @@
         "@org_tuxfamily_eigen//:eigen",
     ],
 )
+
+cc_library(
+    name = "camera",
+    srcs = ["camera.h"],
+    visibility = ["//visibility:public"],
+    deps = [
+        "//aos/containers:sized_array",
+        "//frc971/control_loops:pose",
+    ],
+)
+
+cc_test(
+    name = "camera_test",
+    srcs = ["camera_test.cc"],
+    deps = [
+        ":camera",
+        "//aos/testing:googletest",
+    ],
+)
diff --git a/y2019/control_loops/drivetrain/camera.h b/frc971/control_loops/drivetrain/camera.h
similarity index 99%
rename from y2019/control_loops/drivetrain/camera.h
rename to frc971/control_loops/drivetrain/camera.h
index e21bf87..02e7e65 100644
--- a/y2019/control_loops/drivetrain/camera.h
+++ b/frc971/control_loops/drivetrain/camera.h
@@ -6,7 +6,7 @@
 #include "aos/containers/sized_array.h"
 #include "frc971/control_loops/pose.h"
 
-namespace y2019 {
+namespace frc971 {
 namespace control_loops {
 
 // Represents a target on the field. Currently just consists of a pose and a
@@ -369,6 +369,6 @@
 }
 
 }  // namespace control_loops
-}  // namespace y2019
+}  // namespace frc971
 
 #endif  // Y2019_CONTROL_LOOPS_DRIVETRAIN_CAMERA_H_
diff --git a/y2019/control_loops/drivetrain/camera_test.cc b/frc971/control_loops/drivetrain/camera_test.cc
similarity index 98%
rename from y2019/control_loops/drivetrain/camera_test.cc
rename to frc971/control_loops/drivetrain/camera_test.cc
index f9b6e8d..c830038 100644
--- a/y2019/control_loops/drivetrain/camera_test.cc
+++ b/frc971/control_loops/drivetrain/camera_test.cc
@@ -1,8 +1,8 @@
-#include "y2019/control_loops/drivetrain/camera.h"
+#include "frc971/control_loops/drivetrain/camera.h"
 
 #include "gtest/gtest.h"
 
-namespace y2019 {
+namespace frc971 {
 namespace control_loops {
 namespace testing {
 
@@ -210,4 +210,4 @@
 
 }  // namespace testing
 }  // namespace control_loops
-}  // namespace y2019
+}  // namespace frc971
diff --git a/frc971/wpilib/joystick_sender.cc b/frc971/wpilib/joystick_sender.cc
index c614cfe..d31d4a5 100644
--- a/frc971/wpilib/joystick_sender.cc
+++ b/frc971/wpilib/joystick_sender.cc
@@ -67,6 +67,18 @@
             joysticks_offset = builder.fbb()->CreateVector(joysticks.begin(),
                                                            joysticks.size());
 
+        flatbuffers::Offset<flatbuffers::String> game_data_offset;
+        if (status == 0) {
+          static_assert(sizeof(match_info.gameSpecificMessage) == 64,
+                        "Check that the match info game specific message size "
+                        "hasn't changed and is still sane.");
+          CHECK_LE(match_info.gameSpecificMessageSize,
+                   sizeof(match_info.gameSpecificMessage));
+          game_data_offset = builder.fbb()->CreateString(
+              reinterpret_cast<const char *>(match_info.gameSpecificMessage),
+              match_info.gameSpecificMessageSize);
+        }
+
         aos::JoystickState::Builder joystick_state_builder =
             builder.MakeBuilder<aos::JoystickState>();
 
@@ -79,12 +91,24 @@
           joystick_state_builder.add_scale_left(
               match_info.gameSpecificMessage[1] == 'L' ||
               match_info.gameSpecificMessage[1] == 'l');
+          joystick_state_builder.add_game_data(game_data_offset);
         }
 
         joystick_state_builder.add_test_mode(ds->IsTestMode());
         joystick_state_builder.add_fms_attached(ds->IsFmsAttached());
         joystick_state_builder.add_enabled(ds->IsEnabled());
         joystick_state_builder.add_autonomous(ds->IsAutonomous());
+        switch (ds->GetAlliance()) {
+          case frc::DriverStation::kRed:
+            joystick_state_builder.add_alliance(aos::Alliance::kRed);
+            break;
+          case frc::DriverStation::kBlue:
+            joystick_state_builder.add_alliance(aos::Alliance::kBlue);
+            break;
+          case frc::DriverStation::kInvalid:
+            joystick_state_builder.add_alliance(aos::Alliance::kInvalid);
+            break;
+        }
         joystick_state_builder.add_team_id(team_id_);
 
         if (!builder.Send(joystick_state_builder.Finish())) {
diff --git a/frc971/wpilib/wpilib_robot_base.h b/frc971/wpilib/wpilib_robot_base.h
index 978c48e..ea68306 100644
--- a/frc971/wpilib/wpilib_robot_base.h
+++ b/frc971/wpilib/wpilib_robot_base.h
@@ -50,7 +50,7 @@
 class WPILibAdapterRobot : public frc::RobotBase {
  public:
   void StartCompetition() override {
-    ::aos::InitNRT(true);
+    ::aos::InitNRT();
 
     robot_.Run();
   }
diff --git a/third_party/flatbuffers/build_defs.bzl b/third_party/flatbuffers/build_defs.bzl
index 2ddc65b..2f6c944 100644
--- a/third_party/flatbuffers/build_defs.bzl
+++ b/third_party/flatbuffers/build_defs.bzl
@@ -254,7 +254,7 @@
         compatible_with = compatible_with,
     )
 
-def flatbuffer_python_library(
+def flatbuffer_py_library(
         name,
         srcs,
         namespace,
diff --git a/y2012/control_loops/accessories/accessories.cc b/y2012/control_loops/accessories/accessories.cc
index 726aede..86105e9 100644
--- a/y2012/control_loops/accessories/accessories.cc
+++ b/y2012/control_loops/accessories/accessories.cc
@@ -47,7 +47,7 @@
 }  // namespace y2012
 
 int main() {
-  ::aos::InitNRT(true);
+  ::aos::InitNRT();
 
   aos::FlatbufferDetachedBuffer<aos::Configuration> config =
       aos::configuration::ReadConfig("config.json");
diff --git a/y2012/control_loops/drivetrain/drivetrain_main.cc b/y2012/control_loops/drivetrain/drivetrain_main.cc
index 735f13f..ad55b26 100644
--- a/y2012/control_loops/drivetrain/drivetrain_main.cc
+++ b/y2012/control_loops/drivetrain/drivetrain_main.cc
@@ -7,7 +7,7 @@
 using ::frc971::control_loops::drivetrain::DrivetrainLoop;
 
 int main() {
-  ::aos::InitNRT(true);
+  ::aos::InitNRT();
 
   aos::FlatbufferDetachedBuffer<aos::Configuration> config =
       aos::configuration::ReadConfig("config.json");
diff --git a/y2012/joystick_reader.cc b/y2012/joystick_reader.cc
index 48e6eb5..4683b29 100644
--- a/y2012/joystick_reader.cc
+++ b/y2012/joystick_reader.cc
@@ -152,7 +152,7 @@
 }  // namespace y2012
 
 int main() {
-  ::aos::InitNRT(true);
+  ::aos::InitNRT();
 
   aos::FlatbufferDetachedBuffer<aos::Configuration> config =
       aos::configuration::ReadConfig("config.json");
diff --git a/y2014/control_loops/claw/claw_main.cc b/y2014/control_loops/claw/claw_main.cc
index f21c7c5..6e8ff43 100644
--- a/y2014/control_loops/claw/claw_main.cc
+++ b/y2014/control_loops/claw/claw_main.cc
@@ -4,7 +4,7 @@
 #include "aos/init.h"
 
 int main() {
-  ::aos::InitNRT(true);
+  ::aos::InitNRT();
 
   aos::FlatbufferDetachedBuffer<aos::Configuration> config =
       aos::configuration::ReadConfig("config.json");
diff --git a/y2014/control_loops/drivetrain/drivetrain_main.cc b/y2014/control_loops/drivetrain/drivetrain_main.cc
index c210770..651757f 100644
--- a/y2014/control_loops/drivetrain/drivetrain_main.cc
+++ b/y2014/control_loops/drivetrain/drivetrain_main.cc
@@ -7,7 +7,7 @@
 using ::frc971::control_loops::drivetrain::DrivetrainLoop;
 
 int main() {
-  ::aos::InitNRT(true);
+  ::aos::InitNRT();
 
   aos::FlatbufferDetachedBuffer<aos::Configuration> config =
       aos::configuration::ReadConfig("config.json");
diff --git a/y2014/control_loops/shooter/shooter_main.cc b/y2014/control_loops/shooter/shooter_main.cc
index 4161fdf..6f3b1f2 100644
--- a/y2014/control_loops/shooter/shooter_main.cc
+++ b/y2014/control_loops/shooter/shooter_main.cc
@@ -4,7 +4,7 @@
 #include "aos/init.h"
 
 int main() {
-  ::aos::InitNRT(true);
+  ::aos::InitNRT();
 
   aos::FlatbufferDetachedBuffer<aos::Configuration> config =
       aos::configuration::ReadConfig("config.json");
diff --git a/y2014/joystick_reader.cc b/y2014/joystick_reader.cc
index 6f94bfe..3f52c46 100644
--- a/y2014/joystick_reader.cc
+++ b/y2014/joystick_reader.cc
@@ -444,7 +444,7 @@
 }  // namespace y2014
 
 int main() {
-  ::aos::InitNRT(true);
+  ::aos::InitNRT();
 
   aos::FlatbufferDetachedBuffer<aos::Configuration> config =
       aos::configuration::ReadConfig("config.json");
diff --git a/y2014_bot3/control_loops/drivetrain/drivetrain_main.cc b/y2014_bot3/control_loops/drivetrain/drivetrain_main.cc
index 05d09b3..0eb1bbe 100644
--- a/y2014_bot3/control_loops/drivetrain/drivetrain_main.cc
+++ b/y2014_bot3/control_loops/drivetrain/drivetrain_main.cc
@@ -7,7 +7,7 @@
 using ::frc971::control_loops::drivetrain::DrivetrainLoop;
 
 int main() {
-  ::aos::InitNRT(true);
+  ::aos::InitNRT();
 
   aos::FlatbufferDetachedBuffer<aos::Configuration> config =
       aos::configuration::ReadConfig("config.json");
diff --git a/y2014_bot3/control_loops/rollers/rollers_main.cc b/y2014_bot3/control_loops/rollers/rollers_main.cc
index 906c067..443e18e 100644
--- a/y2014_bot3/control_loops/rollers/rollers_main.cc
+++ b/y2014_bot3/control_loops/rollers/rollers_main.cc
@@ -4,7 +4,7 @@
 #include "aos/init.h"
 
 int main() {
-  ::aos::InitNRT(true);
+  ::aos::InitNRT();
 
   aos::FlatbufferDetachedBuffer<aos::Configuration> config =
       aos::configuration::ReadConfig("config.json");
diff --git a/y2014_bot3/joystick_reader.cc b/y2014_bot3/joystick_reader.cc
index 47546a7..1c06c8c 100644
--- a/y2014_bot3/joystick_reader.cc
+++ b/y2014_bot3/joystick_reader.cc
@@ -138,7 +138,7 @@
 }  // namespace y2014_bot3
 
 int main() {
-  ::aos::InitNRT(true);
+  ::aos::InitNRT();
 
   aos::FlatbufferDetachedBuffer<aos::Configuration> config =
       aos::configuration::ReadConfig("config.json");
diff --git a/y2016/control_loops/drivetrain/drivetrain_main.cc b/y2016/control_loops/drivetrain/drivetrain_main.cc
index b4fd9d8..ce65e0c 100644
--- a/y2016/control_loops/drivetrain/drivetrain_main.cc
+++ b/y2016/control_loops/drivetrain/drivetrain_main.cc
@@ -7,7 +7,7 @@
 using ::frc971::control_loops::drivetrain::DrivetrainLoop;
 
 int main() {
-  ::aos::InitNRT(true);
+  ::aos::InitNRT();
 
   aos::FlatbufferDetachedBuffer<aos::Configuration> config =
       aos::configuration::ReadConfig("config.json");
diff --git a/y2016/control_loops/shooter/shooter_main.cc b/y2016/control_loops/shooter/shooter_main.cc
index 6e46114..3545985 100644
--- a/y2016/control_loops/shooter/shooter_main.cc
+++ b/y2016/control_loops/shooter/shooter_main.cc
@@ -4,7 +4,7 @@
 #include "aos/init.h"
 
 int main() {
-  ::aos::InitNRT(true);
+  ::aos::InitNRT();
 
   aos::FlatbufferDetachedBuffer<aos::Configuration> config =
       aos::configuration::ReadConfig("config.json");
diff --git a/y2016/control_loops/superstructure/superstructure_main.cc b/y2016/control_loops/superstructure/superstructure_main.cc
index abe2ec9..108a5a8 100644
--- a/y2016/control_loops/superstructure/superstructure_main.cc
+++ b/y2016/control_loops/superstructure/superstructure_main.cc
@@ -4,7 +4,7 @@
 #include "aos/init.h"
 
 int main() {
-  ::aos::InitNRT(true);
+  ::aos::InitNRT();
 
   aos::FlatbufferDetachedBuffer<aos::Configuration> config =
       aos::configuration::ReadConfig("config.json");
diff --git a/y2016/joystick_reader.cc b/y2016/joystick_reader.cc
index d1b5e78..fbb91a5 100644
--- a/y2016/joystick_reader.cc
+++ b/y2016/joystick_reader.cc
@@ -461,7 +461,7 @@
 }  // namespace y2016
 
 int main() {
-  ::aos::InitNRT(true);
+  ::aos::InitNRT();
 
   aos::FlatbufferDetachedBuffer<aos::Configuration> config =
       aos::configuration::ReadConfig("config.json");
diff --git a/y2017/control_loops/drivetrain/drivetrain_main.cc b/y2017/control_loops/drivetrain/drivetrain_main.cc
index c9da772..f604479 100644
--- a/y2017/control_loops/drivetrain/drivetrain_main.cc
+++ b/y2017/control_loops/drivetrain/drivetrain_main.cc
@@ -7,7 +7,7 @@
 using ::frc971::control_loops::drivetrain::DrivetrainLoop;
 
 int main() {
-  ::aos::InitNRT(true);
+  ::aos::InitNRT();
 
   aos::FlatbufferDetachedBuffer<aos::Configuration> config =
       aos::configuration::ReadConfig("config.json");
diff --git a/y2017/control_loops/superstructure/superstructure_main.cc b/y2017/control_loops/superstructure/superstructure_main.cc
index eedba6f..6b43d15 100644
--- a/y2017/control_loops/superstructure/superstructure_main.cc
+++ b/y2017/control_loops/superstructure/superstructure_main.cc
@@ -4,7 +4,7 @@
 #include "aos/init.h"
 
 int main() {
-  ::aos::InitNRT(true);
+  ::aos::InitNRT();
 
   aos::FlatbufferDetachedBuffer<aos::Configuration> config =
       aos::configuration::ReadConfig("config.json");
diff --git a/y2017/joystick_reader.cc b/y2017/joystick_reader.cc
index e42085a..c7f0666 100644
--- a/y2017/joystick_reader.cc
+++ b/y2017/joystick_reader.cc
@@ -321,7 +321,7 @@
 }  // namespace y2017
 
 int main() {
-  ::aos::InitNRT(true);
+  ::aos::InitNRT();
 
   aos::FlatbufferDetachedBuffer<aos::Configuration> config =
       aos::configuration::ReadConfig("config.json");
diff --git a/y2018/control_loops/drivetrain/drivetrain_main.cc b/y2018/control_loops/drivetrain/drivetrain_main.cc
index e5c68c5..c9fea5d 100644
--- a/y2018/control_loops/drivetrain/drivetrain_main.cc
+++ b/y2018/control_loops/drivetrain/drivetrain_main.cc
@@ -7,7 +7,7 @@
 using ::frc971::control_loops::drivetrain::DrivetrainLoop;
 
 int main() {
-  ::aos::InitNRT(true);
+  ::aos::InitNRT();
 
   aos::FlatbufferDetachedBuffer<aos::Configuration> config =
       aos::configuration::ReadConfig("config.json");
diff --git a/y2018/control_loops/superstructure/superstructure_main.cc b/y2018/control_loops/superstructure/superstructure_main.cc
index 3200ead..f072c68 100644
--- a/y2018/control_loops/superstructure/superstructure_main.cc
+++ b/y2018/control_loops/superstructure/superstructure_main.cc
@@ -4,7 +4,7 @@
 #include "aos/init.h"
 
 int main() {
-  ::aos::InitNRT(true);
+  ::aos::InitNRT();
 
   aos::FlatbufferDetachedBuffer<aos::Configuration> config =
       aos::configuration::ReadConfig("config.json");
diff --git a/y2018/joystick_reader.cc b/y2018/joystick_reader.cc
index c9778b8..c8c5385 100644
--- a/y2018/joystick_reader.cc
+++ b/y2018/joystick_reader.cc
@@ -389,7 +389,7 @@
 }  // namespace y2018
 
 int main() {
-  ::aos::InitNRT(true);
+  ::aos::InitNRT();
 
   aos::FlatbufferDetachedBuffer<aos::Configuration> config =
       aos::configuration::ReadConfig("config.json");
diff --git a/y2019/BUILD b/y2019/BUILD
index 328c93f..e286fc7 100644
--- a/y2019/BUILD
+++ b/y2019/BUILD
@@ -36,7 +36,7 @@
         "//frc971:constants",
         "//frc971/control_loops:pose",
         "//frc971/control_loops:static_zeroing_single_dof_profiled_subsystem",
-        "//y2019/control_loops/drivetrain:camera",
+        "//frc971/control_loops/drivetrain:camera",
         "//y2019/control_loops/drivetrain:polydrivetrain_plants",
         "//y2019/control_loops/superstructure/elevator:elevator_plants",
         "//y2019/control_loops/superstructure/intake:intake_plants",
diff --git a/y2019/constants.h b/y2019/constants.h
index 5b00847..d1d0c7c 100644
--- a/y2019/constants.h
+++ b/y2019/constants.h
@@ -6,13 +6,13 @@
 #include <stdint.h>
 
 #include "frc971/constants.h"
+#include "frc971/control_loops/drivetrain/camera.h"
 #include "frc971/control_loops/static_zeroing_single_dof_profiled_subsystem.h"
 #include "y2019/control_loops/drivetrain/drivetrain_dog_motor_plant.h"
 #include "y2019/control_loops/superstructure/elevator/elevator_plant.h"
 #include "y2019/control_loops/superstructure/intake/intake_plant.h"
 #include "y2019/control_loops/superstructure/stilts/stilts_plant.h"
 #include "y2019/control_loops/superstructure/wrist/wrist_plant.h"
-#include "y2019/control_loops/drivetrain/camera.h"
 #include "frc971/control_loops/pose.h"
 
 namespace y2019 {
@@ -32,7 +32,7 @@
 class Field {
  public:
   typedef ::frc971::control_loops::TypedPose<double> Pose;
-  typedef ::y2019::control_loops::TypedTarget<double> Target;
+  typedef ::frc971::control_loops::TypedTarget<double> Target;
   typedef ::frc971::control_loops::TypedLineSegment<double> Obstacle;
 
   static constexpr size_t kNumTargets = 32;
@@ -201,8 +201,9 @@
 
   static constexpr size_t kNumCameras = 5;
   ::std::array<CameraCalibration, kNumCameras> cameras;
-  control_loops::TypedCamera<Field::kNumTargets, Field::kNumObstacles,
-                             double>::NoiseParameters camera_noise_parameters;
+  frc971::control_loops::TypedCamera<Field::kNumTargets, Field::kNumObstacles,
+                                     double>::NoiseParameters
+      camera_noise_parameters;
 };
 
 // Creates (once) a Values instance for ::aos::network::GetTeamNumber() and
diff --git a/y2019/control_loops/drivetrain/BUILD b/y2019/control_loops/drivetrain/BUILD
index bc76d7a..aabd3e5 100644
--- a/y2019/control_loops/drivetrain/BUILD
+++ b/y2019/control_loops/drivetrain/BUILD
@@ -100,30 +100,11 @@
 )
 
 cc_library(
-    name = "camera",
-    srcs = ["camera.h"],
-    visibility = ["//y2019:__pkg__"],
-    deps = [
-        "//aos/containers:sized_array",
-        "//frc971/control_loops:pose",
-    ],
-)
-
-cc_test(
-    name = "camera_test",
-    srcs = ["camera_test.cc"],
-    deps = [
-        ":camera",
-        "//aos/testing:googletest",
-    ],
-)
-
-cc_library(
     name = "localizer",
     hdrs = ["localizer.h"],
     deps = [
-        ":camera",
         "//frc971/control_loops:pose",
+        "//frc971/control_loops/drivetrain:camera",
         "//frc971/control_loops/drivetrain:hybrid_ekf",
     ],
 )
@@ -133,9 +114,9 @@
     srcs = ["target_selector.cc"],
     hdrs = ["target_selector.h"],
     deps = [
-        ":camera",
         ":target_selector_fbs",
         "//frc971/control_loops:pose",
+        "//frc971/control_loops/drivetrain:camera",
         "//frc971/control_loops/drivetrain:localizer",
         "//y2019:constants",
         "//y2019/control_loops/superstructure:superstructure_goal_fbs",
diff --git a/y2019/control_loops/drivetrain/drivetrain_main.cc b/y2019/control_loops/drivetrain/drivetrain_main.cc
index 77df69b..cbc65e0 100644
--- a/y2019/control_loops/drivetrain/drivetrain_main.cc
+++ b/y2019/control_loops/drivetrain/drivetrain_main.cc
@@ -8,7 +8,7 @@
 using ::frc971::control_loops::drivetrain::DrivetrainLoop;
 
 int main() {
-  ::aos::InitNRT(true);
+  ::aos::InitNRT();
 
   aos::FlatbufferDetachedBuffer<aos::Configuration> config =
       aos::configuration::ReadConfig("config.json");
diff --git a/y2019/control_loops/drivetrain/localizer.h b/y2019/control_loops/drivetrain/localizer.h
index d716535..1575efb 100644
--- a/y2019/control_loops/drivetrain/localizer.h
+++ b/y2019/control_loops/drivetrain/localizer.h
@@ -5,7 +5,7 @@
 #include <memory>
 
 #include "frc971/control_loops/pose.h"
-#include "y2019/control_loops/drivetrain/camera.h"
+#include "frc971/control_loops/drivetrain/camera.h"
 #include "frc971/control_loops/drivetrain/hybrid_ekf.h"
 
 namespace y2019 {
@@ -16,9 +16,11 @@
 class TypedLocalizer
     : public ::frc971::control_loops::drivetrain::HybridEkf<Scalar> {
  public:
-  typedef TypedCamera<num_targets, num_obstacles, Scalar> Camera;
+  typedef frc971::control_loops::TypedCamera<num_targets, num_obstacles, Scalar>
+      Camera;
   typedef typename Camera::TargetView TargetView;
   typedef typename Camera::Pose Pose;
+  typedef typename frc971::control_loops::Target Target;
   typedef ::frc971::control_loops::drivetrain::HybridEkf<Scalar> HybridEkf;
   typedef typename HybridEkf::State State;
   typedef typename HybridEkf::StateSquare StateSquare;
diff --git a/y2019/control_loops/drivetrain/localizer_test.cc b/y2019/control_loops/drivetrain/localizer_test.cc
index 319065e..aa7f22f 100644
--- a/y2019/control_loops/drivetrain/localizer_test.cc
+++ b/y2019/control_loops/drivetrain/localizer_test.cc
@@ -30,6 +30,7 @@
                        kNumTargetsPerFrame, double>
     TestLocalizer;
 typedef typename TestLocalizer::Camera TestCamera;
+typedef typename TestLocalizer::Target Target;
 typedef typename TestCamera::Pose Pose;
 typedef typename TestCamera::LineSegment Obstacle;
 
diff --git a/y2019/control_loops/drivetrain/target_selector.h b/y2019/control_loops/drivetrain/target_selector.h
index 4ab65ef..fdc183f 100644
--- a/y2019/control_loops/drivetrain/target_selector.h
+++ b/y2019/control_loops/drivetrain/target_selector.h
@@ -4,7 +4,7 @@
 #include "frc971/control_loops/pose.h"
 #include "frc971/control_loops/drivetrain/localizer.h"
 #include "y2019/constants.h"
-#include "y2019/control_loops/drivetrain/camera.h"
+#include "frc971/control_loops/drivetrain/camera.h"
 #include "y2019/control_loops/drivetrain/target_selector_generated.h"
 #include "y2019/control_loops/superstructure/superstructure_goal_generated.h"
 
@@ -24,10 +24,12 @@
     : public ::frc971::control_loops::drivetrain::TargetSelectorInterface {
  public:
   typedef ::frc971::control_loops::TypedPose<double> Pose;
+  typedef frc971::control_loops::Target Target;
   // For the virtual camera that we use to identify targets, ignore all
   // obstacles and just assume that we have perfect field of view.
-  typedef TypedCamera<y2019::constants::Field::kNumTargets,
-                      /*num_obstacles=*/0, double> FakeCamera;
+  typedef frc971::control_loops::TypedCamera<
+      y2019::constants::Field::kNumTargets,
+      /*num_obstacles=*/0, double> FakeCamera;
 
   TargetSelector(::aos::EventLoop *event_loop);
 
diff --git a/y2019/control_loops/superstructure/superstructure_main.cc b/y2019/control_loops/superstructure/superstructure_main.cc
index f831774..b98c249 100644
--- a/y2019/control_loops/superstructure/superstructure_main.cc
+++ b/y2019/control_loops/superstructure/superstructure_main.cc
@@ -4,7 +4,7 @@
 #include "aos/init.h"
 
 int main(int /*argc*/, char * /*argv*/ []) {
-  ::aos::InitNRT(true);
+  ::aos::InitNRT();
 
   aos::FlatbufferDetachedBuffer<aos::Configuration> config =
       aos::configuration::ReadConfig("config.json");
diff --git a/y2019/joystick_reader.cc b/y2019/joystick_reader.cc
index 0f37112..59906c7 100644
--- a/y2019/joystick_reader.cc
+++ b/y2019/joystick_reader.cc
@@ -663,7 +663,7 @@
 }  // namespace y2019
 
 int main() {
-  ::aos::InitNRT(true);
+  ::aos::InitNRT();
 
   aos::FlatbufferDetachedBuffer<aos::Configuration> config =
       aos::configuration::ReadConfig("config.json");
diff --git a/y2020/control_loops/drivetrain/drivetrain_main.cc b/y2020/control_loops/drivetrain/drivetrain_main.cc
index 66b9cc7..04bd107 100644
--- a/y2020/control_loops/drivetrain/drivetrain_main.cc
+++ b/y2020/control_loops/drivetrain/drivetrain_main.cc
@@ -6,7 +6,7 @@
 using ::frc971::control_loops::drivetrain::DrivetrainLoop;
 
 int main() {
-  ::aos::InitNRT(true);
+  ::aos::InitNRT();
 
   aos::FlatbufferDetachedBuffer<aos::Configuration> config =
       aos::configuration::ReadConfig("config.json");
diff --git a/y2020/control_loops/superstructure/superstructure_main.cc b/y2020/control_loops/superstructure/superstructure_main.cc
index 101a0c7..aa74d56 100644
--- a/y2020/control_loops/superstructure/superstructure_main.cc
+++ b/y2020/control_loops/superstructure/superstructure_main.cc
@@ -4,7 +4,7 @@
 #include "aos/init.h"
 
 int main(int /*argc*/, char * /*argv*/ []) {
-  ::aos::InitNRT(true);
+  ::aos::InitNRT();
 
   aos::FlatbufferDetachedBuffer<aos::Configuration> config =
       aos::configuration::ReadConfig("config.json");
diff --git a/y2020/joystick_reader.cc b/y2020/joystick_reader.cc
index 105506e..06b8e40 100644
--- a/y2020/joystick_reader.cc
+++ b/y2020/joystick_reader.cc
@@ -65,7 +65,7 @@
 }  // namespace y2020
 
 int main() {
-  ::aos::InitNRT(true);
+  ::aos::InitNRT();
 
   aos::FlatbufferDetachedBuffer<aos::Configuration> config =
       aos::configuration::ReadConfig("config.json");
diff --git a/y2020/vision/sift/BUILD b/y2020/vision/sift/BUILD
index 0a2cb3a..ec06126 100644
--- a/y2020/vision/sift/BUILD
+++ b/y2020/vision/sift/BUILD
@@ -1,3 +1,117 @@
+load(":fast_gaussian.bzl", "fast_gaussian")
+load("@com_github_google_flatbuffers//:build_defs.bzl", "flatbuffer_cc_library", "flatbuffer_py_library")
+
+cc_binary(
+    name = "fast_gaussian_generator",
+    srcs = [
+        "fast_gaussian_generator.cc",
+    ],
+    restricted_to = [
+        "//tools:k8",
+        "//tools:armhf-debian",
+    ],
+    deps = [
+        "//third_party:halide",
+        "//third_party:halide_gengen",
+        "//third_party:opencv",
+        "@com_github_google_glog//:glog",
+    ],
+)
+
+py_binary(
+    name = "fast_gaussian_runner",
+    srcs = [
+        "fast_gaussian_runner.py",
+    ],
+    data = [
+        ":fast_gaussian_generator",
+        # TODO(Brian): Replace this with something more fine-grained from the
+        # configuration fragment or something.
+        "//tools/cpp:toolchain",
+    ],
+    default_python_version = "PY3",
+    main = "fast_gaussian_runner.py",
+    restricted_to = [
+        "//tools:k8",
+        "//tools:armhf-debian",
+    ],
+    srcs_version = "PY2AND3",
+    deps = [
+        "@bazel_tools//tools/python/runfiles",
+    ],
+)
+
+# Each element is [sigma, sigma_name, radius].
+# opencv's default width is calculated as:
+#   cvRound(sigma1 * (depth == CV_8U ? 3 : 4) * 2 + 1) | 1
+# Pulling that in helps a lot with making it faster (less data to read, and less
+# math to do), but if you make it too narrow SIFT quickly freaks out.
+sigmas = [
+    [
+        "1.2262734984654078",
+        "1p2",
+        "9",
+    ],
+    [
+        "1.5450077936447955",
+        "1p5",
+        "11",
+    ],
+    [
+        "1.9465878414647133",
+        "1p9",
+        "13",
+    ],
+    [
+        "2.4525469969308156",
+        "2p4",
+        "15",
+    ],
+    [
+        "3.0900155872895909",
+        "3p1",
+        "19",
+    ],
+    # TODO(Brian): We only need one of these two for 1280x720. Don't generate
+    # all the redundant versions for other sizes, and maybe stop doing the one
+    # we don't actually use.
+    [
+        "1.2489997148513794",
+        "1p24",
+        "11",
+    ],
+    [
+        "1.5198683738708496",
+        "1p52",
+        "15",
+    ],
+]
+
+sizes = [
+    [
+        1280,
+        720,
+    ],
+    [
+        640,
+        360,
+    ],
+    [
+        320,
+        180,
+    ],
+    [
+        160,
+        90,
+    ],
+    [
+        80,
+        45,
+    ],
+]
+
+fast_gaussian(sigmas, sizes)
+
 cc_library(
     name = "sift971",
     srcs = [
@@ -12,6 +126,81 @@
     ],
     visibility = ["//visibility:public"],
     deps = [
+        ":fast_gaussian",
         "//third_party:opencv",
+        "@com_github_google_glog//:glog",
     ],
 )
+
+cc_library(
+    name = "fast_gaussian",
+    srcs = [
+        "fast_gaussian.cc",
+    ],
+    hdrs = [
+        "fast_gaussian.h",
+    ],
+    restricted_to = [
+        "//tools:k8",
+        "//tools:armhf-debian",
+    ],
+    deps = [
+        ":fast_gaussian_all",
+        "//third_party:halide_runtime",
+        "//third_party:opencv",
+        "@com_github_google_glog//:glog",
+    ],
+)
+
+cc_binary(
+    name = "testing_sift",
+    srcs = [
+        "testing_sift.cc",
+    ],
+    restricted_to = [
+        "//tools:k8",
+        "//tools:armhf-debian",
+    ],
+    deps = [
+        ":fast_gaussian",
+        "//aos:init",
+        "//aos/time",
+        "//third_party:opencv",
+        "//y2020/vision/sift:sift971",
+        "@com_github_google_glog//:glog",
+    ],
+)
+
+flatbuffer_py_library(
+    name = "sift_fbs_python",
+    srcs = [
+        "sift.fbs",
+        "sift_training.fbs",
+    ],
+    namespace = "frc971.vision.sift",
+    tables = [
+        "Feature",
+        "Match",
+        "ImageMatch",
+        "TransformationMatrix",
+        "CameraPose",
+        "ImageMatchResult",
+        "TrainingImage",
+        "TrainingData",
+    ],
+)
+
+flatbuffer_cc_library(
+    name = "sift_fbs",
+    srcs = ["sift.fbs"],
+    gen_reflections = True,
+    visibility = ["//visibility:public"],
+)
+
+flatbuffer_cc_library(
+    name = "sift_training_fbs",
+    srcs = ["sift_training.fbs"],
+    gen_reflections = True,
+    includes = [":sift_fbs_includes"],
+    visibility = ["//visibility:public"],
+)
diff --git a/y2020/vision/sift/fast_gaussian.bzl b/y2020/vision/sift/fast_gaussian.bzl
new file mode 100644
index 0000000..a1c3173
--- /dev/null
+++ b/y2020/vision/sift/fast_gaussian.bzl
@@ -0,0 +1,55 @@
+def fast_gaussian(sigmas, sizes):
+  files = []
+  for _, sigma_name, _ in sigmas:
+    for cols, rows in sizes:
+      files.append("fast_gaussian_%dx%d_%s" % (cols, rows, sigma_name))
+  for _, sigma_name, _ in sigmas:
+    for cols, rows in sizes:
+      files.append("fast_gaussian_subtract_%dx%d_%s" % (cols, rows, sigma_name))
+  for cols, rows in sizes:
+    files.append('fast_subtract_%dx%d' % (cols, rows))
+
+  params = struct(
+    sigmas = sigmas,
+    sizes = sizes,
+  )
+
+  headers = [f + '.h' for f in files] + [
+    'fast_gaussian_all.h',
+  ]
+  objects = [f + '.o' for f in files] + [
+    'fast_gaussian_runtime.o',
+  ]
+  htmls = [f + '.html' for f in files]
+
+  native.genrule(
+    name = "generate_fast_gaussian",
+    tools = [
+        ":fast_gaussian_runner",
+    ],
+    cmd = ' '.join([
+      '$(location fast_gaussian_runner)',
+      "'" + params.to_json() + "'",
+      # TODO(Brian): This should be RULEDIR once we have support for that.
+      '$(@D)',
+      '$(TARGET_CPU)',
+    ]),
+    outs = headers + objects + htmls,
+    restricted_to = [
+      "//tools:k8",
+      "//tools:armhf-debian",
+    ],
+  )
+
+  native.cc_library(
+    name = 'fast_gaussian_all',
+    hdrs = ['fast_gaussian_all.h'],
+    srcs = headers + objects,
+    deps = [
+      '//third_party:halide_runtime',
+    ],
+    restricted_to = [
+      "//tools:k8",
+      "//tools:armhf-debian",
+    ],
+  )
diff --git a/y2020/vision/sift/fast_gaussian.cc b/y2020/vision/sift/fast_gaussian.cc
new file mode 100644
index 0000000..22549ac
--- /dev/null
+++ b/y2020/vision/sift/fast_gaussian.cc
@@ -0,0 +1,126 @@
+#include "y2020/vision/sift/fast_gaussian.h"
+
+#include <iomanip>
+
+#include <opencv2/imgproc.hpp>
+
+#include "y2020/vision/sift/fast_gaussian_all.h"
+
+namespace frc971 {
+namespace vision {
+namespace {
+
+void CheckNonOverlapping(const cv::Mat &a, const cv::Mat &b) {
+  CHECK(a.data > b.data + b.total() * b.elemSize() || a.data < b.data)
+      << ": images may not overlap";
+  CHECK(b.data > a.data + a.total() * a.elemSize() || b.data < a.data)
+      << ": images may not overlap";
+}
+
+// An easy toggle to always fall back to the slow implementations, to verify the
+// results are the same.
+constexpr bool kUseFast = true;
+
+// An easy toggle to print the result of all operations, for verifying that the
+// halide code is doing what we expect.
+constexpr bool kPrintAll = false;
+
+// We deliberately don't generate code for images smaller than this, so don't
+// print warnings about them.
+//
+// The opencv implementations are so fast below this size, the build time to
+// generate halide versions isn't worthwhile.
+constexpr int kMinWarnSize = 80;
+
+bool IsSmall(cv::Size size) {
+  return size.height <= kMinWarnSize && size.width <= kMinWarnSize;
+}
+
+}  // namespace
+
+void FastGaussian(const cv::Mat &source, cv::Mat *destination, double sigma) {
+  CHECK_EQ(source.type(), CV_16SC1);
+
+  destination->create(source.size(), source.type());
+  CheckNonOverlapping(source, *destination);
+
+  int result = 1;
+  if (kUseFast) {
+    result = DoGeneratedFastGaussian(MatToHalide<const int16_t>(source),
+                                     MatToHalide<int16_t>(*destination), sigma);
+  }
+  if (kPrintAll) {
+    LOG(INFO) << result << ": " << source.rows << " " << source.cols << " "
+              << std::setprecision(17) << sigma;
+  }
+  if (result == 0) {
+    return;
+  }
+  if (!IsSmall(source.size())) {
+    LOG(WARNING) << "slow gaussian blur: " << source.rows << " " << source.cols
+                 << " " << std::setprecision(17) << sigma;
+  }
+  CHECK_EQ(result, 1);
+
+  cv::GaussianBlur(source, *destination, cv::Size(), sigma, sigma,
+                   cv::BORDER_REPLICATE);
+}
+
+void FastSubtract(const cv::Mat &a, const cv::Mat &b, cv::Mat *destination) {
+  CHECK(a.size() == b.size());
+  destination->create(a.size(), a.type());
+  CheckNonOverlapping(a, *destination);
+  CheckNonOverlapping(b, *destination);
+
+  int result = 1;
+  if (kUseFast) {
+    result = DoGeneratedFastSubtract(MatToHalide<const int16_t>(a),
+                                     MatToHalide<const int16_t>(b),
+                                     MatToHalide<int16_t>(*destination));
+  }
+  if (kPrintAll) {
+    LOG(INFO) << result << ": " << a.rows << " " << a.cols;
+  }
+  if (result == 0) {
+    return;
+  }
+  if (!IsSmall(a.size())) {
+    LOG(WARNING) << "slow subtract: " << a.rows << " " << a.cols;
+  }
+  CHECK_EQ(result, 1);
+
+  cv::subtract(a, b, *destination);
+}
+
+void FastGaussianAndSubtract(const cv::Mat &source, cv::Mat *blurred,
+                             cv::Mat *difference, double sigma) {
+  CHECK_EQ(source.type(), CV_16SC1);
+  blurred->create(source.size(), source.type());
+  difference->create(source.size(), source.type());
+
+  int result = 1;
+  if (kUseFast) {
+    result = DoGeneratedFastGaussianAndSubtract(
+        MatToHalide<const int16_t>(source), MatToHalide<int16_t>(*blurred),
+        MatToHalide<int16_t>(*difference), sigma);
+  }
+  if (kPrintAll) {
+    LOG(INFO) << result << ": " << source.rows << " " << source.cols << " "
+              << std::setprecision(17) << sigma;
+  }
+  if (result == 0) {
+    return;
+  }
+  if (!IsSmall(source.size())) {
+    LOG(WARNING) << "slow gaussian blur: " << source.rows << " " << source.cols
+                 << " " << std::setprecision(17) << sigma;
+  }
+  CHECK_EQ(result, 1);
+
+  cv::GaussianBlur(source, *blurred, cv::Size(), sigma, sigma,
+                   cv::BORDER_REPLICATE);
+  cv::subtract(*blurred, source, *difference);
+}
+
+}  // namespace vision
+}  // namespace frc971
diff --git a/y2020/vision/sift/fast_gaussian.h b/y2020/vision/sift/fast_gaussian.h
new file mode 100644
index 0000000..580083f
--- /dev/null
+++ b/y2020/vision/sift/fast_gaussian.h
@@ -0,0 +1,44 @@
+#ifndef Y2020_VISION_SIFT_FAST_GAUSSIAN_H_
+#define Y2020_VISION_SIFT_FAST_GAUSSIAN_H_
+
+#include <type_traits>
+
+#include <opencv2/core/mat.hpp>
+#include "HalideBuffer.h"
+#include "glog/logging.h"
+
+namespace frc971 {
+namespace vision {
+
+// Returns a Halide buffer representing the data in mat.
+template <typename T>
+inline Halide::Runtime::Buffer<T, 2> MatToHalide(const cv::Mat &mat) {
+  CHECK_EQ(cv::DataType<typename std::remove_const<T>::type>::type, mat.type());
+  // Verify that at<T>(row, col) accesses this address:
+  //   data + sizeof(T) * (row * cols + col)
+  CHECK_EQ(mat.elemSize(), sizeof(T));
+  CHECK_EQ(mat.elemSize1(), sizeof(T));
+  CHECK_EQ(mat.step1(0), static_cast<size_t>(mat.cols));
+  CHECK_EQ(mat.step1(1), 1u);
+  CHECK_EQ(mat.dims, 2);
+  CHECK(mat.isContinuous());
+  return Halide::Runtime::Buffer<T, 2>(reinterpret_cast<T *>(mat.data),
+                                       mat.cols, mat.rows);
+}
+
+// Performs a gaussian blur with the specified sigma, truncated to a reasonable
+// width. Attempts to use faster implementations, but will fall back to
+// cv::GaussianBlur otherwise. Only handles a limited set of Mat formats.
+//
+// source and destination may not overlap.
+//
+// Always uses BORDER_REPLICATE mode.
+void FastGaussian(const cv::Mat &source, cv::Mat *destination, double sigma);
+void FastSubtract(const cv::Mat &a, const cv::Mat &b, cv::Mat *destination);
+void FastGaussianAndSubtract(const cv::Mat &source, cv::Mat *blurred,
+                             cv::Mat *difference, double sigma);
+
+}  // namespace vision
+}  // namespace vision
+
+#endif  // Y2020_VISION_SIFT_FAST_GAUSSIAN_H_
diff --git a/y2020/vision/sift/fast_gaussian_generator.cc b/y2020/vision/sift/fast_gaussian_generator.cc
new file mode 100644
index 0000000..6418618
--- /dev/null
+++ b/y2020/vision/sift/fast_gaussian_generator.cc
@@ -0,0 +1,206 @@
+#include <opencv2/core/mat.hpp>
+#include <opencv2/imgproc.hpp>
+#include "Halide.h"
+#include "glog/logging.h"
+
+// This is a Halide "generator". This means it is a binary which generates
+// ahead-of-time optimized functions as directed by command-line arguments.
+// https://halide-lang.org/tutorials/tutorial_lesson_15_generators.html has an
+// introduction to much of the magic in this file.
+
+namespace frc971 {
+namespace vision {
+namespace {
+
+// Returns a function implementating a 1-dimensional gaussian blur convolution.
+Halide::Func GenerateBlur(std::string name, Halide::Func in, int col_step,
+                          int row_step, int radius, cv::Mat kernel,
+                          Halide::Var col, Halide::Var row) {
+  Halide::Expr expr = kernel.at<float>(0) * in(col, row);
+  for (int i = 1; i <= radius; ++i) {
+    expr += kernel.at<float>(i) * (in(col - i * col_step, row - i * row_step) +
+                                   in(col + i * col_step, row + i * row_step));
+  }
+  Halide::Func func(name);
+  func(col, row) = expr;
+  return func;
+}
+
+template <typename T>
+void SetRowMajor(T *buffer_parameter, int cols, int rows) {
+  buffer_parameter->dim(0).set_stride(1);
+  buffer_parameter->dim(0).set_extent(cols);
+  buffer_parameter->dim(0).set_min(0);
+  buffer_parameter->dim(1).set_stride(cols);
+  buffer_parameter->dim(1).set_extent(rows);
+  buffer_parameter->dim(1).set_min(0);
+}
+
+}  // namespace
+
+class GaussianGenerator : public Halide::Generator<GaussianGenerator> {
+ public:
+  GeneratorParam<int> cols{"cols", 0};
+  GeneratorParam<int> rows{"rows", 0};
+  GeneratorParam<double> sigma{"sigma", -1};
+  GeneratorParam<int> filter_width{"filter_width", 0};
+
+  Input<Buffer<int16_t>> input{"input", 2};
+  Output<Buffer<int16_t>> output{"output", 2};
+
+  // We use opencv's naming convention, instead of the (x, y) which most of the
+  // halide examples use. This is easier to keep straight with the row-major
+  // storage order we're using though.
+  // col is first because incrementing the data index by 1 moves over 1 column.
+  Var col{"col"}, row{"row"};
+
+  void generate() {
+    CHECK(cols > 0) << ": Must specify a cols";
+    CHECK(rows > 0) << ": Must specify a rows";
+    CHECK(sigma > 0) << ": Must specify a sigma";
+    CHECK(filter_width > 0) << ": Must specify a filter_width";
+    CHECK((filter_width % 2) == 1)
+        << ": Invalid filter_width: " << static_cast<int>(filter_width);
+
+    SetRowMajor(&input, cols, rows);
+
+    const int radius = (filter_width - 1) / 2;
+    const cv::Mat kernel =
+        cv::getGaussianKernel(filter_width, sigma, CV_32F)
+            .rowRange(radius, filter_width);
+
+    Halide::Func in_bounded = Halide::BoundaryConditions::repeat_edge(input);
+    Halide::Func blur_col =
+        GenerateBlur("blur_col", in_bounded, 1, 0, radius, kernel, col, row);
+    output(col, row) = Halide::cast<int16_t>(
+        GenerateBlur("blur_row", blur_col, 0, 1, radius, kernel, col, row)(col, row));
+
+    // Vectorize along the col dimension. Most of the data needed by each lane
+    // overlaps this way. This also has the advantage of being the first
+    // dimension, so as we move along it we will have good data locality.
+    blur_col.vectorize(col, 8);
+
+    // The fun part: we tile the algorithm. This tile size is designed to fit
+    // within each CPU core's L1 cache. On the boundaries of the tiles, we end
+    // re-computing the first blur, but fitting within the L1 cache is worth it.
+    Halide::Var col_inner("col_inner"), row_inner("row_inner");
+    output.tile(col, row, col_inner, row_inner, 64, 32);
+    Halide::Var tile_index("tile_index");
+    output.fuse(col, row, tile_index);
+
+    // Compute the first blur as needed for the second one, within each tile.
+    blur_col.compute_at(output, tile_index);
+    // And then vectorize the second blur within each tile.
+    output.vectorize(col_inner, 8);
+
+    // Lastly, compute all the tiles in parallel.
+    output.parallel(tile_index);
+
+    SetRowMajor(&output, cols, rows);
+  }
+};
+
+class SubtractGenerator : public Halide::Generator<SubtractGenerator> {
+ public:
+  GeneratorParam<int> cols{"cols", 0};
+  GeneratorParam<int> rows{"rows", 0};
+
+  Input<Buffer<int16_t>> input_a{"input_a", 2};
+  Input<Buffer<int16_t>> input_b{"input_b", 2};
+  Output<Buffer<int16_t>> output{"output", 2};
+
+  Var col{"col"}, row{"row"};
+
+  void generate() {
+    CHECK(cols > 0) << ": Must specify a cols";
+    CHECK(rows > 0) << ": Must specify a rows";
+
+    SetRowMajor(&input_a, cols, rows);
+    SetRowMajor(&input_b, cols, rows);
+
+    output(col, row) = Halide::saturating_cast<int16_t>(
+        Halide::cast<int32_t>(input_a(col, row)) - input_b(col, row));
+    output.vectorize(col, 16);
+
+    SetRowMajor(&output, cols, rows);
+  }
+};
+
+class GaussianAndSubtractGenerator
+    : public Halide::Generator<GaussianAndSubtractGenerator> {
+ public:
+  GeneratorParam<int> cols{"cols", 0};
+  GeneratorParam<int> rows{"rows", 0};
+  GeneratorParam<double> sigma{"sigma", -1};
+  GeneratorParam<int> filter_width{"filter_width", 0};
+
+  Input<Buffer<int16_t>> input{"input", 2};
+  Output<Buffer<int16_t>> blurred{"blurred", 2};
+  Output<Buffer<int16_t>> difference{"difference", 2};
+
+  // We use opencv's naming convention, instead of the (x, y) which most of the
+  // halide examples use. This is easier to keep straight with the row-major
+  // storage order we're using though.
+  // col is first because incrementing the data index by 1 moves over 1 column.
+  Var col{"col"}, row{"row"};
+
+  void generate() {
+    CHECK(cols > 0) << ": Must specify a cols";
+    CHECK(rows > 0) << ": Must specify a rows";
+    CHECK(sigma > 0) << ": Must specify a sigma";
+    CHECK(filter_width > 0) << ": Must specify a filter_width";
+    CHECK((filter_width % 2) == 1)
+        << ": Invalid filter_width: " << static_cast<int>(filter_width);
+
+    SetRowMajor(&input, cols, rows);
+
+    const int radius = (filter_width - 1) / 2;
+    const cv::Mat kernel =
+        cv::getGaussianKernel(filter_width, sigma, CV_32F)
+            .rowRange(radius, filter_width);
+
+    Halide::Func in_bounded = Halide::BoundaryConditions::repeat_edge(input);
+    Halide::Func blur_col =
+        GenerateBlur("blur_col", in_bounded, 1, 0, radius, kernel, col, row);
+    blurred(col, row) = Halide::cast<int16_t>(
+        GenerateBlur("blur_row", blur_col, 0, 1, radius, kernel, col, row)(col, row));
+    difference(col, row) = Halide::saturating_cast<int16_t>(
+        Halide::cast<int32_t>(blurred(col, row)) - input(col, row));
+
+    // Vectorize along the col dimension. Most of the data needed by each lane
+    // overlaps this way. This also has the advantage of being the first
+    // dimension, so as we move along it we will have good data locality.
+    blur_col.vectorize(col, 8);
+
+    // The fun part: we tile the algorithm. This tile size is designed to fit
+    // within each CPU core's L1 cache. On the boundaries of the tiles, we end
+    // re-computing the first blur, but fitting within the L1 cache is worth it.
+    Halide::Var col_inner("col_inner"), row_inner("row_inner");
+    blurred.tile(col, row, col_inner, row_inner, 64, 32);
+    Halide::Var tile_index("tile_index");
+    blurred.fuse(col, row, tile_index);
+
+    // Compute the first blur as needed for the second one, within each tile.
+    blur_col.compute_at(blurred, tile_index);
+    // And then vectorize the second blur within each tile.
+    blurred.vectorize(col_inner, 8);
+
+    // Lastly, compute all the tiles in parallel.
+    blurred.parallel(tile_index);
+    blurred.compute_root();
+
+    // TODO(Brian): Calulate difference within each of the tiles to speed things
+    // up.
+
+    SetRowMajor(&blurred, cols, rows);
+    SetRowMajor(&difference, cols, rows);
+  }
+};
+
+}  // namespace vision
+}  // namespace frc971
+
+HALIDE_REGISTER_GENERATOR(frc971::vision::GaussianGenerator, gaussian_generator)
+HALIDE_REGISTER_GENERATOR(frc971::vision::SubtractGenerator, subtract_generator)
+HALIDE_REGISTER_GENERATOR(frc971::vision::GaussianAndSubtractGenerator,
+                          gaussian_and_subtract_generator)
diff --git a/y2020/vision/sift/fast_gaussian_runner.py b/y2020/vision/sift/fast_gaussian_runner.py
new file mode 100755
index 0000000..9699fef
--- /dev/null
+++ b/y2020/vision/sift/fast_gaussian_runner.py
@@ -0,0 +1,194 @@
+from __future__ import print_function
+
+import json
+import sys
+import subprocess
+import os
+import threading
+
+from bazel_tools.tools.python.runfiles import runfiles
+
+def main(params):
+  r = runfiles.Create()
+  generator = r.Rlocation('org_frc971/y2020/vision/sift/fast_gaussian_generator')
+
+  ruledir = sys.argv[2]
+  target_cpu = sys.argv[3]
+
+  target = {
+      'armhf-debian': 'arm-32-linux-no_asserts',
+      'k8': 'x86-64-linux-no_asserts',
+  }[target_cpu]
+
+  commands = []
+
+  env = os.environ.copy()
+  env['LD_LIBRARY_PATH'] = ':'.join([
+      'debian_amd64_sysroot/lib/x86_64-linux-gnu',
+      'debian_amd64_sysroot/lib',
+      'debian_amd64_sysroot/usr/lib/x86_64-linux-gnu',
+      'debian_amd64_sysroot/usr/lib',
+  ])
+
+  all_header = [
+      '#ifndef Y2020_VISION_SIFT_FAST_GAUSSIAN_ALL_H_',
+      '#define Y2020_VISION_SIFT_FAST_GAUSSIAN_ALL_H_',
+      '#include "HalideBuffer.h"',
+  ]
+
+  for cols, rows in params['sizes']:
+    for sigma, sigma_name, filter_width in params['sigmas']:
+      name = "fast_gaussian_%dx%d_%s" % (cols, rows, sigma_name)
+
+      commands.append([
+          generator,
+          '-g', 'gaussian_generator',
+          '-o', ruledir,
+          '-f', name,
+          '-e', 'o,h,html',
+          'target=%s-no_runtime' % target,
+          'cols=%s' % cols,
+          'rows=%s' % rows,
+          'sigma=%s' % sigma,
+          'filter_width=%s' % filter_width,
+      ])
+      all_header += [
+          '#include "y2020/vision/sift/%s.h"' % name,
+      ]
+
+      name = "fast_gaussian_subtract_%dx%d_%s" % (cols, rows, sigma_name)
+
+      commands.append([
+          generator,
+          '-g', 'gaussian_and_subtract_generator',
+          '-o', ruledir,
+          '-f', name,
+          '-e', 'o,h,html',
+          'target=%s-no_runtime' % target,
+          'cols=%s' % cols,
+          'rows=%s' % rows,
+          'sigma=%s' % sigma,
+          'filter_width=%s' % filter_width,
+      ])
+      all_header += [
+          '#include "y2020/vision/sift/%s.h"' % name,
+      ]
+
+    name = 'fast_subtract_%dx%d' % (cols, rows)
+    commands.append([
+        generator,
+        '-g', 'subtract_generator',
+        '-o', ruledir,
+        '-f', name,
+        '-e', 'o,h,html',
+        'target=%s-no_runtime' % target,
+        'cols=%s' % cols,
+        'rows=%s' % rows,
+    ])
+    all_header += [
+        '#include "y2020/vision/sift/%s.h"' % name,
+    ]
+  commands.append([
+      generator,
+      '-r', 'fast_gaussian_runtime',
+      '-o', ruledir,
+      '-e', 'o',
+      'target=%s' % target,
+  ])
+
+  all_header += [
+      'namespace frc971 {',
+      'namespace vision {',
+      '// 0 is success. 1 is non-implemented size. Negative is a Halide error.',
+      'inline int DoGeneratedFastGaussian(',
+      '    Halide::Runtime::Buffer<const int16_t, 2> input,',
+      '    Halide::Runtime::Buffer<int16_t, 2> output,',
+      '    double sigma) {',
+  ]
+
+  for sigma, sigma_name, filter_width in params['sigmas']:
+    for cols, rows in params['sizes']:
+      name = "fast_gaussian_%dx%d_%s" % (cols, rows, sigma_name)
+      all_header += [
+          '  if (input.dim(0).extent() == %s' % cols,
+          '      && input.dim(1).extent() == %s' % rows,
+          '      && sigma == %s) {' % sigma,
+          '    return %s(input, output);' % name,
+          '  }',
+      ]
+
+  all_header += [
+      '  return 1;',
+      '}',
+      'inline int DoGeneratedFastGaussianAndSubtract(',
+      '    Halide::Runtime::Buffer<const int16_t, 2> input,',
+      '    Halide::Runtime::Buffer<int16_t, 2> blurred,',
+      '    Halide::Runtime::Buffer<int16_t, 2> difference,',
+      '    double sigma) {',
+  ]
+
+  for sigma, sigma_name, filter_width in params['sigmas']:
+    for cols, rows in params['sizes']:
+      name = "fast_gaussian_subtract_%dx%d_%s" % (cols, rows, sigma_name)
+      all_header += [
+          '  if (input.dim(0).extent() == %s' % cols,
+          '      && input.dim(1).extent() == %s' % rows,
+          '      && sigma == %s) {' % sigma,
+          '    return %s(input, blurred, difference);' % name,
+          '  }',
+      ]
+
+  all_header += [
+      '  return 1;',
+      '}',
+      'inline int DoGeneratedFastSubtract('
+      '    Halide::Runtime::Buffer<const int16_t, 2> input_a,',
+      '    Halide::Runtime::Buffer<const int16_t, 2> input_b,',
+      '    Halide::Runtime::Buffer<int16_t, 2> output) {',
+  ]
+  for cols, rows in params['sizes']:
+    name = 'fast_subtract_%dx%d' % (cols, rows)
+    all_header += [
+        '  if (input_a.dim(0).extent() == %s' % cols,
+        '      && input_a.dim(1).extent() == %s) {' % rows,
+        '    return %s(input_a, input_b, output);' % name,
+        '  }',
+    ]
+  all_header += [
+      '  return 1;',
+      '}',
+      '}  // namespace vision',
+      '}  // namespace frc971',
+      '#endif  // Y2020_VISION_SIFT_FAST_GAUSSIAN_ALL_H_',
+  ]
+
+  with open(os.path.join(ruledir, 'fast_gaussian_all.h'), 'w') as f:
+    f.writelines([line + '\n' for line in all_header])
+
+  commands_lock = threading.Lock()
+  success = [True]
+
+  def run_commands():
+    while True:
+      with commands_lock:
+        if not commands:
+          return
+        if not success[0]:
+          return
+        command = commands.pop()
+      try:
+        subprocess.check_call(command, env=env)
+      except:
+        with commands_lock:
+          success[0] = False
+        raise
+  threads = [threading.Thread(target=run_commands) for _ in range(4)]
+  for thread in threads:
+    thread.start()
+  for thread in threads:
+    thread.join()
+  if not success[0]:
+    sys.exit(1)
+
+if __name__ == '__main__':
+  main(json.loads(sys.argv[1]))
diff --git a/y2020/vision/sift/sift.fbs b/y2020/vision/sift/sift.fbs
new file mode 100644
index 0000000..84697de
--- /dev/null
+++ b/y2020/vision/sift/sift.fbs
@@ -0,0 +1,87 @@
+namespace frc971.vision.sift;
+
+// Represents a single feature extracted from an image.
+table Feature {
+  // Contains the descriptor data.
+  //
+  // TODO(Brian): These are scaled to be convertible to chars. Should we do
+  // that to minimize storage space? Or maybe int16?
+  //
+  // The size of this depends on the parameters. It is width*width*hist_bins.
+  // Currently we have width=4 and hist_bins=8, which results in a size of
+  // 4*4*8=128.
+  descriptor:[float];
+
+  // Location of the keypoint.
+  x:float;
+  y:float;
+
+  // Size of the keypoint neighborhood.
+  size:float;
+
+  // Angle of the keypoint.
+  // This is in [0,360) clockwise.
+  angle:float;
+
+  // How good of a keypoint this is.
+  response:float;
+
+  // Which octave this keypoint is from.
+  octave:int;
+}
+
+// Represents a single match between a training image and a query image.
+table Match {
+  // The index of the feature for the query image.
+  query_feature:int;
+  // The index of the feature for the training image.
+  train_feature:int;
+}
+
+// Represents all the matches between a single training image and a query
+// image.
+table ImageMatch {
+  matches:[Match];
+  // The index of the training image within all the training images.
+  train_image:int;
+}
+
+table TransformationMatrix {
+  // The matrix data. This is a row-major 3x4 matrix.
+  data:[double];
+}
+
+// Contains the information the EKF wants from an image.
+//
+// This is represented as a transformation to a target in field coordinates.
+table CameraPose {
+  // Transformation matrix from the target to the camera's origin.
+  camera_to_target:TransformationMatrix;
+
+  // Field coordinates of the target, represented as a transformation matrix
+  // from the target to the field.
+  // (0, 0, 0) is the center of the field, on the level of most of the field
+  // (not the region under the truss). Positive X is towards the red alliance's
+  // PLAYER STATION. Positive Z is up. The coordinate system is right-handed.
+  //
+  // Note that the red PLAYER STATION is where the red drive teams are. This is
+  // often where the blue robots are shooting towards.
+  //
+  // The value here will be selected from a small, static set of targets we
+  // train images on.
+  field_to_target:TransformationMatrix;
+}
+
+table ImageMatchResult {
+  // The matches from this image to each of the training images which matched.
+  image_matches:[ImageMatch];
+  // The transformations for this image for each of the training images which
+  // matched.
+  // TODO(Brian): Include some kind of covariance information for these.
+  camera_poses:[CameraPose];
+
+  // The features for this image.
+  features:[Feature];
+}
+
+root_type ImageMatchResult;
diff --git a/y2020/vision/sift/sift971.cc b/y2020/vision/sift/sift971.cc
index 6223f77..7152906 100644
--- a/y2020/vision/sift/sift971.cc
+++ b/y2020/vision/sift/sift971.cc
@@ -111,6 +111,9 @@
 #include <stdarg.h>
 #include <opencv2/core/hal/hal.hpp>
 #include <opencv2/imgproc.hpp>
+#include "glog/logging.h"
+
+#include "y2020/vision/sift/fast_gaussian.h"
 
 using namespace cv;
 
@@ -158,7 +161,7 @@
 // factor used to convert floating-point descriptor to unsigned char
 static const float SIFT_INT_DESCR_FCTR = 512.f;
 
-#define DoG_TYPE_SHORT 0
+#define DoG_TYPE_SHORT 1
 #if DoG_TYPE_SHORT
 // intermediate type used for DoG pyramids
 typedef short sift_wt;
@@ -177,37 +180,7 @@
   scale = octave >= 0 ? 1.f / (1 << octave) : (float)(1 << -octave);
 }
 
-static Mat createInitialImage(const Mat &img, bool doubleImageSize,
-                              float sigma) {
-  Mat gray, gray_fpt;
-  if (img.channels() == 3 || img.channels() == 4) {
-    cvtColor(img, gray, COLOR_BGR2GRAY);
-    gray.convertTo(gray_fpt, DataType<sift_wt>::type, SIFT_FIXPT_SCALE, 0);
-  } else
-    img.convertTo(gray_fpt, DataType<sift_wt>::type, SIFT_FIXPT_SCALE, 0);
-
-  float sig_diff;
-
-  if (doubleImageSize) {
-    sig_diff = sqrtf(
-        std::max(sigma * sigma - SIFT_INIT_SIGMA * SIFT_INIT_SIGMA * 4, 0.01f));
-    Mat dbl;
-#if DoG_TYPE_SHORT
-    resize(gray_fpt, dbl, Size(gray_fpt.cols * 2, gray_fpt.rows * 2), 0, 0,
-           INTER_LINEAR_EXACT);
-#else
-    resize(gray_fpt, dbl, Size(gray_fpt.cols * 2, gray_fpt.rows * 2), 0, 0,
-           INTER_LINEAR);
-#endif
-    GaussianBlur(dbl, dbl, Size(), sig_diff, sig_diff);
-    return dbl;
-  } else {
-    sig_diff = sqrtf(
-        std::max(sigma * sigma - SIFT_INIT_SIGMA * SIFT_INIT_SIGMA, 0.01f));
-    GaussianBlur(gray_fpt, gray_fpt, Size(), sig_diff, sig_diff);
-    return gray_fpt;
-  }
-}
+constexpr bool kLogTiming = false;
 
 }  // namespace
 
@@ -229,14 +202,19 @@
   for (int o = 0; o < nOctaves; o++) {
     for (int i = 0; i < nOctaveLayers + 3; i++) {
       Mat &dst = pyr[o * (nOctaveLayers + 3) + i];
-      if (o == 0 && i == 0) dst = base;
-      // base of new octave is halved image from end of previous octave
-      else if (i == 0) {
+      if (o == 0 && i == 0) {
+        dst = base;
+      } else if (i == 0) {
+        // base of new octave is halved image from end of previous octave
         const Mat &src = pyr[(o - 1) * (nOctaveLayers + 3) + nOctaveLayers];
         resize(src, dst, Size(src.cols / 2, src.rows / 2), 0, 0, INTER_NEAREST);
       } else {
         const Mat &src = pyr[o * (nOctaveLayers + 3) + i - 1];
-        GaussianBlur(src, dst, Size(), sig[i], sig[i]);
+        if (use_fast_gaussian_pyramid_) {
+          FastGaussian(src, &dst, sig[i]);
+        } else {
+          GaussianBlur(src, dst, Size(), sig[i], sig[i]);
+        }
       }
     }
   }
@@ -247,8 +225,12 @@
 class buildDoGPyramidComputer : public ParallelLoopBody {
  public:
   buildDoGPyramidComputer(int _nOctaveLayers, const std::vector<Mat> &_gpyr,
-                          std::vector<Mat> &_dogpyr)
-      : nOctaveLayers(_nOctaveLayers), gpyr(_gpyr), dogpyr(_dogpyr) {}
+                          std::vector<Mat> &_dogpyr,
+                          bool use_fast_subtract_dogpyr)
+      : nOctaveLayers(_nOctaveLayers),
+        gpyr(_gpyr),
+        dogpyr(_dogpyr),
+        use_fast_subtract_dogpyr_(use_fast_subtract_dogpyr) {}
 
   void operator()(const cv::Range &range) const override {
     const int begin = range.start;
@@ -260,15 +242,21 @@
 
       const Mat &src1 = gpyr[o * (nOctaveLayers + 3) + i];
       const Mat &src2 = gpyr[o * (nOctaveLayers + 3) + i + 1];
+      CHECK_EQ(a, o * (nOctaveLayers + 2) + i);
       Mat &dst = dogpyr[o * (nOctaveLayers + 2) + i];
-      subtract(src2, src1, dst, noArray(), DataType<sift_wt>::type);
+      if (use_fast_subtract_dogpyr_) {
+        FastSubtract(src2, src1, &dst);
+      } else {
+        subtract(src2, src1, dst, noArray(), DataType<sift_wt>::type);
+      }
     }
   }
 
  private:
-  int nOctaveLayers;
+  const int nOctaveLayers;
   const std::vector<Mat> &gpyr;
   std::vector<Mat> &dogpyr;
+  const bool use_fast_subtract_dogpyr_;
 };
 
 }  // namespace
@@ -278,8 +266,97 @@
   int nOctaves = (int)gpyr.size() / (nOctaveLayers + 3);
   dogpyr.resize(nOctaves * (nOctaveLayers + 2));
 
+#if 0
   parallel_for_(Range(0, nOctaves * (nOctaveLayers + 2)),
-                buildDoGPyramidComputer(nOctaveLayers, gpyr, dogpyr));
+                buildDoGPyramidComputer(nOctaveLayers, gpyr, dogpyr, use_fast_subtract_dogpyr_));
+#else
+  buildDoGPyramidComputer(
+      nOctaveLayers, gpyr, dogpyr,
+      use_fast_subtract_dogpyr_)(Range(0, nOctaves * (nOctaveLayers + 2)));
+#endif
+}
+
+// base is the image to start with.
+// gpyr is the pyramid of gaussian blurs. This is both an output and a place
+// where we store intermediates.
+// dogpyr is the pyramid of gaussian differences which we fill out.
+// number_octaves is the number of octaves to calculate.
+void SIFT971_Impl::buildGaussianAndDifferencePyramid(
+    const cv::Mat &base, std::vector<cv::Mat> &gpyr,
+    std::vector<cv::Mat> &dogpyr, int number_octaves) const {
+  const int layers_per_octave = nOctaveLayers;
+  // We use the base (possibly after downscaling) as the first "blurred" image.
+  // Then we calculate 2 more than the number of octaves.
+  // TODO(Brian): Why are there 2 extra?
+  const int gpyr_layers_per_octave = layers_per_octave + 3;
+  // There is 1 less difference than the number of blurs.
+  const int dogpyr_layers_per_octave = gpyr_layers_per_octave - 1;
+  gpyr.resize(number_octaves * gpyr_layers_per_octave);
+  dogpyr.resize(number_octaves * dogpyr_layers_per_octave);
+
+  std::vector<double> sig(gpyr_layers_per_octave);
+  // precompute Gaussian sigmas using the following formula:
+  //  \sigma_{total}^2 = \sigma_{i}^2 + \sigma_{i-1}^2
+  sig[0] = sigma;
+  double k = std::pow(2., 1. / layers_per_octave);
+  for (int i = 1; i < gpyr_layers_per_octave; i++) {
+    double sig_prev = std::pow<double>(k, i - 1) * sigma;
+    double sig_total = sig_prev * k;
+    sig[i] = std::sqrt(sig_total * sig_total - sig_prev * sig_prev);
+  }
+
+  for (int octave = 0; octave < number_octaves; octave++) {
+    // At the beginning of each octave, calculate the new base image.
+    {
+      Mat &dst = gpyr[octave * gpyr_layers_per_octave];
+      if (octave == 0) {
+        // For the first octave, it's just the base image.
+        dst = base;
+      } else {
+        // For the other octaves, it's a halved version of the end of the
+        // previous octave.
+        const Mat &src = gpyr[(octave - 1) * gpyr_layers_per_octave +
+                              gpyr_layers_per_octave - 1];
+        resize(src, dst, Size(src.cols / 2, src.rows / 2), 0, 0, INTER_NEAREST);
+      }
+    }
+    // We start with layer==1 because the "first layer" is just the base image
+    // (or a downscaled version of it).
+    for (int layer = 1; layer < gpyr_layers_per_octave; layer++) {
+      // The index where the current layer starts.
+      const int layer_index = octave * gpyr_layers_per_octave + layer;
+      if (use_fast_pyramid_difference_) {
+        const Mat &input = gpyr[layer_index - 1];
+        Mat &blurred = gpyr[layer_index];
+        Mat &difference =
+            dogpyr[octave * dogpyr_layers_per_octave + (layer - 1)];
+        FastGaussianAndSubtract(input, &blurred, &difference, sig[layer]);
+      } else {
+        // First, calculate the new gaussian blur.
+        {
+          const Mat &src = gpyr[layer_index - 1];
+          Mat &dst = gpyr[layer_index];
+          if (use_fast_gaussian_pyramid_) {
+            FastGaussian(src, &dst, sig[layer]);
+          } else {
+            GaussianBlur(src, dst, Size(), sig[layer], sig[layer]);
+          }
+        }
+
+        // Then, calculate the difference from the previous one.
+        {
+          const Mat &src1 = gpyr[layer_index - 1];
+          const Mat &src2 = gpyr[layer_index];
+          Mat &dst = dogpyr[octave * dogpyr_layers_per_octave + (layer - 1)];
+          if (use_fast_subtract_dogpyr_) {
+            FastSubtract(src2, src1, &dst);
+          } else {
+            subtract(src2, src1, dst, noArray(), DataType<sift_wt>::type);
+          }
+        }
+      }
+    }
+  }
 }
 
 namespace {
@@ -1073,7 +1150,7 @@
                                     std::vector<KeyPoint> &keypoints,
                                     OutputArray _descriptors,
                                     bool useProvidedKeypoints) {
-  int firstOctave = -1, actualNOctaves = 0, actualNLayers = 0;
+  int firstOctave = 0, actualNOctaves = 0, actualNLayers = 0;
   Mat image = _image.getMat(), mask = _mask.getMat();
 
   if (image.empty() || image.depth() != CV_8U)
@@ -1084,6 +1161,7 @@
     CV_Error(Error::StsBadArg, "mask has incorrect type (!=CV_8UC1)");
 
   if (useProvidedKeypoints) {
+    LOG_IF(INFO, kLogTiming);
     firstOctave = 0;
     int maxOctave = INT_MIN;
     for (size_t i = 0; i < keypoints.size(); i++) {
@@ -1100,35 +1178,39 @@
     actualNOctaves = maxOctave - firstOctave + 1;
   }
 
-  Mat base = createInitialImage(image, firstOctave < 0, (float)sigma);
+  LOG_IF(INFO, kLogTiming);
+  Mat base = createInitialImage(image, firstOctave < 0);
+  LOG_IF(INFO, kLogTiming);
   std::vector<Mat> gpyr;
-  int nOctaves =
-      actualNOctaves > 0
-          ? actualNOctaves
-          : cvRound(std::log((double)std::min(base.cols, base.rows)) /
-                        std::log(2.) -
-                    2) -
-                firstOctave;
-
-  // double t, tf = getTickFrequency();
-  // t = (double)getTickCount();
-  buildGaussianPyramid(base, gpyr, nOctaves);
-
-  // t = (double)getTickCount() - t;
-  // printf("pyramid construction time: %g\n", t*1000./tf);
+  int nOctaves;
+  if (actualNOctaves > 0) {
+    nOctaves = actualNOctaves;
+  } else {
+    nOctaves = cvRound(std::log((double)std::min(base.cols, base.rows)) /
+                           std::log(2.) -
+                       2) -
+               firstOctave;
+  }
 
   if (!useProvidedKeypoints) {
     std::vector<Mat> dogpyr;
-    buildDoGPyramid(gpyr, dogpyr);
-    // t = (double)getTickCount();
+    if (use_fused_pyramid_difference_) {
+      buildGaussianAndDifferencePyramid(base, gpyr, dogpyr, nOctaves);
+      LOG_IF(INFO, kLogTiming);
+    } else {
+      buildGaussianPyramid(base, gpyr, nOctaves);
+      LOG_IF(INFO, kLogTiming);
+
+      buildDoGPyramid(gpyr, dogpyr);
+      LOG_IF(INFO, kLogTiming);
+    }
+
     findScaleSpaceExtrema(gpyr, dogpyr, keypoints);
     // TODO(Brian): I think it can go faster by knowing they're sorted?
     // KeyPointsFilter::removeDuplicatedSorted( keypoints );
     KeyPointsFilter::removeDuplicated(keypoints);
 
     if (nfeatures > 0) KeyPointsFilter::retainBest(keypoints, nfeatures);
-    // t = (double)getTickCount() - t;
-    // printf("keypoint detection time: %g\n", t*1000./tf);
 
     if (firstOctave < 0)
       for (size_t i = 0; i < keypoints.size(); i++) {
@@ -1140,20 +1222,54 @@
       }
 
     if (!mask.empty()) KeyPointsFilter::runByPixelsMask(keypoints, mask);
+    LOG_IF(INFO, kLogTiming);
   } else {
+    buildGaussianPyramid(base, gpyr, nOctaves);
+    LOG_IF(INFO, kLogTiming);
     // filter keypoints by mask
     // KeyPointsFilter::runByPixelsMask( keypoints, mask );
   }
 
   if (_descriptors.needed()) {
-    // t = (double)getTickCount();
     int dsize = descriptorSize();
     _descriptors.create((int)keypoints.size(), dsize, CV_32F);
     Mat descriptors = _descriptors.getMat();
 
     calcDescriptors(gpyr, keypoints, descriptors, nOctaveLayers, firstOctave);
-    // t = (double)getTickCount() - t;
-    // printf("descriptor extraction time: %g\n", t*1000./tf);
+    LOG_IF(INFO, kLogTiming);
+  }
+}
+
+Mat SIFT971_Impl::createInitialImage(const Mat &img,
+                                     bool doubleImageSize) const {
+  Mat gray, gray_fpt;
+  if (img.channels() == 3 || img.channels() == 4) {
+    cvtColor(img, gray, COLOR_BGR2GRAY);
+    gray.convertTo(gray_fpt, DataType<sift_wt>::type, SIFT_FIXPT_SCALE, 0);
+  } else {
+    img.convertTo(gray_fpt, DataType<sift_wt>::type, SIFT_FIXPT_SCALE, 0);
+  }
+
+  float sig_diff;
+
+  Mat maybe_doubled;
+  if (doubleImageSize) {
+    sig_diff = std::sqrt(
+        std::max(sigma * sigma - SIFT_INIT_SIGMA * SIFT_INIT_SIGMA * 4, 0.01));
+    resize(gray_fpt, maybe_doubled, Size(gray_fpt.cols * 2, gray_fpt.rows * 2),
+           0, 0, INTER_LINEAR);
+  } else {
+    sig_diff = std::sqrt(
+        std::max(sigma * sigma - SIFT_INIT_SIGMA * SIFT_INIT_SIGMA, 0.01));
+    maybe_doubled = gray_fpt;
+  }
+  if (use_fast_guassian_initial_) {
+    Mat temp;
+    FastGaussian(maybe_doubled, &temp, sig_diff);
+    return temp;
+  } else {
+    GaussianBlur(maybe_doubled, maybe_doubled, Size(), sig_diff, sig_diff);
+    return maybe_doubled;
   }
 }
 
diff --git a/y2020/vision/sift/sift971.h b/y2020/vision/sift/sift971.h
index d58dec8..b351d70 100644
--- a/y2020/vision/sift/sift971.h
+++ b/y2020/vision/sift/sift971.h
@@ -41,6 +41,10 @@
                             int nOctaves) const;
   void buildDoGPyramid(const std::vector<cv::Mat> &pyr,
                        std::vector<cv::Mat> &dogpyr) const;
+  void buildGaussianAndDifferencePyramid(const cv::Mat &base,
+                                         std::vector<cv::Mat> &pyr,
+                                         std::vector<cv::Mat> &dogpyr,
+                                         int nOctaves) const;
   void findScaleSpaceExtrema(const std::vector<cv::Mat> &gauss_pyr,
                              const std::vector<cv::Mat> &dog_pyr,
                              std::vector<cv::KeyPoint> &keypoints) const;
@@ -51,6 +55,15 @@
   CV_PROP_RW double contrastThreshold;
   CV_PROP_RW double edgeThreshold;
   CV_PROP_RW double sigma;
+
+ private:
+  cv::Mat createInitialImage(const cv::Mat &img, bool doubleImageSize) const;
+
+  bool use_fast_gaussian_pyramid_ = true;
+  bool use_fast_subtract_dogpyr_ = true;
+  bool use_fast_guassian_initial_ = true;
+  bool use_fused_pyramid_difference_ = true;
+  bool use_fast_pyramid_difference_ = true;
 };
 
 }  // namespace vision
diff --git a/y2020/vision/sift/sift_training.fbs b/y2020/vision/sift/sift_training.fbs
new file mode 100644
index 0000000..2af0233
--- /dev/null
+++ b/y2020/vision/sift/sift_training.fbs
@@ -0,0 +1,23 @@
+include "y2020/vision/sift/sift.fbs";
+
+namespace frc971.vision.sift;
+
+// Represents a single image we train against.
+table TrainingImage {
+  features:[Feature];
+
+  // Field coordinates of the target, represented as a transformation matrix
+  // from the target to the field. See CameraPose in :sift_fbs for details of
+  // the conventions of this.
+  field_to_target:TransformationMatrix;
+
+  // Coordinates of the target in the training image.
+  camera_to_target:TransformationMatrix;
+}
+
+// Represents the information used to match incoming images against.
+table TrainingData {
+  images:[TrainingImage];
+}
+
+root_type TrainingData;
diff --git a/y2020/vision/sift/testing_sift.cc b/y2020/vision/sift/testing_sift.cc
new file mode 100644
index 0000000..d4b1306
--- /dev/null
+++ b/y2020/vision/sift/testing_sift.cc
@@ -0,0 +1,87 @@
+#include <memory>
+
+#include <opencv2/highgui/highgui.hpp>
+#include <opencv2/imgcodecs.hpp>
+#include <opencv2/imgproc.hpp>
+
+#include "aos/init.h"
+#include "aos/time/time.h"
+#include "y2020/vision/sift/fast_gaussian.h"
+#include "glog/logging.h"
+#include "y2020/vision/sift/sift971.h"
+
+DEFINE_string(image, "", "Image to test with");
+
+int main(int argc, char **argv) {
+  aos::InitGoogle(&argc, &argv);
+
+  cv::setNumThreads	(4);
+
+  const cv::Mat raw_image = cv::imread(FLAGS_image);
+  CHECK(!raw_image.empty()) << ": Failed to read: " << FLAGS_image;
+  CHECK_EQ(CV_8UC3, raw_image.type());
+#if 0
+  cv::Mat color_image;
+  raw_image.convertTo(color_image, CV_32F, 1.0/255.0);
+  cv::Mat image;
+  cv::cvtColor(color_image, image, cv::COLOR_BGR2GRAY);
+#else
+  cv::Mat gray_image;
+  cv::cvtColor(raw_image, gray_image, cv::COLOR_BGR2GRAY);
+  cv::Mat float_image;
+#if 0
+  gray_image.convertTo(float_image, CV_32F, 0.00390625);
+#else
+  float_image = gray_image;
+#endif
+  cv::Mat image;
+  cv::resize(float_image, image, cv::Size(1280, 720), 0, 0, cv::INTER_AREA);
+#endif
+#if 0
+#if 0
+  cv::namedWindow("source", cv::WINDOW_AUTOSIZE);
+  cv::imshow("source", raw_image);
+  cv::namedWindow("converted", cv::WINDOW_AUTOSIZE);
+  cv::imshow("converted", image);
+#endif
+
+  cv::Mat slow_blurred, fast_blurred;
+  const double sigma = 3.0900155872895909;
+  cv::GaussianBlur(image, slow_blurred, cv::Size(9, 9), sigma, sigma);
+  frc971::vision::FastGaussian(image, &fast_blurred, sigma);
+  cv::namedWindow("slow", cv::WINDOW_AUTOSIZE);
+  cv::imshow("slow", slow_blurred);
+  cv::namedWindow("fast", cv::WINDOW_AUTOSIZE);
+  cv::imshow("fast", fast_blurred);
+  cv::waitKey(0);
+  return 0;
+#endif
+
+  LOG(INFO);
+  std::unique_ptr<frc971::vision::SIFT971_Impl> sift(new frc971::vision::SIFT971_Impl());
+  std::vector<cv::KeyPoint> keypoints;
+  cv::Mat descriptors;
+  LOG(INFO) << "detectAndCompute on " << image.rows << "x" << image.cols;
+  sift->detectAndCompute(image, cv::noArray(), keypoints, descriptors);
+  LOG(INFO);
+
+#if 0
+  return 0;
+#endif
+
+  static constexpr int kIterations = 40;
+  const auto start = aos::monotonic_clock::now();
+  for (int i = 0; i < kIterations; ++i) {
+    keypoints.clear();
+    descriptors.release();
+    sift->detectAndCompute(image, cv::noArray(), keypoints, descriptors);
+  }
+  const auto end = aos::monotonic_clock::now();
+  LOG(INFO)
+      << "Took: "
+      << (std::chrono::duration<double>(end - start) / kIterations).count();
+  // Should be ~352 for FRC-Image4-cleaned.png downscaled to 640x360.
+  // 376 in DoG_TYPE_SHORT mode.
+  // 344 now with 1280x720 non-upscaled.
+  LOG(INFO) << "found " << keypoints.size() << " and " << descriptors.size();
+}