Clean up printing in MCU code and add new options

We now have four different ways of getting debug prints off of boards,
with varying tradeoffs. Split out the printing into dedicated libraries
that are easy to switch between to avoid duplicating even more code, and
also make using the new options easy.

USB is handy for testing the code on a Teensy.
Semihosting is nice in theory, but in practice it's super slow and
messes up the code's timing.
ITM works well, as long as you have a debugger attached.
Serial also works pretty well, but it means having another cable.

Change-Id: I7af5099d421c33f0324aeca92b46732e341848d4
diff --git a/motors/BUILD b/motors/BUILD
index a1375f4..09b7213 100644
--- a/motors/BUILD
+++ b/motors/BUILD
@@ -18,7 +18,7 @@
         "//motors/peripheral:adc",
         "//motors/peripheral:can",
         "//motors/peripheral:configuration",
-        "//motors/usb:cdc",
+        "//motors/print",
     ],
 )
 
@@ -115,6 +115,7 @@
         "//motors/core",
         "//motors/peripheral:adc",
         "//motors/peripheral:can",
+        "//motors/print:usb",
         "//motors/usb",
         "//motors/usb:cdc",
         "//motors/usb:hid",
@@ -137,10 +138,9 @@
         "//motors/core",
         "//motors/peripheral:adc",
         "//motors/peripheral:can",
+        "//motors/print:usb",
         "//motors/seems_reasonable:drivetrain_lib",
         "//motors/seems_reasonable:spring",
-        "//motors/usb",
-        "//motors/usb:cdc",
     ],
 )
 
diff --git a/motors/NOTES.md b/motors/NOTES.md
index 995e97f..a822d79 100644
--- a/motors/NOTES.md
+++ b/motors/NOTES.md
@@ -29,6 +29,7 @@
     * Alternative part is MK22FX512AVLH12 with 512 KB of flash
     * [datasheet](https://www.nxp.com/docs/en/data-sheet/K22P64M120SF5V2.pdf)
     * [reference manual](https://www.nxp.com/docs/en/reference-manual/K22P64M120SF5V2RM.pdf)
+* [ARMv7-M Architecture Reference Manual](https://static.docs.arm.com/ddi0403/eb/DDI0403E_B_armv7m_arm.pdf)
 
 ### Clocking
 * Running the core clock at its maximum of 120 MHz
diff --git a/motors/button_board.cc b/motors/button_board.cc
index 676331b..0c0050b 100644
--- a/motors/button_board.cc
+++ b/motors/button_board.cc
@@ -5,13 +5,14 @@
 #include <atomic>
 #include <cmath>
 
-#include "motors/core/time.h"
 #include "motors/core/kinetis.h"
+#include "motors/core/time.h"
 #include "motors/peripheral/adc.h"
 #include "motors/peripheral/can.h"
-#include "motors/usb/usb.h"
+#include "motors/print/print.h"
 #include "motors/usb/cdc.h"
 #include "motors/usb/hid.h"
+#include "motors/usb/usb.h"
 #include "motors/util.h"
 
 namespace frc971 {
@@ -58,8 +59,6 @@
   return r;
 }
 
-::std::atomic<teensy::AcmTty *> global_stdout{nullptr};
-
 // The HID report descriptor we use.
 constexpr char kReportDescriptor1[] = {
     0x05, 0x01,        // Usage Page (Generic Desktop),
@@ -224,17 +223,16 @@
 extern "C" {
 
 void *__stack_chk_guard = (void *)0x67111971;
-
-int _write(int /*file*/, char *ptr, int len) {
-  teensy::AcmTty *const tty = global_stdout.load(::std::memory_order_acquire);
-  if (tty != nullptr) {
-    return tty->Write(ptr, len);
+void __stack_chk_fail(void) {
+  while (true) {
+    GPIOC_PSOR = (1 << 5);
+    printf("Stack corruption detected\n");
+    delay(1000);
+    GPIOC_PCOR = (1 << 5);
+    delay(1000);
   }
-  return 0;
 }
 
-void __stack_chk_fail(void);
-
 }  // extern "C"
 
 extern "C" int main(void) {
@@ -370,8 +368,13 @@
       ::std::string(kReportDescriptor1, sizeof(kReportDescriptor1)));
 
   teensy::AcmTty tty1(&usb_device);
-  global_stdout.store(&tty1, ::std::memory_order_release);
+  PrintingParameters printing_parameters;
+  printing_parameters.stdout_tty = &tty1;
+
+  const ::std::unique_ptr<PrintingImplementation> printing =
+      CreatePrinting(printing_parameters);
   usb_device.Initialize();
+  printing->Initialize();
 
   can_init(0, 1);
   AdcInitJoystick();
@@ -397,15 +400,5 @@
   return 0;
 }
 
-void __stack_chk_fail(void) {
-  while (true) {
-    GPIOC_PSOR = (1 << 5);
-    printf("Stack corruption detected\n");
-    delay(1000);
-    GPIOC_PCOR = (1 << 5);
-    delay(1000);
-  }
-}
-
 }  // namespace motors
 }  // namespace frc971
diff --git a/motors/core/BUILD b/motors/core/BUILD
index f42a1bc..6e08dd4 100644
--- a/motors/core/BUILD
+++ b/motors/core/BUILD
@@ -19,8 +19,36 @@
     hdrs = [
         "kinetis.h",
         "nonstd.h",
+        "reg_debug.h",
         "time.h",
     ],
     restricted_to = mcu_cpus,
     visibility = ["//visibility:public"],
 )
+
+cc_library(
+    name = "semihosting",
+    hdrs = [
+        "semihosting.h",
+    ],
+    restricted_to = mcu_cpus,
+    visibility = ["//visibility:public"],
+    deps = [
+        "//third_party/GSL",
+    ],
+)
+
+cc_library(
+    name = "itm",
+    srcs = [
+        "itm.cc",
+    ],
+    hdrs = [
+        "itm.h",
+    ],
+    restricted_to = mcu_cpus,
+    visibility = ["//visibility:public"],
+    deps = [
+        ":core",
+    ],
+)
diff --git a/motors/core/itm.cc b/motors/core/itm.cc
new file mode 100644
index 0000000..2dcc5e6
--- /dev/null
+++ b/motors/core/itm.cc
@@ -0,0 +1,56 @@
+#include "motors/core/itm.h"
+
+#include "motors/core/kinetis.h"
+
+namespace frc971 {
+namespace motors {
+namespace itm {
+namespace {
+
+constexpr int kDesiredTraceFrequency = 64000;
+// TODO(Brian): Is this actually the frequency our parts use?
+constexpr int kTpiuAsynchronousClockFrequency = F_CPU;
+
+}  // namespace
+
+void Initialize() {
+  ARM_DEMCR = ARM_DEMCR_TRCENA;
+
+  // 0 is parallel mode (not supported on Kinetis-K22 at least).
+  // 1 is asynchronous SWO with Manchester encoding ("medium speed").
+  // 2 is asynchronous SWO with NRZ encoder ("low speed").
+  TPIU.SPPR = 2;
+
+  // Round up on the prescaler, to round down on the final frequency.
+  TPIU.ACPR = ((kTpiuAsynchronousClockFrequency + kDesiredTraceFrequency - 1) /
+               kDesiredTraceFrequency) -
+              1;
+
+  ITM.LAR = 0xC5ACCE55;
+
+  ITM.TCR =
+      V_TCR_TraceBusID(7) /* Whatever, as long as it's non-0 */ |
+      V_TCR_GTSFREQ(0) /* No global timestamps */ |
+      V_TCR_TSPrescale(0) /* No extra prescaling here */ |
+      M_TCR_SWOENA /* Use the TPIU's clock */ |
+      0 /* !M_TCR_TXENA to disable DWT events that we don't use */ |
+      0 /* !M_TCR_SYNCENA to disable synchronization packet transmission */ |
+      0 /* !M_TCR_TSENA to disable local timestamps */ | M_TCR_ITMENA;
+  // Allow unprivileged access to the port for printing.
+  ITM.TPR = 1;
+  // Only enable trace port 0, because that's the one we use for printing.
+  ITM.TER[0] = 1;
+
+  // Disable everything we can here.
+  DWT.CTRL = 0;
+
+  // Indicate a trigger when the input trigger signal is asserted. Not sure if
+  // this matters for anything for now, but it's what the examples all do.
+  TPIU.FFCR = M_FFCR_TrigIn;
+
+  ITM.LAR = 0;
+}
+
+}  // namespace itm
+}  // namespace motors
+}  // namespace frc971
diff --git a/motors/core/itm.h b/motors/core/itm.h
new file mode 100644
index 0000000..53f6663
--- /dev/null
+++ b/motors/core/itm.h
@@ -0,0 +1,90 @@
+#ifndef MOTORS_CORE_ITM_H_
+#define MOTORS_CORE_ITM_H_
+
+#include "motors/core/reg_debug.h"
+
+namespace frc971 {
+namespace motors {
+
+// Manages the ITM (Instrumentation and Trace Macrocell), along with the TPIU
+// (Trace Port Interface Unit) and DWT (Data Watchpoint and Trace unit).
+namespace itm {
+
+void Initialize();
+
+// Writes an 8-bit value to the specified stimulus port.
+//
+// This may be called without external synchronization simultaneously from any
+// context (including all interrupts and other fault handlers).
+inline void Write8(int port, uint8_t value) {
+  const volatile uint32_t *const address = &ITM.STIM[port];
+  uint32_t scratch;
+  // This what ARM recommends in the Cortex-M3 TRM for concurrent usage by
+  // interrupts etc. It's basically a compare-exchange from 0 to value without
+  // any memory barriers, except actual C/C++ atomics are allergic to volatile.
+  // See here for an example:
+  // https://developer.arm.com/docs/ddi0337/e/system-debug/itm/summary-and-description-of-the-itm-registers
+  __asm__ __volatile__(
+      // Load the status (whether the FIFO is full).
+      "1: ldrexb %[scratch], [%[address]]\n\t"
+      // Check if it's reporting full or not.
+      "cmp %[scratch], #0\n\t"
+      "itt ne\n\t"
+      // If it's not full, try storing our data.
+      "strexbne %[scratch], %[value], [%[address]]\n\t"
+      // If it's not full, check if our store succeeded.
+      "cmpne %[scratch], #1\n\t"
+      // If it's full or the store failed, try again.
+      "beq 1b\n\t"
+      : [scratch] "=&r"(scratch)
+      : [address] "r"(address), [value] "r"(value)
+      : "cc");
+}
+
+// See documentation for Write8.
+inline void Write16(int port, uint16_t value) {
+  const volatile uint32_t *const address = &ITM.STIM[port];
+  uint32_t scratch;
+  __asm__ __volatile__(
+      // Load the status (whether the FIFO is full).
+      "1: ldrexh %[scratch], [%[address]]\n\t"
+      // Check if it's reporting full or not.
+      "cmp %[scratch], #0\n\t"
+      "itt ne\n\t"
+      // If it's not full, try storing our data.
+      "strexhne %[scratch], %[value], [%[address]]\n\t"
+      // If it's not full, check if our store succeeded.
+      "cmpne %[scratch], #1\n\t"
+      // If it's full or the store failed, try again.
+      "beq 1b\n\t"
+      : [scratch] "=&r"(scratch)
+      : [address] "r"(address), [value] "r"(value)
+      : "cc");
+}
+
+// See documentation for Write8.
+inline void Write32(int port, uint32_t value) {
+  const volatile uint32_t *const address = &ITM.STIM[port];
+  uint32_t scratch;
+  __asm__ __volatile__(
+      // Load the status (whether the FIFO is full).
+      "1: ldrex %[scratch], [%[address]]\n\t"
+      // Check if it's reporting full or not.
+      "cmp %[scratch], #0\n\t"
+      "itt ne\n\t"
+      // If it's not full, try storing our data.
+      "strexne %[scratch], %[value], [%[address]]\n\t"
+      // If it's not full, check if our store succeeded.
+      "cmpne %[scratch], #1\n\t"
+      // If it's full or the store failed, try again.
+      "beq 1b\n\t"
+      : [scratch] "=&r"(scratch)
+      : [address] "r"(address), [value] "r"(value)
+      : "cc");
+}
+
+}  // namespace itm
+}  // namespace motors
+}  // namespace frc971
+
+#endif  // MOTORS_CORE_ITM_H_
diff --git a/motors/core/reg_debug.h b/motors/core/reg_debug.h
new file mode 100644
index 0000000..418c391
--- /dev/null
+++ b/motors/core/reg_debug.h
@@ -0,0 +1,174 @@
+#ifndef MOTORS_CORE_REG_DEBUG_H_
+#define MOTORS_CORE_REG_DEBUG_H_
+
+#include <stdint.h>
+#include <stddef.h>
+
+// Cortex-M4 always has 32 registers according to its Technical Reference
+// Manual.
+#define ITM_NUM_STIM 32
+
+// Cortex-M4 TRM is incomplete. See the Cortex-M3 one for all the registers
+// here (and some forms of the M3 one are missing registers too?):
+// https://developer.arm.com/docs/ddi0337/e/system-debug/itm/summary-and-description-of-the-itm-registers#BABBCEHF
+struct ARM_ITM {
+  volatile uint32_t STIM[ITM_NUM_STIM];
+  volatile uint32_t unused1[896 - ITM_NUM_STIM];
+  volatile uint32_t TER[(ITM_NUM_STIM + 31) / 32];
+  volatile uint32_t unused2[16 - ((ITM_NUM_STIM + 31) / 32)];
+  volatile uint32_t TPR;
+  volatile uint32_t unused3[15];
+  volatile uint32_t TCR;
+  volatile uint32_t unused4[29];
+  volatile uint32_t IWR;
+  const volatile uint32_t IRR;
+  volatile uint32_t IMC;
+  volatile uint32_t unused5[43];
+  volatile uint32_t LAR;
+  const volatile uint32_t LSR;
+  volatile uint32_t unused6[6];
+  union {
+    struct {
+      const volatile uint32_t PID4;
+      const volatile uint32_t PID5;
+      const volatile uint32_t PID6;
+      const volatile uint32_t PID7;
+      const volatile uint32_t PID0;
+      const volatile uint32_t PID1;
+      const volatile uint32_t PID2;
+      const volatile uint32_t PID3;
+    };
+    const volatile uint32_t PIDx[8];
+  };
+} __attribute__((aligned(0x1000)));
+static_assert(offsetof(ARM_ITM, TER) == 0xE00, "padding is wrong");
+static_assert(offsetof(ARM_ITM, TPR) == 0xE40, "padding is wrong");
+static_assert(offsetof(ARM_ITM, TCR) == 0xE80, "padding is wrong");
+static_assert(offsetof(ARM_ITM, IWR) == 0xEF8, "padding is wrong");
+static_assert(offsetof(ARM_ITM, LAR) == 0xFB0, "padding is wrong");
+static_assert(offsetof(ARM_ITM, PID4) == 0xFD0, "padding is wrong");
+static_assert(sizeof(ARM_ITM) == 0x1000, "padding is wrong");
+#define ITM (*(ARM_ITM *)0xE0000000)
+
+#define V_TCR_TraceBusID(n) ((static_cast<uint32_t>(n) & UINT32_C(0x7F)) << 16)
+#define V_TCR_GTSFREQ(n) ((static_cast<uint32_t>(n) & UINT32_C(3)) << 10)
+#define V_TCR_TSPrescale(n) ((static_cast<uint32_t>(n) & UINT32_C(3)) << 8)
+#define M_TCR_SWOENA (UINT32_C(1) << 4)
+#define M_TCR_TXENA (UINT32_C(1) << 3)
+#define M_TCR_SYNCENA (UINT32_C(1) << 2)
+#define M_TCR_TSENA (UINT32_C(1) << 1)
+#define M_TCR_ITMENA (UINT32_C(1) << 0)
+
+struct ARM_TPIU {
+  const volatile uint32_t SSPSR;
+  volatile uint32_t CSPSR;
+  volatile uint32_t unused1[2];
+  volatile uint32_t ACPR;
+  volatile uint32_t unused2[55];
+  volatile uint32_t SPPR;
+  volatile uint32_t unused3[131];
+  const volatile uint32_t FFSR;
+  volatile uint32_t FFCR;
+  const volatile uint32_t FSCR;
+  volatile uint32_t unused4[759];
+  const volatile uint32_t TRIGGER;
+  const volatile uint32_t FIFO_data0;
+  const volatile uint32_t ITATBCTR2;
+  const volatile uint32_t FIFO_data1;
+  const volatile uint32_t ITATBCTR0;
+  volatile uint32_t unused5;
+  volatile uint32_t ITCTRL;
+  volatile uint32_t unused6[39];
+  volatile uint32_t CLAIMSET;
+  volatile uint32_t CLAIMCLR;
+  volatile uint32_t unsued7[8];
+  const volatile uint32_t DEVID;
+  const volatile uint32_t DEVTYPE;
+  union {
+    struct {
+      const volatile uint32_t PID4;
+      const volatile uint32_t PID5;
+      const volatile uint32_t PID6;
+      const volatile uint32_t PID7;
+      const volatile uint32_t PID0;
+      const volatile uint32_t PID1;
+      const volatile uint32_t PID2;
+      const volatile uint32_t PID3;
+    };
+    const volatile uint32_t PIDx[8];
+  };
+  const volatile uint32_t CID[4];
+} __attribute__((aligned(0x1000)));
+static_assert(offsetof(ARM_TPIU, ACPR) == 0x10, "padding is wrong");
+static_assert(offsetof(ARM_TPIU, SPPR) == 0xF0, "padding is wrong");
+static_assert(offsetof(ARM_TPIU, FFSR) == 0x300, "padding is wrong");
+static_assert(offsetof(ARM_TPIU, TRIGGER) == 0xEE8, "padding is wrong");
+static_assert(offsetof(ARM_TPIU, PID4) == 0xFD0, "padding is wrong");
+static_assert(sizeof(ARM_TPIU) == 0x1000, "padding is wrong");
+#define TPIU (*(ARM_TPIU *)0xE0040000)
+
+#define M_FFCR_EnFCont (UINT32_C(1) << 1)
+#define M_FFCR_TrigIn (UINT32_C(1) << 8)
+
+struct ARM_DWT {
+  struct Compare {
+    volatile uint32_t COMP;
+    volatile uint32_t MASK;
+    volatile uint32_t FUNCTION;
+    volatile uint32_t unused1;
+  };
+
+  volatile uint32_t CTRL;
+  volatile uint32_t CYCCNT;
+  volatile uint32_t CPICNT;
+  volatile uint32_t EXCCNT;
+  volatile uint32_t SLEEPCNT;
+  volatile uint32_t LSUCNT;
+  volatile uint32_t FOLDCNT;
+  const volatile uint32_t PCSR;
+  Compare comp[4];
+  volatile uint32_t unused1[988];
+  union {
+    struct {
+      const volatile uint32_t PID4;
+      const volatile uint32_t PID5;
+      const volatile uint32_t PID6;
+      const volatile uint32_t PID7;
+      const volatile uint32_t PID0;
+      const volatile uint32_t PID1;
+      const volatile uint32_t PID2;
+      const volatile uint32_t PID3;
+    };
+    const volatile uint32_t PIDx[8];
+  };
+  const volatile uint32_t CID[4];
+} __attribute__((aligned(0x1000)));
+static_assert(offsetof(ARM_DWT, PID4) == 0xFD0, "padding is wrong");
+static_assert(sizeof(ARM_DWT) == 0x1000, "padding is wrong");
+#define DWT (*(ARM_DWT *)0xE0001000)
+
+#define G_DWT_CTRL_NUMCOMP(n) ((static_cast<uint32_t>(n) >> 28) & UINT32_C(0xF))
+#define M_DWT_CTRL_NOTRCPKT (UINT32_C(1) << 27)
+#define M_DWT_CTRL_NOEXTTRIG (UINT32_C(1) << 26)
+#define M_DWT_CTRL_NOCYCCNT (UINT32_C(1) << 25)
+#define M_DWT_CTRL_NOPRFCNT (UINT32_C(1) << 24)
+#define M_DWT_CTRL_CYCEVTENA (UINT32_C(1) << 22)
+#define M_DWT_CTRL_FOLDEVTENA (UINT32_C(1) << 21)
+#define M_DWT_CTRL_LSUEVTENA (UINT32_C(1) << 20)
+#define M_DWT_CTRL_SLEEPEVTENA (UINT32_C(1) << 19)
+#define M_DWT_CTRL_EXCEVTENA (UINT32_C(1) << 18)
+#define M_DWT_CTRL_CPIEVTENA (UINT32_C(1) << 17)
+#define M_DWT_CTRL_EXCTRCENA (UINT32_C(1) << 16)
+#define M_DWT_CTRL_PCSAMPLENA (UINT32_C(1) << 12)
+#define V_DWT_CTRL_SYNCTAP(n) ((static_cast<uint32_t>(n) & UINT32_C(3)) << 10)
+#define G_DWT_CTRL_SYNCTAP(n) ((static_cast<uint32_t>(n) >> 10) & UINT32_C(3))
+#define M_DWT_CTRL_CYCTAP (UINT32_C(1) << 9)
+#define V_DWT_CTRL_POSTINIT(n) ((static_cast<uint32_t>(n) & UINT32_C(0xF)) << 5))
+#define G_DWT_CTRL_POSTINIT(n) ((static_cast<uint32_t>(n) >> 5) & UINT32_C(0xF))
+#define V_DWT_CTRL_POSTPRESET(n) \
+  ((static_cast<uint32_t>(n) & UINT32_C(0xF)) << 1)
+#define G_DWT_CTRL_POSTPRESET(n) \
+  ((static_cast<uint32_t>(n) >> 1) & UINT32_C(0xF))
+#define M_DWT_CTRL_CYCCNTENA (UINT32_C(1) << 0)
+
+#endif  // MOTORS_CORE_REG_DEBUG_H_
diff --git a/motors/core/semihosting.h b/motors/core/semihosting.h
new file mode 100644
index 0000000..c8de1f6
--- /dev/null
+++ b/motors/core/semihosting.h
@@ -0,0 +1,194 @@
+#ifndef MOTORS_CORE_SEMIHOSTING_H_
+#define MOTORS_CORE_SEMIHOSTING_H_
+
+// This file defines interfaces to the ARM semihosting functions.
+
+#include "third_party/GSL/include/gsl/gsl"
+
+namespace frc971 {
+namespace motors {
+namespace semihosting {
+
+inline uint32_t integer_operation(const uint32_t operation, void *const block) {
+  register uint32_t operation_register asm("r0") = operation;
+  register void *block_register asm("r1") = block;
+  // Clobbering all the other registers here doesn't help the strange ways this
+  // affects other code.
+  __asm__ __volatile__("bkpt #0xab"
+                       : "+r"(operation_register)
+                       : "r"(block_register)
+                       : "memory", "cc");
+  return operation_register;
+}
+
+struct Close {
+  // A handle for an open file.
+  uint32_t handle;
+
+  // Returns 0 if it succeeds, or -1 if not.
+  int Execute() {
+    return integer_operation(0x02, this);
+  }
+};
+
+struct Errno {
+  // Returns the current errno value.
+  int Execute() {
+    return integer_operation(0x13, nullptr);
+  }
+};
+
+struct FileLength {
+  // A handle for a previously opened seekable file.
+  uint32_t handle;
+
+  // Returns the current length of the file or -1.
+  int Execute() {
+    return integer_operation(0x0C, this);
+  }
+};
+
+struct GetCommandLine {
+  // Where the result is stored.
+  // The data is guaranteed to be null-terminated, but it's unclear what happens
+  // if the provided buffer is too short.
+  char *buffer;
+  uint32_t length;
+
+  GetCommandLine() = default;
+  GetCommandLine(gsl::span<char> buffer_in) {
+    buffer = buffer_in.data();
+    length = buffer_in.size();
+  }
+
+  // Returns 0 if it succeeds, or -1 if not.
+  int Execute() {
+    return integer_operation(0x15, this);
+  }
+};
+
+struct IsTty {
+  // A handle for a previously opened file.
+  uint32_t handle;
+
+  // Returns 0 if it's a file, 1 if it's an interactive device, or something
+  // else otherwise.
+  int Execute() {
+    return integer_operation(0x09, this);
+  }
+};
+
+struct Open {
+  // The names are basically fopen arguments. + is translated to P.
+  enum class Mode : uint32_t {
+    kR = 0,
+    kRB = 1,
+    kRP = 2,
+    kRPB = 3,
+    kW = 4,
+    kWB = 5,
+    kWP = 6,
+    kWPB = 7,
+    kA = 8,
+    kAB = 9,
+    kAP = 10,
+    kAPB = 11,
+  };
+
+  // A null-terminated file or device name.
+  char *name;
+  Mode mode;
+  // Does not include the terminating null which must still be present.
+  int name_length;
+
+  // Returns a non-zero file handle if it succeeds, or -1 if not.
+  int Execute() {
+    return integer_operation(0x01, this);
+  }
+};
+
+struct Read {
+  // A handle for a previously opened file.
+  uint32_t handle;
+  char *buffer;
+  uint32_t size;
+
+  Read() = default;
+  Read(uint32_t handle_in, gsl::span<char> buffer_in) {
+    handle = handle_in;
+    buffer = buffer_in.data();
+    size = buffer_in.size();
+  }
+
+  // Returns the result. If this is empty, that means the file is at EOF.
+  gsl::span<const char> Execute() {
+    const uint32_t not_read = integer_operation(0x06, this);
+    return gsl::span<const char>(buffer, size - not_read);
+  }
+};
+
+struct ReadCharacter {
+  char Execute() {
+    return integer_operation(0x07, nullptr);
+  }
+};
+
+struct Seek {
+  // A handle for a previously opened file.
+  uint32_t handle;
+  // The absolute byte position to move to.
+  uint32_t position;
+
+  // Returns 0 if it succeeds, or a negative value if not.
+  int Execute() {
+    return integer_operation(0x0A, this);
+  }
+};
+
+struct RealtimeTime {
+  // Returns the number of seconds since 00:00 January 1, 1970.
+  uint32_t Execute() {
+    return integer_operation(0x11, nullptr);
+  }
+};
+
+struct Write {
+  // A handle for a previously opened file.
+  // 1 and 2 for stdout/stderr seem to work too.
+  uint32_t handle;
+  const char *buffer;
+  uint32_t length;
+
+  Write() = default;
+  Write(uint32_t handle_in, gsl::span<const char> buffer_in) {
+    handle = handle_in;
+    buffer = buffer_in.data();
+    length = buffer_in.size();
+  }
+
+  // Returns 0 if it succeeds, or the number of bytes NOT written.
+  int Execute() {
+    return integer_operation(0x05, this);
+  }
+};
+
+struct WriteDebugCharacter {
+  const char *character;
+
+  void Execute() {
+    integer_operation(0x03, this);
+  }
+};
+
+struct WriteDebug {
+  // string must be null-terminated.
+  void Execute(const char *string) {
+    integer_operation(0x04, const_cast<char *>(string));
+  }
+};
+
+}  // namespace semihosting
+}  // namespace motors
+}  // namespace frc971
+
+#endif  // MOTORS_CORE_SEMIHOSTING_H_
diff --git a/motors/fet12/BUILD b/motors/fet12/BUILD
index a637a55..625c149 100644
--- a/motors/fet12/BUILD
+++ b/motors/fet12/BUILD
@@ -15,8 +15,7 @@
         "//motors/core",
         "//motors/peripheral:adc",
         "//motors/peripheral:can",
-        "//motors/usb",
-        "//motors/usb:cdc",
+        "//motors/print:usb",
     ],
 )
 
@@ -35,7 +34,7 @@
         "//motors/peripheral:adc",
         "//motors/peripheral:adc_dma",
         "//motors/peripheral:can",
-        "//motors/peripheral:uart",
+        "//motors/print:uart",
     ],
 )
 
diff --git a/motors/fet12/fet12.cc b/motors/fet12/fet12.cc
index 889138e..93b7828 100644
--- a/motors/fet12/fet12.cc
+++ b/motors/fet12/fet12.cc
@@ -10,8 +10,7 @@
 #include "motors/motor.h"
 #include "motors/peripheral/adc.h"
 #include "motors/peripheral/can.h"
-#include "motors/usb/cdc.h"
-#include "motors/usb/usb.h"
+#include "motors/print/print.h"
 #include "motors/util.h"
 
 namespace frc971 {
@@ -64,7 +63,6 @@
 }
 
 ::std::atomic<Motor *> global_motor{nullptr};
-::std::atomic<teensy::AcmTty *> global_stdout{nullptr};
 
 extern "C" {
 
@@ -79,16 +77,6 @@
   }
 }
 
-int _write(int /*file*/, char *ptr, int len) {
-  teensy::AcmTty *const tty = global_stdout.load(::std::memory_order_acquire);
-  if (tty != nullptr) {
-    return tty->Write(ptr, len);
-  }
-  return 0;
-}
-
-void __stack_chk_fail(void);
-
 extern char *__brkval;
 extern uint32_t __bss_ram_start__[];
 extern uint32_t __heap_start__[];
@@ -239,13 +227,11 @@
 
   DMA.CR = M_DMA_EMLM;
 
-  teensy::UsbDevice usb_device(0, 0x16c0, 0x0490);
-  usb_device.SetManufacturer("FRC 971 Spartan Robotics");
-  usb_device.SetProduct("FET12");
-  teensy::AcmTty tty1(&usb_device);
-  teensy::AcmTty tty2(&usb_device);
-  global_stdout.store(&tty1, ::std::memory_order_release);
-  usb_device.Initialize();
+  PrintingParameters printing_parameters;
+  printing_parameters.dedicated_usb = true;
+  const ::std::unique_ptr<PrintingImplementation> printing =
+      CreatePrinting(printing_parameters);
+  printing->Initialize();
 
   AdcInitFet12();
   MathInit();
diff --git a/motors/fet12/fet12v2.cc b/motors/fet12/fet12v2.cc
index 7d18f46..2461666 100644
--- a/motors/fet12/fet12v2.cc
+++ b/motors/fet12/fet12v2.cc
@@ -12,9 +12,8 @@
 #include "motors/peripheral/adc.h"
 #include "motors/peripheral/adc_dma.h"
 #include "motors/peripheral/can.h"
-#include "motors/peripheral/uart.h"
+#include "motors/print/print.h"
 #include "motors/util.h"
-#include "third_party/GSL/include/gsl/gsl"
 
 namespace frc971 {
 namespace motors {
@@ -65,17 +64,8 @@
 ::std::atomic<Motor *> global_motor{nullptr};
 ::std::atomic<teensy::AdcDmaSampler *> global_adc_dma{nullptr};
 
-::std::atomic<teensy::InterruptBufferedUart *> global_stdout{nullptr};
-
 extern "C" {
 
-void uart0_status_isr(void) {
-  teensy::InterruptBufferedUart *const tty =
-      global_stdout.load(::std::memory_order_relaxed);
-  DisableInterrupts disable_interrupts;
-  tty->HandleInterrupt(disable_interrupts);
-}
-
 void *__stack_chk_guard = (void *)0x67111971;
 void __stack_chk_fail(void) {
   while (true) {
@@ -87,18 +77,6 @@
   }
 }
 
-int _write(int /*file*/, char *ptr, int len) {
-  teensy::InterruptBufferedUart *const tty =
-      global_stdout.load(::std::memory_order_acquire);
-  if (tty != nullptr) {
-    tty->Write(gsl::make_span(ptr, len));
-    return len;
-  }
-  return 0;
-}
-
-void __stack_chk_fail(void);
-
 extern char *__brkval;
 extern uint32_t __bss_ram_start__[];
 extern uint32_t __heap_start__[];
@@ -478,10 +456,15 @@
   PORTB_PCR16 = PORT_PCR_DSE | PORT_PCR_MUX(3);
   PORTB_PCR17 = PORT_PCR_DSE | PORT_PCR_MUX(3);
   SIM_SCGC4 |= SIM_SCGC4_UART0;
-  teensy::InterruptBufferedUart debug_uart(&UART0, F_CPU);
-  debug_uart.Initialize(115200);
-  global_stdout.store(&debug_uart, ::std::memory_order_release);
-  NVIC_ENABLE_IRQ(IRQ_UART0_STATUS);
+
+  PrintingParameters printing_parameters;
+  printing_parameters.stdout_uart_module = &UART0;
+  printing_parameters.stdout_uart_module_clock_frequency = F_CPU;
+  printing_parameters.stdout_uart_status_interrupt = IRQ_UART0_STATUS;
+  printing_parameters.dedicated_usb = true;
+  const ::std::unique_ptr<PrintingImplementation> printing =
+      CreatePrinting(printing_parameters);
+  printing->Initialize();
 
   AdcInitFet12();
   MathInit();
diff --git a/motors/motor.cc b/motors/motor.cc
index 5bd2271..ebeec22 100644
--- a/motors/motor.cc
+++ b/motors/motor.cc
@@ -402,23 +402,25 @@
   } else {
 #endif  // SAMPLE_UNTIL_DONE
     // Time to write the data out.
-    if (written < (int)sizeof(data) && debug_tty_ != nullptr) {
+    if (written < (int)sizeof(data) && printing_implementation_ != nullptr) {
       int to_write = sizeof(data) - written;
       if (to_write > 64) {
         to_write = 64;
       }
-      int result = debug_tty_->Write(((const char *)data) + written, to_write);
+      int result = printing_implementation_->Write(((const char *)data) + written, to_write);
       if (result >= 0) {
         written += result;
       } else {
         printf("error\n");
       }
     }
+#if 0
     if (!done_writing && written >= (int)sizeof(data) &&
-        debug_tty_->write_queue_empty()) {
+        printing_implementation_->write_queue_empty()) {
       printf("done writing %d\n", written);
       done_writing = true;
     }
+#endif
   }
 #endif  // PRINT_READINGS/PRINT_ALL_READINGS/TAKE_SAMPLE
   (void)balanced;
diff --git a/motors/motor.h b/motors/motor.h
index 4c5e604..4a65f63 100644
--- a/motors/motor.h
+++ b/motors/motor.h
@@ -7,11 +7,11 @@
 
 #include "motors/algorithms.h"
 #include "motors/core/kinetis.h"
+#include "motors/core/time.h"
 #include "motors/peripheral/adc.h"
 #include "motors/peripheral/configuration.h"
+#include "motors/print/print.h"
 #include "motors/util.h"
-#include "motors/usb/cdc.h"
-#include "motors/core/time.h"
 
 namespace frc971 {
 namespace motors {
@@ -61,7 +61,10 @@
 
   void Reset() { controls_->Reset(); }
 
-  void set_debug_tty(teensy::AcmTty *debug_tty) { debug_tty_ = debug_tty; }
+  void set_printing_implementation(
+      PrintingImplementation *printing_implementation) {
+    printing_implementation_ = printing_implementation;
+  }
   void set_deadtime_compensation(int deadtime_compensation) {
     deadtime_compensation_ = deadtime_compensation;
   }
@@ -177,7 +180,7 @@
   int encoder_multiplier_ = 1;
   uint32_t last_wrapped_encoder_reading_ = 0;
 
-  teensy::AcmTty *debug_tty_ = nullptr;
+  PrintingImplementation *printing_implementation_ = nullptr;
 };
 
 }  // namespace motors
diff --git a/motors/peripheral/uart.cc b/motors/peripheral/uart.cc
index 70eadf1..3f7ddaf 100644
--- a/motors/peripheral/uart.cc
+++ b/motors/peripheral/uart.cc
@@ -50,7 +50,7 @@
   module_->RWFIFO = rx_fifo_size_ - 1;
 }
 
-void Uart::DoWrite(gsl::span<char> data) {
+void Uart::DoWrite(gsl::span<const char> data) {
   for (int i = 0; i < data.size(); ++i) {
     while (!SpaceAvailable()) {
     }
@@ -64,7 +64,7 @@
   uart_.Initialize(baud_rate);
 }
 
-void InterruptBufferedUart::Write(gsl::span<char> data) {
+void InterruptBufferedUart::Write(gsl::span<const char> data) {
   DisableInterrupts disable_interrupts;
   uart_.EnableTransmitInterrupt();
   static_assert(buffer_.size() >= 8,
diff --git a/motors/peripheral/uart.h b/motors/peripheral/uart.h
index 12b8523..ca40ab7 100644
--- a/motors/peripheral/uart.h
+++ b/motors/peripheral/uart.h
@@ -21,7 +21,9 @@
   void Initialize(int baud_rate);
 
   // Blocks until all of the data is at least queued.
-  void Write(gsl::span<char> data, const DisableInterrupts &) { DoWrite(data); }
+  void Write(gsl::span<const char> data, const DisableInterrupts &) {
+    DoWrite(data);
+  }
 
   bool SpaceAvailable() const { return module_->S1 & M_UART_TDRE; }
   // Only call this if SpaceAvailable() has just returned true.
@@ -38,7 +40,7 @@
   }
 
  private:
-  void DoWrite(gsl::span<char> data);
+  void DoWrite(gsl::span<const char> data);
 
   KINETISK_UART_t *const module_;
   const int module_clock_frequency_;
@@ -56,7 +58,7 @@
 
   void Initialize(int baud_rate);
 
-  void Write(gsl::span<char> data);
+  void Write(gsl::span<const char> data);
 
   // Should be called as the body of the interrupt handler.
   void HandleInterrupt(const DisableInterrupts &disable_interrupts) {
diff --git a/motors/peripheral/uart_buffer.h b/motors/peripheral/uart_buffer.h
index fb7d126..63dd70d 100644
--- a/motors/peripheral/uart_buffer.h
+++ b/motors/peripheral/uart_buffer.h
@@ -13,7 +13,7 @@
 class UartBuffer {
  public:
   // Returns the number of characters added.
-  __attribute__((warn_unused_result)) int PushSpan(gsl::span<char> data);
+  __attribute__((warn_unused_result)) int PushSpan(gsl::span<const char> data);
 
   bool empty() const { return size_ == 0; }
 
@@ -32,7 +32,7 @@
 };
 
 template<int kSize>
-int UartBuffer<kSize>::PushSpan(gsl::span<char> data) {
+int UartBuffer<kSize>::PushSpan(gsl::span<const char> data) {
   const int end_location = (start_ + size_) % kSize;
   const int remaining_end = ::std::min(kSize - size_, kSize - end_location);
   const int on_end = ::std::min<int>(data.size(), remaining_end);
diff --git a/motors/pistol_grip/BUILD b/motors/pistol_grip/BUILD
index 75cab98..b86db22 100644
--- a/motors/pistol_grip/BUILD
+++ b/motors/pistol_grip/BUILD
@@ -11,6 +11,7 @@
         "//motors:util",
         "//motors/core",
         "//motors/peripheral:can",
+        "//motors/print:usb",
         "//motors/usb",
         "//motors/usb:cdc",
         "//motors/usb:hid",
@@ -39,8 +40,7 @@
         "//motors/core",
         "//motors/peripheral:adc",
         "//motors/peripheral:can",
-        "//motors/usb",
-        "//motors/usb:cdc",
+        "//motors/print:usb",
     ],
 )
 
diff --git a/motors/pistol_grip/controller.cc b/motors/pistol_grip/controller.cc
index 54d8f04..9f292d1 100644
--- a/motors/pistol_grip/controller.cc
+++ b/motors/pistol_grip/controller.cc
@@ -13,8 +13,7 @@
 #include "motors/peripheral/adc.h"
 #include "motors/peripheral/can.h"
 #include "motors/pistol_grip/motor_controls.h"
-#include "motors/usb/cdc.h"
-#include "motors/usb/usb.h"
+#include "motors/print/print.h"
 #include "motors/util.h"
 
 #define MOTOR0_PWM_FTM FTM3
@@ -141,7 +140,6 @@
     ::frc971::control_loops::drivetrain::kHapticTriggerCurrentLimit);
 
 ::std::atomic<Motor *> global_motor0{nullptr}, global_motor1{nullptr};
-::std::atomic<teensy::AcmTty *> global_stdout{nullptr};
 
 // Angle last time the current loop ran.
 ::std::atomic<float> global_wheel_angle{0.0f};
@@ -195,14 +193,6 @@
   }
 }
 
-int _write(int /*file*/, char *ptr, int len) {
-  teensy::AcmTty *const tty = global_stdout.load(::std::memory_order_acquire);
-  if (tty != nullptr) {
-    return tty->Write(ptr, len);
-  }
-  return 0;
-}
-
 extern uint32_t __bss_ram_start__[], __bss_ram_end__[];
 extern uint32_t __data_ram_start__[], __data_ram_end__[];
 extern uint32_t __heap_start__[], __heap_end__[];
@@ -736,13 +726,11 @@
 
   DMA.CR = M_DMA_EMLM;
 
-  teensy::UsbDevice usb_device(0, 0x16c0, 0x0490);
-  usb_device.SetManufacturer("FRC 971 Spartan Robotics");
-  usb_device.SetProduct("Pistol Grip Controller debug");
-  teensy::AcmTty tty1(&usb_device);
-  teensy::AcmTty tty2(&usb_device);
-  global_stdout.store(&tty1, ::std::memory_order_release);
-  usb_device.Initialize();
+  PrintingParameters printing_parameters;
+  printing_parameters.dedicated_usb = true;
+  const ::std::unique_ptr<PrintingImplementation> printing =
+      CreatePrinting(printing_parameters);
+  printing->Initialize();
 
   AdcInitSmall();
   MathInit();
@@ -790,12 +778,12 @@
   Motor motor0(
       MOTOR0_PWM_FTM, MOTOR0_ENCODER_FTM, &controls0,
       {&MOTOR0_PWM_FTM->C4V, &MOTOR0_PWM_FTM->C2V, &MOTOR0_PWM_FTM->C6V});
-  motor0.set_debug_tty(&tty2);
+  motor0.set_printing_implementation(printing.get());
   motor0.set_switching_divisor(kSwitchingDivisor);
   Motor motor1(
       MOTOR1_PWM_FTM, MOTOR1_ENCODER_FTM, &controls1,
       {&MOTOR1_PWM_FTM->C0V, &MOTOR1_PWM_FTM->C2V, &MOTOR1_PWM_FTM->C4V});
-  motor1.set_debug_tty(&tty2);
+  motor1.set_printing_implementation(printing.get());
   motor1.set_switching_divisor(kSwitchingDivisor);
   ConfigurePwmFtm(MOTOR0_PWM_FTM);
   ConfigurePwmFtm(MOTOR1_PWM_FTM);
diff --git a/motors/pistol_grip/drivers_station.cc b/motors/pistol_grip/drivers_station.cc
index e87d5e3..4b4606f 100644
--- a/motors/pistol_grip/drivers_station.cc
+++ b/motors/pistol_grip/drivers_station.cc
@@ -4,21 +4,20 @@
 #include <stdio.h>
 #include <atomic>
 
-#include "motors/core/time.h"
 #include "motors/core/kinetis.h"
+#include "motors/core/time.h"
 #include "motors/peripheral/can.h"
-#include "motors/usb/usb.h"
+#include "motors/print/print.h"
 #include "motors/usb/cdc.h"
 #include "motors/usb/hid.h"
 #include "motors/usb/interrupt_out.h"
+#include "motors/usb/usb.h"
 #include "motors/util.h"
 
 namespace frc971 {
 namespace motors {
 namespace {
 
-::std::atomic<teensy::AcmTty *> global_stdout{nullptr};
-
 // TODO(Brian): Move this and the other two test functions somewhere else.
 __attribute__((unused)) void EchoChunks(teensy::AcmTty *tty1) {
   while (true) {
@@ -233,17 +232,16 @@
 extern "C" {
 
 void *__stack_chk_guard = (void *)0x67111971;
-
-int _write(int /*file*/, char *ptr, int len) {
-  teensy::AcmTty *const tty = global_stdout.load(::std::memory_order_acquire);
-  if (tty != nullptr) {
-    return tty->Write(ptr, len);
+void __stack_chk_fail(void) {
+  while (true) {
+    GPIOC_PSOR = (1 << 5);
+    printf("Stack corruption detected\n");
+    delay(1000);
+    GPIOC_PCOR = (1 << 5);
+    delay(1000);
   }
-  return 0;
 }
 
-void __stack_chk_fail(void);
-
 }  // extern "C"
 
 extern "C" int main(void) {
@@ -285,8 +283,14 @@
   teensy::AcmTty tty1(&usb_device);
   teensy::AcmTty tty2(&usb_device);
   teensy::InterruptOut interrupt_out(&usb_device, "JoystickForce");
-  global_stdout.store(&tty2, ::std::memory_order_release);
+  PrintingParameters printing_parameters;
+  printing_parameters.stdout_tty = &tty1;
+  printing_parameters.debug_tty = &tty2;
+  const ::std::unique_ptr<PrintingImplementation> printing =
+      CreatePrinting(printing_parameters);
+
   usb_device.Initialize();
+  printing->Initialize();
 
   can_init(0, 1);
 
@@ -295,15 +299,5 @@
   return 0;
 }
 
-void __stack_chk_fail(void) {
-  while (true) {
-    GPIOC_PSOR = (1 << 5);
-    printf("Stack corruption detected\n");
-    delay(1000);
-    GPIOC_PCOR = (1 << 5);
-    delay(1000);
-  }
-}
-
 }  // namespace motors
 }  // namespace frc971
diff --git a/motors/print/BUILD b/motors/print/BUILD
new file mode 100644
index 0000000..26ee976
--- /dev/null
+++ b/motors/print/BUILD
@@ -0,0 +1,81 @@
+load("//tools:environments.bzl", "mcu_cpus")
+
+cc_library(
+    name = "print",
+    hdrs = [
+        "print.h",
+    ],
+    restricted_to = mcu_cpus,
+    visibility = ["//visibility:public"],
+    deps = [
+        "//motors/core",
+        "//third_party/GSL",
+    ],
+)
+
+cc_library(
+    name = "uart",
+    srcs = [
+        "uart.cc",
+    ],
+    hdrs = [
+        "uart.h",
+    ],
+    restricted_to = mcu_cpus,
+    visibility = ["//visibility:public"],
+    deps = [
+        ":print",
+        "//motors/core",
+        "//motors/peripheral:uart",
+    ],
+)
+
+cc_library(
+    name = "itm",
+    srcs = [
+        "itm.cc",
+    ],
+    hdrs = [
+        "itm.h",
+    ],
+    restricted_to = mcu_cpus,
+    visibility = ["//visibility:public"],
+    deps = [
+        ":print",
+        "//motors/core:itm",
+    ],
+)
+
+cc_library(
+    name = "semihosting",
+    srcs = [
+        "semihosting.cc",
+    ],
+    hdrs = [
+        "semihosting.h",
+    ],
+    restricted_to = mcu_cpus,
+    visibility = ["//visibility:public"],
+    deps = [
+        ":print",
+        "//motors/core:semihosting",
+    ],
+)
+
+cc_library(
+    name = "usb",
+    srcs = [
+        "usb.cc",
+    ],
+    hdrs = [
+        "usb.h",
+    ],
+    restricted_to = mcu_cpus,
+    visibility = ["//visibility:public"],
+    deps = [
+        ":print",
+        "//motors/core",
+        "//motors/usb",
+        "//motors/usb:cdc",
+    ],
+)
diff --git a/motors/print/itm.cc b/motors/print/itm.cc
new file mode 100644
index 0000000..0e3c38c
--- /dev/null
+++ b/motors/print/itm.cc
@@ -0,0 +1,95 @@
+#include "motors/print/itm.h"
+
+#include "motors/core/itm.h"
+
+namespace frc971 {
+namespace motors {
+namespace {
+
+template<int kPort> void WriteToPort(gsl::span<const char> buffer) {
+  // This ignores memory barriers etc, because it will be called by
+  // CreatePrinting which must be called before any interrupts are enabled. That
+  // means the only thing we need to worry about is actually getting it
+  // initialized with a minimal number of cycles.
+  static bool is_initialized = false;
+  if (__builtin_expect(!is_initialized, false)) {
+    is_initialized = true;
+    itm::Initialize();
+  }
+
+  const char *next_address = buffer.data();
+  int remaining_bytes = buffer.size();
+
+  // Write small chunks to make the address even.
+  if (remaining_bytes >= 1 && (reinterpret_cast<uintptr_t>(next_address) & 1)) {
+    uint8_t value;
+    memcpy(&value, next_address, 1);
+    itm::Write8(kPort, value);
+    next_address += 1;
+    remaining_bytes -= 1;
+  }
+  if (remaining_bytes >= 2 && (reinterpret_cast<uintptr_t>(next_address) & 2)) {
+    uint16_t value;
+    memcpy(&value, next_address, 2);
+    itm::Write16(kPort, value);
+    next_address += 2;
+    remaining_bytes -= 2;
+  }
+
+  // Write big chunks while we can.
+  while (remaining_bytes >= 4) {
+    uint32_t value;
+    memcpy(&value, next_address, 4);
+    itm::Write32(kPort, value);
+    next_address += 4;
+    remaining_bytes -= 4;
+  }
+
+  // Write out any remaining uneven bytes on the end.
+  if (remaining_bytes >= 2) {
+    uint16_t value;
+    memcpy(&value, next_address, 2);
+    itm::Write16(kPort, value);
+    next_address += 2;
+    remaining_bytes -= 2;
+  }
+  if (remaining_bytes >= 1) {
+    uint8_t value;
+    memcpy(&value, next_address, 1);
+    itm::Write8(kPort, value);
+    next_address += 1;
+    remaining_bytes -= 1;
+  }
+}
+
+}  // namespace
+
+::std::unique_ptr<PrintingImplementation> CreatePrinting(
+    const PrintingParameters & /*parameters*/) {
+  return ::std::unique_ptr<PrintingImplementation>(new ItmPrinting());
+}
+
+extern "C" int _write(const int /*file*/, char *const ptr, const int len) {
+  WriteToPort<0>(gsl::span<const char>(ptr, len));
+  return len;
+}
+
+ItmPrinting::ItmPrinting() {
+  // Make sure we run the one-time initialization. It's important to do it here
+  // to ensure it's complete before interrupts are enabled, because it's not
+  // interrupt-safe.
+  _write(0, nullptr, 0);
+}
+
+int ItmPrinting::WriteStdout(gsl::span<const char> buffer) {
+  WriteToPort<0>(buffer);
+  return buffer.size();
+}
+
+int ItmPrinting::WriteDebug(gsl::span<const char> buffer) {
+  WriteToPort<1>(buffer);
+  return buffer.size();
+}
+
+}  // namespace motors
+}  // namespace frc971
diff --git a/motors/print/itm.h b/motors/print/itm.h
new file mode 100644
index 0000000..99f9970
--- /dev/null
+++ b/motors/print/itm.h
@@ -0,0 +1,32 @@
+#ifndef MOTORS_PRINT_ITM_
+#define MOTORS_PRINT_ITM_
+
+#include "motors/print/print.h"
+
+namespace frc971 {
+namespace motors {
+
+// A printing implementation via the SWO (trace output) pin. This requires an
+// attached debugger which is in SWD (Single Wire Debug) mode, has the SWO
+// (also known as JTAG_TDO) pin hooked up, and software support.
+//
+// To decode the output from this, use motors/print/itm_read.py.
+// To configure openocd to feed data to that:
+//   tpiu config internal /tmp/itm.fifo uart off 120000000 64000
+class ItmPrinting final : public PrintingImplementation {
+ public:
+  ItmPrinting();
+  ~ItmPrinting() override = default;
+
+  void Initialize() override {}
+
+  // This goes to stimulus port 0.
+  int WriteStdout(gsl::span<const char> buffer) override;
+  // This goes to stimulus port 1.
+  int WriteDebug(gsl::span<const char> buffer) override;
+};
+
+}  // namespace motors
+}  // namespace frc971
+
+#endif  // MOTORS_PRINT_ITM_
diff --git a/motors/print/itm_read.py b/motors/print/itm_read.py
new file mode 100755
index 0000000..d616da4
--- /dev/null
+++ b/motors/print/itm_read.py
@@ -0,0 +1,107 @@
+#!/usr/bin/python3
+
+# This is a program to parse output from the ITM and DWT.
+# The "Debug ITM and DWT Packet Protocol" section of the ARMv7-M Architecture
+# Reference Manual is a good reference.
+#
+# This seems like it might be a poster child for using coroutines, but those
+# look scary so we're going to stick with generators.
+
+import io
+import os
+import sys
+
+def open_file_for_bytes(path):
+  '''Returns a file-like object which reads bytes without buffering.'''
+  # Not using `open` because it's unclear from the docs how (if it's possible at
+  # all) to get something that will only do one read call and return what that
+  # gets on a fifo.
+  try:
+    return io.FileIO(path, 'r')
+  except FileNotFoundError:
+    # If it wasn't found, try (once) to create it and then open again.
+    try:
+      os.mkfifo(path)
+    except FileExistsError:
+      pass
+    return io.FileIO(path, 'r')
+
+def read_bytes(path):
+  '''Reads bytes from a file. This is appropriate both for regular files and
+  fifos.
+  Args:
+    path: A path-like object to open.
+  Yields:
+    Individual bytes from the file, until hitting EOF.
+  '''
+  with open_file_for_bytes(path) as f:
+    while True:
+      buf = f.read(1024)
+      if not buf:
+        return
+      for byte in buf:
+        yield byte
+
+def parse_packets(source):
+  '''Parses a stream of bytes into packets.
+  Args:
+    source: A generator of individual bytes.
+  Generates:
+    Packets as bytes objects.
+  '''
+  try:
+    while True:
+      header = next(source)
+      if header == 0:
+        # Synchronization packets consist of a bunch of 0 bits (not necessarily
+        # a whole number of bytes), followed by a 128 byte. This is for hardware
+        # to synchronize on, but we're not in a position to do that, so
+        # presumably those should get filtered out before getting here?
+        raise 'Not sure how to handle synchronization packets'
+      packet = bytearray()
+      packet.append(header)
+      header_size = header & 3
+      if header_size == 0:
+        while packet[-1] & 128 and len(packet) < 7:
+          packet.append(next(source))
+      else:
+        if header_size == 3:
+          header_size = 4
+        for _ in range(header_size):
+          packet.append(next(source))
+      yield bytes(packet)
+  except StopIteration:
+    return
+
+class PacketParser(object):
+  def __init__(self):
+    self.stimulus_handlers = {}
+
+  def register_stimulus_handler(self, port_number, handler):
+    '''Registers a function to call on packets to the specified port.'''
+    self.stimulus_handlers[port_number] = handler
+
+  def process(self, path):
+    for packet in parse_packets(read_bytes(path)):
+      header = packet[0]
+      header_size = header & 3
+      if header_size == 0:
+        # TODO(Brian): At least handle overflow packets here.
+        pass
+      else:
+        port_number = header >> 3
+        if port_number in self.stimulus_handlers:
+          self.stimulus_handlers[port_number](packet[1:])
+        else:
+          print('Warning: unhandled stimulus port %d' % port_number,
+                file=sys.stderr)
+          self.stimulus_handlers[port_number] = lambda _: None
+
+if __name__ == '__main__':
+  parser = PacketParser()
+  def print_byte(payload):
+    sys.stdout.write(payload.decode('ascii'))
+  parser.register_stimulus_handler(0, print_byte)
+
+  for path in sys.argv[1:]:
+    parser.process(path)
diff --git a/motors/print/print.h b/motors/print/print.h
new file mode 100644
index 0000000..d51a2c3
--- /dev/null
+++ b/motors/print/print.h
@@ -0,0 +1,93 @@
+#ifndef MOTORS_PRINT_PRINT_H_
+#define MOTORS_PRINT_PRINT_H_
+
+#include <memory>
+
+#include "motors/core/kinetis.h"
+#include "third_party/GSL/include/gsl/gsl"
+
+namespace frc971 {
+namespace teensy {
+
+class AcmTty;
+
+}  // namespace teensy
+namespace motors {
+
+class PrintingImplementation {
+ public:
+  PrintingImplementation() = default;
+  virtual ~PrintingImplementation() = default;
+
+  PrintingImplementation(const PrintingImplementation &) = delete;
+  PrintingImplementation &operator=(const PrintingImplementation &) = delete;
+
+  virtual void Initialize() = 0;
+
+  // Writes something directly to stdout/stderr (they are treated as the same).
+  virtual int WriteStdout(gsl::span<const char> buffer) = 0;
+  // Writes something to a separate debug stream. Some implementations will
+  // always ignore this, and others will ignore it under some conditions.
+  virtual int WriteDebug(gsl::span<const char> buffer) { return buffer.size(); }
+};
+
+// A trivial printing "implementation" which simply does nothing. This is used
+// when a real implementation can't be created by CreatePrinting due to missing
+// parameters.
+class NopPrinting : public PrintingImplementation {
+ public:
+  NopPrinting() = default;
+  ~NopPrinting() override = default;
+
+  void Initialize() override {}
+  int WriteStdout(gsl::span<const char> buffer) override {
+    return buffer.size();
+  }
+};
+
+// Contains various parameters for controlling how the printing implementation
+// is initialized. Some of these are optional depending on which implementation
+// is selected, while others being missing will result in some implementations
+// turning into NOPs.
+struct PrintingParameters {
+  // This module must have its clock enabled and pinmuxing set up before calling
+  // CreatePrinting.
+  KINETISK_UART_t *stdout_uart_module = nullptr;
+  int stdout_uart_module_clock_frequency = 0;
+  int stdout_uart_baud_rate = 115200;
+  int stdout_uart_status_interrupt = -1;
+
+  // Setting this to true indicates the implementation should manage its own
+  // UsbDevice. If there are other USB functions around, set stdout_tty (and
+  // optionally debug_tty) instead.
+  bool dedicated_usb = false;
+
+  // If these are used, Initialize() must be called on the UsbDevice before the
+  // PrintingImplementation.
+  teensy::AcmTty *stdout_tty = nullptr;
+  teensy::AcmTty *debug_tty = nullptr;
+};
+
+// Creates an implementation of the linked-in type. Exactly one printing
+// implementation must be linked in. If all the necessary parameters aren't
+// filled out, this will return a NopPrinting instance.
+//
+// Some implementations will work even before calling this, or calling
+// Initialize() on the result. Others do require calling this before they will
+// work. This must be called before enabling any interrupts or some
+// implementations may deadlock.
+//
+// This should only be called once per program lifetime. Many implementations
+// manage global resources in the returned object. The resulting object may be
+// destroyed, but not while interrupts might be running. Destroying the object
+// may or may not stop printing.
+//
+// This will not enable any interrupts. When applicable, that is deferred until
+// Initialize() is called on the result.
+::std::unique_ptr<PrintingImplementation> CreatePrinting(
+    const PrintingParameters &parameters);
+
+}  // namespace motors
+}  // namespace frc971
+
+#endif  // MOTORS_PRINT_PRINT_H_
diff --git a/motors/print/semihosting.cc b/motors/print/semihosting.cc
new file mode 100644
index 0000000..799b928
--- /dev/null
+++ b/motors/print/semihosting.cc
@@ -0,0 +1,24 @@
+#include "motors/print/semihosting.h"
+
+#include "motors/core/semihosting.h"
+
+namespace frc971 {
+namespace motors {
+
+::std::unique_ptr<PrintingImplementation> CreatePrinting(
+    const PrintingParameters & /*parameters*/) {
+  return ::std::unique_ptr<PrintingImplementation>(new SemihostingPrinting());
+}
+
+extern "C" int _write(const int /*file*/, char *const ptr, const int len) {
+  semihosting::Write operation{2 /* stderr */, gsl::span<const char>(ptr, len)};
+  return len - operation.Execute();
+}
+
+int SemihostingPrinting::WriteStdout(gsl::span<const char> buffer) {
+  semihosting::Write operation{2 /* stderr */, buffer};
+  return buffer.size() - operation.Execute();
+}
+
+}  // namespace motors
+}  // namespace frc971
diff --git a/motors/print/semihosting.h b/motors/print/semihosting.h
new file mode 100644
index 0000000..6ea6c3b
--- /dev/null
+++ b/motors/print/semihosting.h
@@ -0,0 +1,35 @@
+#ifndef MOTORS_PRINT_SEMIHOSTING_H_
+#define MOTORS_PRINT_SEMIHOSTING_H_
+
+#include "motors/print/print.h"
+
+namespace frc971 {
+namespace motors {
+
+// A printing implementation which uses the ARM semihosting interface. This
+// requries an attached debugger with software support.
+//
+// You have to do "arm semihosting enable" in openocd to enable this.
+// It also seems to be broken with the usb-tiny-h in the openocd version we're
+// using, but works fine with the st-link-v2.
+// It may also only work if you do this immediately after starting openocd.
+//
+// Note that this implementation has strange effects on timing even of
+// interrupts-disabled code and is in general extremely slow.
+class SemihostingPrinting final : public PrintingImplementation {
+ public:
+  SemihostingPrinting() = default;
+  ~SemihostingPrinting() override = default;
+
+  void Initialize() override {}
+
+  int WriteStdout(gsl::span<const char> buffer) override;
+
+  // Could easily implement an optional WriteDebug which goes to a separate
+  // file if the name is filled out in the parameters.
+};
+
+}  // namespace motors
+}  // namespace frc971
+
+#endif  // MOTORS_PRINT_SEMIHOSTING_H_
diff --git a/motors/print/uart.cc b/motors/print/uart.cc
new file mode 100644
index 0000000..84b33cd
--- /dev/null
+++ b/motors/print/uart.cc
@@ -0,0 +1,69 @@
+#include "motors/print/uart.h"
+
+#include "motors/core/kinetis.h"
+
+#include <atomic>
+
+namespace frc971 {
+namespace motors {
+namespace {
+
+::std::atomic<teensy::InterruptBufferedUart *> global_stdout{nullptr};
+
+}  // namespace
+
+::std::unique_ptr<PrintingImplementation> CreatePrinting(
+    const PrintingParameters &parameters) {
+  if (parameters.stdout_uart_module == nullptr) {
+    return ::std::unique_ptr<PrintingImplementation>(new NopPrinting());
+  }
+  if (parameters.stdout_uart_module_clock_frequency == 0) {
+    return ::std::unique_ptr<PrintingImplementation>(new NopPrinting());
+  }
+  if (parameters.stdout_uart_status_interrupt < 0) {
+    return ::std::unique_ptr<PrintingImplementation>(new NopPrinting());
+  }
+  return ::std::unique_ptr<PrintingImplementation>(
+      new UartPrinting(parameters));
+}
+
+extern "C" void uart0_status_isr(void) {
+  teensy::InterruptBufferedUart *const tty =
+      global_stdout.load(::std::memory_order_relaxed);
+  DisableInterrupts disable_interrupts;
+  tty->HandleInterrupt(disable_interrupts);
+}
+
+UartPrinting::UartPrinting(const PrintingParameters &parameters)
+    : stdout_uart_{parameters.stdout_uart_module,
+                   parameters.stdout_uart_module_clock_frequency},
+      stdout_status_interrupt_(parameters.stdout_uart_status_interrupt) {
+  stdout_uart_.Initialize(parameters.stdout_uart_baud_rate);
+}
+
+UartPrinting::~UartPrinting() {
+  NVIC_DISABLE_IRQ(stdout_status_interrupt_);
+  global_stdout.store(nullptr, ::std::memory_order_release);
+}
+
+void UartPrinting::Initialize() {
+  global_stdout.store(&stdout_uart_, ::std::memory_order_release);
+  NVIC_ENABLE_IRQ(stdout_status_interrupt_);
+}
+
+int UartPrinting::WriteStdout(gsl::span<const char> buffer) {
+  stdout_uart_.Write(buffer);
+  return buffer.size();
+}
+
+extern "C" int _write(const int /*file*/, char *const ptr, const int len) {
+  teensy::InterruptBufferedUart *const tty =
+      global_stdout.load(::std::memory_order_acquire);
+  if (tty != nullptr) {
+    tty->Write(gsl::make_span(ptr, len));
+  }
+  return len;
+}
+
+}  // namespace motors
+}  // namespace frc971
diff --git a/motors/print/uart.h b/motors/print/uart.h
new file mode 100644
index 0000000..f4a61c8
--- /dev/null
+++ b/motors/print/uart.h
@@ -0,0 +1,34 @@
+#ifndef MOTORS_PRINT_UART_H_
+#define MOTORS_PRINT_UART_H_
+
+#include "motors/peripheral/uart.h"
+#include "motors/print/print.h"
+
+namespace frc971 {
+namespace motors {
+
+// A printing implementation using a hardware UART. This has a reasonably sized
+// buffer in memory and uses interrupts to keep the hardware busy. It could
+// support DMA too in the future.
+class UartPrinting : public PrintingImplementation {
+ public:
+  // All required parameters must be filled out.
+  UartPrinting(const PrintingParameters &parameters);
+  ~UartPrinting() override;
+
+  void Initialize() override;
+
+  int WriteStdout(gsl::span<const char> buffer) override;
+
+ private:
+  teensy::InterruptBufferedUart stdout_uart_;
+  const int stdout_status_interrupt_;
+};
+
+// Could easily create a subclass of UartPrinting that also implements
+// WriteDebug on a second UART, and conditionally instantiate that.
+
+}  // namespace motors
+}  // namespace frc971
+
+#endif  // MOTORS_PRINT_UART_H_
diff --git a/motors/print/usb.cc b/motors/print/usb.cc
new file mode 100644
index 0000000..9284ecd
--- /dev/null
+++ b/motors/print/usb.cc
@@ -0,0 +1,66 @@
+#include "motors/print/usb.h"
+
+#include <atomic>
+
+#include "motors/core/kinetis.h"
+
+namespace frc971 {
+namespace motors {
+namespace {
+
+::std::atomic<teensy::AcmTty *> global_stdout{nullptr};
+
+}  // namespace
+
+::std::unique_ptr<PrintingImplementation> CreatePrinting(
+    const PrintingParameters &parameters) {
+  if (parameters.dedicated_usb) {
+    return ::std::unique_ptr<PrintingImplementation>(
+        new DedicatedUsbPrinting());
+  }
+  if (parameters.stdout_tty != nullptr) {
+    return ::std::unique_ptr<PrintingImplementation>(
+        new UsbPrinting(parameters.stdout_tty, parameters.debug_tty));
+  }
+  return ::std::unique_ptr<PrintingImplementation>(new NopPrinting());
+}
+
+extern "C" int _write(const int /*file*/, char *const ptr, const int len) {
+  teensy::AcmTty *const tty = global_stdout.load(::std::memory_order_acquire);
+  if (tty != nullptr) {
+    return tty->Write(ptr, len);
+  }
+  return len;
+}
+
+UsbPrinting::UsbPrinting(teensy::AcmTty *stdout_tty, teensy::AcmTty *debug_tty)
+    : stdout_tty_(stdout_tty), debug_tty_(debug_tty) {}
+
+UsbPrinting::~UsbPrinting() {
+  global_stdout.store(nullptr, ::std::memory_order_release);
+}
+
+void UsbPrinting::Initialize() {
+  global_stdout.store(stdout_tty_, ::std::memory_order_release);
+}
+
+DedicatedUsbPrinting::DedicatedUsbPrinting()
+    : usb_device_{0, 0x16c0, 0x0490},
+      stdout_tty_{&usb_device_},
+      debug_tty_{&usb_device_} {
+  usb_device_.SetManufacturer("FRC 971 Spartan Robotics");
+  usb_device_.SetProduct("FET12v2");
+  NVIC_SET_SANE_PRIORITY(IRQ_USBOTG, 0x7);
+}
+
+DedicatedUsbPrinting::~DedicatedUsbPrinting() {
+  global_stdout.store(nullptr, ::std::memory_order_release);
+}
+
+void DedicatedUsbPrinting::Initialize() {
+  usb_device_.Initialize();
+  global_stdout.store(&stdout_tty_, ::std::memory_order_release);
+}
+
+}  // namespace motors
+}  // namespace frc971
diff --git a/motors/print/usb.h b/motors/print/usb.h
new file mode 100644
index 0000000..7e63184
--- /dev/null
+++ b/motors/print/usb.h
@@ -0,0 +1,62 @@
+#ifndef MOTORS_PRINT_USB_H_
+#define MOTORS_PRINT_USB_H_
+
+#include "motors/print/print.h"
+#include "motors/usb/cdc.h"
+#include "motors/usb/usb.h"
+
+namespace frc971 {
+namespace motors {
+
+// A printing implementation which uses externally-created functions. The stdout
+// one is required, while the debug one is optional.
+class UsbPrinting final : public PrintingImplementation {
+ public:
+  UsbPrinting(teensy::AcmTty *stdout_tty, teensy::AcmTty *debug_tty);
+  ~UsbPrinting() override;
+
+  void Initialize() override;
+
+  int WriteStdout(gsl::span<const char> buffer) override {
+    return stdout_tty_->Write(buffer.data(), buffer.size());
+  }
+
+  int WriteDebug(gsl::span<const char> buffer) override {
+    if (debug_tty_ == nullptr) {
+      return buffer.size();
+    }
+    return debug_tty_->Write(buffer.data(), buffer.size());
+  }
+
+ private:
+  teensy::AcmTty *const stdout_tty_;
+  teensy::AcmTty *const debug_tty_;
+};
+
+// A printing implementation which creates its own UsbDevice and functions, and
+// manages their lifecycle.
+class DedicatedUsbPrinting final : public PrintingImplementation {
+ public:
+  DedicatedUsbPrinting();
+  ~DedicatedUsbPrinting() override;
+
+  void Initialize() override;
+
+  int WriteStdout(gsl::span<const char> buffer) override {
+    return stdout_tty_.Write(buffer.data(), buffer.size());
+  }
+
+  int WriteDebug(gsl::span<const char> buffer) override {
+    return debug_tty_.Write(buffer.data(), buffer.size());
+  }
+
+ private:
+  teensy::UsbDevice usb_device_;
+  teensy::AcmTty stdout_tty_;
+  teensy::AcmTty debug_tty_;
+};
+
+}  // namespace motors
+}  // namespace frc971
+
+#endif  // MOTORS_PRINT_USB_H_
diff --git a/motors/simple_receiver.cc b/motors/simple_receiver.cc
index 3b174b0..36d489f 100644
--- a/motors/simple_receiver.cc
+++ b/motors/simple_receiver.cc
@@ -11,11 +11,10 @@
 #include "motors/peripheral/adc.h"
 #include "motors/peripheral/can.h"
 #include "motors/peripheral/configuration.h"
+#include "motors/print/print.h"
 #include "motors/seems_reasonable/drivetrain_dog_motor_plant.h"
 #include "motors/seems_reasonable/polydrivetrain_dog_motor_plant.h"
 #include "motors/seems_reasonable/spring.h"
-#include "motors/usb/cdc.h"
-#include "motors/usb/usb.h"
 #include "motors/util.h"
 
 namespace frc971 {
@@ -83,8 +82,6 @@
 };
 
 
-::std::atomic<teensy::AcmTty *> global_stdout{nullptr};
-
 ::std::atomic<PolyDrivetrain<float> *> global_polydrivetrain{nullptr};
 ::std::atomic<Spring *> global_spring{nullptr};
 
@@ -647,15 +644,6 @@
 extern "C" {
 
 void *__stack_chk_guard = (void *)0x67111971;
-
-int _write(int /*file*/, char *ptr, int len) {
-  teensy::AcmTty *const tty = global_stdout.load(::std::memory_order_acquire);
-  if (tty != nullptr) {
-    return tty->Write(ptr, len);
-  }
-  return 0;
-}
-
 void __stack_chk_fail(void);
 
 }  // extern "C"
@@ -742,13 +730,11 @@
 
   delay(100);
 
-  teensy::UsbDevice usb_device(0, 0x16c0, 0x0492);
-  usb_device.SetManufacturer("Seems Reasonable LLC");
-  usb_device.SetProduct("Simple Receiver Board");
-
-  teensy::AcmTty tty0(&usb_device);
-  global_stdout.store(&tty0, ::std::memory_order_release);
-  usb_device.Initialize();
+  PrintingParameters printing_parameters;
+  printing_parameters.dedicated_usb = true;
+  const ::std::unique_ptr<PrintingImplementation> printing =
+      CreatePrinting(printing_parameters);
+  printing->Initialize();
 
   SIM_SCGC6 |= SIM_SCGC6_PIT;
   // Workaround for errata e7914.