started writing actual cape code
diff --git a/bbb_cape/src/cape/Makefile b/bbb_cape/src/cape/Makefile
index 6391b21..101552a 100644
--- a/bbb_cape/src/cape/Makefile
+++ b/bbb_cape/src/cape/Makefile
@@ -15,10 +15,11 @@
 
 CPPFLAGS := -I.. -ICMSIS \
 
-CFLAGS := -nostartfiles -nostdlib \
+CFLAGS := -nostartfiles -nostdlib -ffreestanding -fbuiltin \
 	-O3 -mcpu=cortex-m3 \
 	-mthumb -Wl,--gc-sections -ffunction-sections -Wl,-static \
 	-Wall -Werror --std=gnu99 \
+	-Wstrict-aliasing=2 -Wcast-qual -Wpointer-arith \
 
 LDFLAGS := -O3 -mcpu=cortex-m3 \
 	-nostartfiles -nostdlib \
@@ -30,6 +31,9 @@
 
 OBJECTS_main := main \
 	uart_common \
+	uart_dma \
+	fill_packet \
+	cows \
 
 OBJECTS_bootloader := bootloader \
 	uart_common \
diff --git a/bbb_cape/src/cape/bootloader.c b/bbb_cape/src/cape/bootloader.c
index ef816f3..fe2f6be 100644
--- a/bbb_cape/src/cape/bootloader.c
+++ b/bbb_cape/src/cape/bootloader.c
@@ -5,7 +5,6 @@
 // Sets everything up and then jumps to the main code.
 static void jump_to_main(void) __attribute__((noreturn));
 static void jump_to_main(void) {
-  // 0x20008000
   __asm__ __volatile__(
       "mov sp, %[stack]\n\t"
       "bx %[reset]" : :
diff --git a/bbb_cape/src/cape/cows.c b/bbb_cape/src/cape/cows.c
new file mode 100644
index 0000000..b0d168e
--- /dev/null
+++ b/bbb_cape/src/cape/cows.c
@@ -0,0 +1,59 @@
+#include "cape/cows.h"
+
+#include <limits.h>
+
+// This implementation is based on
+// <http://www.jacquesf.com/2011/03/consistent-overhead-byte-stuffing/>.
+
+uint32_t cows_stuff(const void *restrict source_in, size_t source_length,
+                    void *restrict destination_in) {
+  const uint32_t *restrict source = (const uint32_t *)source_in;
+  uint32_t *restrict destination = (uint32_t *)destination_in;
+  size_t source_index = 0;
+  size_t destination_index = 1;
+  size_t code_index = 0;
+  uint32_t code = 1;
+
+  while (source_index < ((source_length - 1) / 4) + 1) {
+    if (source[source_index] == 0) {
+      destination[code_index] = code;
+      code = 1;
+      code_index = destination_index++;
+      ++source_index;
+    } else {
+      destination[destination_index++] = source[source_index++];
+      ++code;
+      if (code == UINT32_MAX) {
+        destination[code_index] = code;
+        code = 1;
+        code_index = destination_index++;
+      }
+    }
+  }
+  destination[code_index] = code;
+  return destination_index;
+}
+
+uint32_t cows_unstuff(const uint32_t *restrict source, size_t source_length,
+                      uint32_t *restrict destination) {
+  size_t source_index = 0;
+  size_t destination_index = 0;
+  uint32_t code;
+
+  while (source_index < ((source_length - 1) / 4) + 1) {
+    code = source[source_index];
+    if (source_index + code > source_length && code != 1) {
+      return 0;
+    }
+
+    ++source_index;
+
+    for (uint32_t i = 1; i < code; ++i) {
+      destination[destination_index++] = source[source_index++];
+    }
+    if (code != UINT32_MAX && source_index != source_length) {
+      destination[destination_index++] = 0;
+    }
+  }
+  return destination_index;
+}
diff --git a/bbb_cape/src/cape/cows.h b/bbb_cape/src/cape/cows.h
new file mode 100644
index 0000000..a589fca
--- /dev/null
+++ b/bbb_cape/src/cape/cows.h
@@ -0,0 +1,33 @@
+#ifndef CAPE_COWS_H_
+#define CAPE_COWS_H_
+
+#include <sys/types.h>
+#include <stdint.h>
+
+// This file implements something very similar to Consistent Overhead Byte
+// Stuffing <http://en.wikipedia.org/wiki/Consistent_Overhead_Byte_Stuffing>. It
+// uses that algorithm except with 4-byte chunks instead of individual bytes
+// because that's more efficient on 32-bit processors. I'm calling it Consistent
+// Overhead Word Stuffing.
+
+// source_length will be rounded up to a multiple of 4. That many bytes of
+// source will be read.
+// destination must have at least
+// ([source_length rounded up to a multiple of 4] / (2^32 - 1) rounded up) * 4
+// more bytes than source_length available.
+// source and destination both have to be 4-byte aligned.
+// Returns the total number of words written (not necessarily the maximum given
+// in the above description of destination).
+uint32_t cows_stuff(const void *restrict source, size_t source_length,
+                    void *restrict destination);
+
+// source_length will be rounded up to a multiple of 4. That many bytes of
+// source will be read.
+// Destination must be big enough to hold all source_length bytes (see
+// cows_stuff for the exact size it might be).
+// source and destination both have to be 4-byte aligned.
+// Returns the total number of words written to destination or 0 for error.
+uint32_t cows_unstuff(const uint32_t *restrict source, size_t source_length,
+                      uint32_t *restrict destination);
+
+#endif  // CAPE_COWS_H_
diff --git a/bbb_cape/src/cape/data_struct.h b/bbb_cape/src/cape/data_struct.h
new file mode 100644
index 0000000..051095e
--- /dev/null
+++ b/bbb_cape/src/cape/data_struct.h
@@ -0,0 +1,107 @@
+// This isn't really a header file. It's designed to be #included directly into
+// other code (possibly in a namespace or whatever), so it doesn't have include
+// guards.
+// This means that it can not #include anything else because it (sometimes) gets
+// #included inside a namespace.
+// <stdint.h> must be #included by the containing file.
+// In the cape code, fill_packet.h #includes this file.
+// In the fitpc code, frc971/input/gyro_board_data.h #includes this file.
+
+#pragma pack(push, 1)
+// Be careful with declaration order in here. ARM doesn't like unaligned
+// accesses!
+struct DATA_STRUCT_NAME {
+  int64_t gyro_angle;
+
+  union {
+    struct {
+      // In us since the cape last reset.
+      uint64_t timestamp;
+
+      struct {
+        // If the current gyro_angle has not been updated because of a bad
+        // reading from the sensor.
+        uint8_t old_gyro_reading : 1;
+        // If we're not going to get any more good gyro_angles.
+        uint8_t bad_gyro : 1;
+      };
+    };
+    struct {
+      uint64_t header1, header2;
+    };
+  };
+
+  // We are 64-bit aligned at this point.
+
+  union {
+    struct {
+      int32_t left_drive;
+      int32_t right_drive;
+      int32_t shooter_angle;
+      int32_t shooter;
+      int32_t indexer;
+      int32_t wrist;
+
+      int32_t capture_top_rise;
+      int32_t capture_top_fall;
+      int32_t capture_bottom_fall_delay;
+      int32_t capture_wrist_rise;
+      int32_t capture_shooter_angle_rise;
+
+      uint16_t battery_voltage;
+      uint16_t left_drive_hall;
+      uint16_t right_drive_hall;
+
+      int8_t top_rise_count;
+
+      int8_t top_fall_count;
+
+      int8_t bottom_rise_count;
+
+      int8_t bottom_fall_delay_count;
+      int8_t bottom_fall_count;
+
+      int8_t wrist_rise_count;
+
+      int8_t shooter_angle_rise_count;
+
+      struct {
+        uint8_t wrist_hall_effect : 1;
+        uint8_t angle_adjust_bottom_hall_effect : 1;
+        uint8_t top_disc : 1;
+        uint8_t bottom_disc : 1;
+        uint8_t loader_top : 1;
+        uint8_t loader_bottom : 1;
+      };
+    } main;
+    
+    struct {
+      union {
+        struct {
+        };
+        uint16_t booleans;
+      };
+    } bot3;
+  };
+} __attribute__((aligned(8)));
+#pragma pack(pop)
+
+// The number of bytes that we actually send (so it stays consistent) (including
+// the byte-stuffing overhead and the CRC on the end).
+#define DATA_STRUCT_SEND_SIZE 200
+
+#ifdef __cplusplus
+#define STATIC_ASSERT(cond, msg) static_assert(cond, #msg)
+#endif
+// 4 bytes of 0s at the beginning, 4 bytes of byte-stuffing overhead, and 4
+// bytes of CRC on the end.
+STATIC_ASSERT(
+    (sizeof(struct DATA_STRUCT_NAME) + 8 + 4) <= DATA_STRUCT_SEND_SIZE,
+    The_sensor_data_structure_is_too_big);
+// The byte-stuffing and CRC both work in chunks of 4 bytes, so it has to be a
+// multiple of that in size.
+STATIC_ASSERT((sizeof(struct DATA_STRUCT_NAME) % 4) == 0,
+              The_sensor_data_structure_is_not_a_multiple_of_4_bytes);
+#ifdef __cplusplus
+#undef STATIC_ASSERT
+#endif
diff --git a/bbb_cape/src/cape/fill_packet.c b/bbb_cape/src/cape/fill_packet.c
new file mode 100644
index 0000000..fe0bb2c
--- /dev/null
+++ b/bbb_cape/src/cape/fill_packet.c
@@ -0,0 +1,43 @@
+#include "cape/fill_packet.h"
+
+#include <string.h>
+
+#include <STM32F2XX.h>
+
+#include "cape/uart_dma.h"
+#include "cape/cows.h"
+
+static uint8_t buffer1[DATA_STRUCT_SEND_SIZE] __attribute__((aligned(4)));
+static uint8_t buffer2[DATA_STRUCT_SEND_SIZE] __attribute__((aligned(4)));
+
+// Fills the new packet with data.
+void uart_dma_callback(uint8_t *buffer) {
+  struct {
+    struct DataStruct packet;
+    uint8_t padding[DATA_STRUCT_SEND_SIZE - sizeof(struct DataStruct) - 12];
+    uint32_t checksum;
+  } data __attribute__((aligned(4)));
+  STATIC_ASSERT(sizeof(data) == DATA_STRUCT_SEND_SIZE - 8,
+                The_size_of_the_data_is_wrong);
+  struct DataStruct *packet = &data.packet;
+
+  CRC->CR = 1;  // reset it
+  uint32_t *p1;
+  memcpy(&p1, &packet, sizeof(void *));
+  {
+    uint32_t *restrict p = p1;
+    for (; p < (uint32_t *)(packet + 1); ++p) {
+      CRC->DR = *p;
+    }
+  }
+  data.checksum = CRC->DR;
+
+  memset(buffer, 0, 4);
+  cows_stuff(&data, sizeof(data), buffer + 4);
+}
+
+void fill_packet_start(void) {
+  RCC->AHB1ENR |= RCC_AHB1ENR_CRCEN;
+
+  uart_dma_configure(3000000, DATA_STRUCT_SEND_SIZE, buffer1, buffer2);
+}
diff --git a/bbb_cape/src/cape/fill_packet.h b/bbb_cape/src/cape/fill_packet.h
new file mode 100644
index 0000000..f3f1543
--- /dev/null
+++ b/bbb_cape/src/cape/fill_packet.h
@@ -0,0 +1,14 @@
+#ifndef CAPE_FILL_PACKET_H_
+#define CAPE_FILL_PACKET_H_
+
+#include <stdint.h>
+
+#include "cape/util.h"
+#define DATA_STRUCT_NAME DataStruct
+#include "cape/data_struct.h"
+#undef DATA_STRUCT_NAME
+
+// Starts writing out sensor packets as fast as the serial port can write them.
+void fill_packet_start(void);
+
+#endif  // CAPE_FILL_PACKET_H_
diff --git a/bbb_cape/src/cape/hardware.notes b/bbb_cape/src/cape/hardware.notes
new file mode 100644
index 0000000..05b0dfe
--- /dev/null
+++ b/bbb_cape/src/cape/hardware.notes
@@ -0,0 +1,82 @@
+EXTI interrupt groupings:
+  by number in the port
+  0,1,2,3,4,5-9,10-15
+
+
+PA0  TIM5.1
+PA1  TIM5.2
+PA2
+PA3
+PA4  SPI3_NSS (slave select)
+PA5  TIM2.1
+PA6  TIM3.1
+PA7
+PA8  TIM1.1
+PA9  USART1_TX (bootloader)
+PA10 USART1_RX (bootloader)
+PA11 (don't change during reset into bootloader)
+PA12 (don't change during reset into bootloader)
+PA13 SWDIO
+PA14 SWCLK
+PA15 (gets pulled up during reset (JTAG pin))
+PB0  TIM1.2
+PB1
+PB2  BOOT1 (tie to GND)
+PB3  TIM2.2
+PB4  (gets pulled up during reset (JTAG pin))
+PB5  TIM3.2 (don't change during reset into bootloader)
+PB6  TIM4.1
+PB7  TIM4.2
+PB8
+PB9
+PB10
+PB11 (don't change during reset into bootloader)
+PB12 SPI2_NSS (slave select)
+PB13 SPI2_SCK
+PB14 SPI2_MISO
+PB15 SPI2_MOSI
+PC0
+PC1
+PC2
+PC3
+PC4
+PC5
+PC6  TIM8.1
+PC7  TIM8.2
+PC8
+PC9
+PC10 SPI3_SCK
+PC11 SPI3_MISO
+PC12 SPI3_MOSI
+PC13
+PC14
+PC15
+PD2
+
+[GPIOs]
+C0  enc
+C1  enc
+A2  enc
+A3  enc
+
+B2
+#A2  TIM9.1
+C4
+C5
+A7
+B8
+B9
+B10
+A11
+A12
+C13
+C14
+C15
+
+C8  BBB_RST TIM8.3
+
+
+IO compensation cell?
+  controls slew rates
+  increased power draw
+  should probably just enable it?
diff --git a/bbb_cape/src/cape/main.ld b/bbb_cape/src/cape/main.ld
index 7e8d43a..ec39b8f 100644
--- a/bbb_cape/src/cape/main.ld
+++ b/bbb_cape/src/cape/main.ld
@@ -8,6 +8,7 @@
 {
 	.text :
 	{
+		/* make sure this stays at the beginning of FLASH */
 		KEEP(*(Reset_Handler))
 	} > FLASH
 }
diff --git a/bbb_cape/src/cape/peripherial_usage.notes b/bbb_cape/src/cape/peripherial_usage.notes
new file mode 100644
index 0000000..ee5c0a8
--- /dev/null
+++ b/bbb_cape/src/cape/peripherial_usage.notes
@@ -0,0 +1,28 @@
+This file documents what code is using which peripheral(s), priorities when
+applicable, and which file(s) the code is in. The purpose is to make assigning
+priorities to everything else easier because the relative priorities of
+everything are what matter; the absolute priority is meaningless. It also helps
+with choosing things like timers.
+
+[BBB communication]
+USART1
+uart.[ch]
+  USART1_IRQ:3
+uart_dma.[ch]
+  DMA2.7:2
+  DMA2.7_IRQ:6
+
+[gyro communication]
+SPI3
+
+[ADC communication]
+SPI2
+
+[encoders]
+encoder.c
+  TIM1,TIM2,TIM3,TIM4,TIM5,TIM8
+
+[sensor packet sending]
+fill_packet.c
+  TIM6
+  CRC
diff --git a/bbb_cape/src/cape/uart.h b/bbb_cape/src/cape/uart.h
index 8216bf0..7ed24f4 100644
--- a/bbb_cape/src/cape/uart.h
+++ b/bbb_cape/src/cape/uart.h
@@ -11,6 +11,8 @@
 
 // Callbacks to be implemented by the user.
 // Implemented as weak symbols that do nothing by default.
+// The argument is the number of bytes transmitted or received. It will be less
+// than the requested number if there was an error.
 void uart_transmit_callback(int bytes_transmitted);
 void uart_receive_callback(int bytes_received);
 
diff --git a/bbb_cape/src/cape/uart_common.c b/bbb_cape/src/cape/uart_common.c
index 3b70764..77c796f 100644
--- a/bbb_cape/src/cape/uart_common.c
+++ b/bbb_cape/src/cape/uart_common.c
@@ -4,6 +4,7 @@
 #define FPCLK 60000000
 
 void uart_common_configure(int baud) {
+  RCC->APB2ENR |= RCC_APB2ENR_USART1EN;
   // baud = 60MHz / (8 * (2 - OVER8) * (mantissa / fraction))
   int fraction = 8;  // the biggest it can be with OVER8=0
   int mantissa = FPCLK * (16 /* 8 * (2 - OVER8) */ / fraction) / baud;
diff --git a/bbb_cape/src/cape/uart_dma.c b/bbb_cape/src/cape/uart_dma.c
new file mode 100644
index 0000000..4b2ec45
--- /dev/null
+++ b/bbb_cape/src/cape/uart_dma.c
@@ -0,0 +1,71 @@
+#include "cape/uart_dma.h"
+#include "cape/uart_common_private.h"
+
+#include "cape/util.h"
+#include "cape/uart_common.h"
+
+#define DMA DMA1
+#define DMA_STREAM_NUMBER 7
+#define DMA_Stream DMA1_Stream7
+#define DMA_SR DMA1->HISR
+#define DMA_FCR DMA1->HIFCR
+#define DMA_SR_SHIFT 3
+#define DMA_Stream_IRQHandler DMA1_Stream7_IRQHandler
+#define DMA_Stream_IRQn DMA1_Stream7_IRQn
+#define RCC_AHB1ENR_DMAEN RCC_AHB1ENR_DMA1EN
+
+#define DMA_SR_BIT(bit) (1 << (bit + 6 * DMA_SR_SHIFT))
+
+void uart_dma_callback(uint8_t *new_buffer) __attribute__((weak));
+void uart_dma_callback(uint8_t *new_buffer) {}
+
+static uint8_t *volatile buffer1, *volatile buffer2;
+
+void DMA_Stream_IRQHandler(void) {
+  uint32_t status = DMA_SR;
+  if (status & DMA_SR_BIT(5)) {  // transfer completed
+    DMA_FCR = DMA_SR_BIT(5);
+    uart_dma_callback(((DMA_Stream->CR & DMA_SxCR_CT) == 0) ? buffer2
+                                                            : buffer1);
+  } else if (status & DMA_SR_BIT(3)) {  // transfer error
+    DMA_FCR = DMA_SR_BIT(3);
+    // Somebody probably wrote to the wrong buffer, which disables the DMA, so
+    // we now need to re-enable it.
+    // If we're fighting somebody else writing stuff, we'll do this a bunch of
+    // times, but oh well.
+    DMA_Stream->CR |= DMA_SxCR_EN;
+  }
+}
+
+void uart_dma_configure(int baud, int bytes,
+                        uint8_t *buffer1_in, uint8_t *buffer2_in) {
+  uart_common_configure(baud);
+
+  buffer1 = buffer1_in;
+  buffer2 = buffer2_in;
+  uart_dma_callback(buffer1);
+
+  RCC->AHB1ENR |= RCC_AHB1ENR_DMAEN;
+  DMA_Stream->PAR = (uint32_t)&UART->DR;
+  DMA_Stream->M0AR = (uint32_t)buffer1;
+  DMA_Stream->M1AR = (uint32_t)buffer2;
+  // This is measured in chunks of PSIZE bytes, not MSIZE.
+  DMA_Stream->NDTR = bytes;
+  DMA_FCR = 0xF << DMA_SR_SHIFT;
+  DMA_Stream->CR = DMA_STREAM_NUMBER << 25 |
+      DMA_SxCR_DBM /* enable double buffer mode */ |
+      2 << 16 /* priority */ |
+      2 << 13 /* memory data size = 32 bits */ |
+      0 << 11 /* peripherial data size = 8 bits */ |
+      DMA_SxCR_MINC /* increment memory address */ |
+      1 << 6 /* memory to peripherial */ |
+      DMA_SxCR_TCIE | DMA_SxCR_TEIE;
+  DMA_Stream->FCR =
+      DMA_SxFCR_DMDIS /* disable direct mode (enable the FIFO) */ |
+      1 /* 1/2 full threshold */;
+  DMA_Stream->CR |= DMA_SxCR_EN;  // enable it
+  NVIC_SetPriority(DMA_Stream_IRQn, 6);
+  NVIC_EnableIRQ(DMA_Stream_IRQn);
+
+  uart_dma_callback(buffer2);
+}
diff --git a/bbb_cape/src/cape/uart_dma.h b/bbb_cape/src/cape/uart_dma.h
new file mode 100644
index 0000000..83e7637
--- /dev/null
+++ b/bbb_cape/src/cape/uart_dma.h
@@ -0,0 +1,21 @@
+#ifndef CAPE_UART_DMA_H_
+#define CAPE_UART_DMA_H_
+
+#include <stdint.h>
+
+// This file deals with USART1 over DMA. It sets the DMA stream to double-buffer
+// mode and calls a function when it's time to fill a new buffer. It only
+// supports sending.
+
+// Callback to be implemented by the user.
+// Implemented as a weak symbol that does nothing by default.
+// new_buffer is the buffer that should be filled out to be written next.
+void uart_dma_callback(uint8_t *new_buffer);
+
+// See uart_common_configure in uart_common.h for details about baud.
+// bytes is the size of buffer1 and buffer2.
+// Calls uart_dma_callback twice (once for each buffer) to get started.
+void uart_dma_configure(int baud, int bytes,
+                        uint8_t *buffer1, uint8_t *buffer2);
+
+#endif  // CAPE_UART_DMA_H_
diff --git a/bbb_cape/src/cape/util.h b/bbb_cape/src/cape/util.h
index b374ac8..dac30d5 100644
--- a/bbb_cape/src/cape/util.h
+++ b/bbb_cape/src/cape/util.h
@@ -3,6 +3,9 @@
 
 #define ALIAS_WEAK(f) __attribute__ ((weak, alias (#f)))
 
+// MSG is pasted into a typedef name, so it has to be written_with_underscores.
+#define STATIC_ASSERT(COND,MSG) typedef char static_assertion_##MSG[(!!(COND))*2-1]
+
 // Prevents the compiler from reordering memory operations around this.
 static inline void compiler_memory_barrier(void) {
   __asm__ __volatile__("" ::: "memory");