(theoretically) got encoders working

Everything up to sending all 8 encoder values should work now. That includes
the packet format and header information, the bootloader (after fixing some
bugs in the UART code it uses), etc.
diff --git a/bbb_cape/src/cape/Makefile b/bbb_cape/src/cape/Makefile
index 244d160..5dee875 100644
--- a/bbb_cape/src/cape/Makefile
+++ b/bbb_cape/src/cape/Makefile
@@ -35,6 +35,8 @@
 	uart \
 	fill_packet \
 	cows \
+	encoder \
+	crc \
 
 OBJECTS_bootloader := bootloader \
 	uart_common \
@@ -66,9 +68,9 @@
 		-Wa,-Map -Wa,$(OBJDIR)$*.map
 
 $(OBJECTS:%=$(OBJDIR)%.o): $(OBJDIR)%.o: %.c
-	$(CC) $(CPPFLAGS) $(CFLAGS) -MD -MP -c -o $@ $<
+	$(CC) $(CPPFLAGS) $(CFLAGS) -MD -MP -MT '$@ $*.s' -c -o $@ $<
 $(OBJDIR)%.o: %.S
-	$(CC) $(CPPFLAGS) $(ASFLAGS) -MD -MP -c -o $@ $<
+	$(CC) $(CPPFLAGS) $(ASFLAGS) -MD -MP -MT '$@ $*.s' -c -o $@ $<
 
 # So that you can see the assembly for an individual file with any comments etc.
 $(OBJECTS:%=%.s): %.s: %.c
diff --git a/bbb_cape/src/cape/bootloader.c b/bbb_cape/src/cape/bootloader.c
index fe2f6be..1ed6218 100644
--- a/bbb_cape/src/cape/bootloader.c
+++ b/bbb_cape/src/cape/bootloader.c
@@ -1,5 +1,7 @@
 #include <stdint.h>
 
+#include <STM32F2XX.h>
+
 #include "cape/bootloader_handoff.h"
 
 // Sets everything up and then jumps to the main code.
@@ -14,5 +16,11 @@
 }
 
 void _start(void) {
+  // SYSCFG's registers are only usable once its peripheral clock is on.
+  RCC->APB2ENR |= RCC_APB2ENR_SYSCFGEN;
+  SYSCFG->CMPCR = SYSCFG_CMPCR_CMP_PD;  // enable IO compensation cell
+  while (!(SYSCFG->CMPCR & SYSCFG_CMPCR_READY)) {}  // wait for it to be ready
+  // We don't have anything on the 1 port D pin, so don't bother enabling it.
+  RCC->AHB1ENR |= RCC_AHB1ENR_GPIOAEN | RCC_AHB1ENR_GPIOBEN | RCC_AHB1ENR_GPIOCEN;
   jump_to_main();
 }
diff --git a/bbb_cape/src/cape/crc.c b/bbb_cape/src/cape/crc.c
new file mode 100644
index 0000000..fcce8b9
--- /dev/null
+++ b/bbb_cape/src/cape/crc.c
@@ -0,0 +1,15 @@
+#include "cape/crc.h"
+
+#include <STM32F2XX.h>
+
+void crc_init(void) {
+  RCC->AHB1ENR |= RCC_AHB1ENR_CRCEN;  // turn on the CRC unit's clock
+}
+
+uint32_t crc_calculate(uint32_t *restrict data, size_t words) {
+  CRC->CR = 1;  // reset it so the result only covers this data
+  for (size_t i = 0; i < words; ++i) {  // feed in exactly words 32-bit words
+    CRC->DR = data[i];
+  }
+  return CRC->DR;  // the checksum of everything written since the reset
+}
diff --git a/bbb_cape/src/cape/crc.h b/bbb_cape/src/cape/crc.h
new file mode 100644
index 0000000..2aeed82
--- /dev/null
+++ b/bbb_cape/src/cape/crc.h
@@ -0,0 +1,13 @@
+#ifndef CAPE_CRC_H_
+#define CAPE_CRC_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+// Enables the CRC peripheral's clock; call it before crc_calculate.
+void crc_init(void);
+// Checksums words 32-bit words starting at data. words is the number of words
+// to checksum, NOT the number of bytes.
+uint32_t crc_calculate(uint32_t *restrict data, size_t words);
+
+#endif  // CAPE_CRC_H_
diff --git a/bbb_cape/src/cape/data_struct.h b/bbb_cape/src/cape/data_struct.h
index 051095e..fdcb064 100644
--- a/bbb_cape/src/cape/data_struct.h
+++ b/bbb_cape/src/cape/data_struct.h
@@ -15,9 +15,13 @@
 
   union {
     struct {
-      // In us since the cape last reset.
+      // In 1/3us since the cape last reset.
       uint64_t timestamp;
 
+      // The CRC32 (same algorithm as the checksum for the packet) of the whole
+      // contents of flash for the main code (aka what's in the .hex file).
+      uint32_t flash_checksum;
+
       struct {
         // If the current gyro_angle has been not updated because of a bad
         // reading from the sensor.
@@ -35,52 +39,14 @@
 
   union {
     struct {
-      int32_t left_drive;
-      int32_t right_drive;
-      int32_t shooter_angle;
-      int32_t shooter;
-      int32_t indexer;
-      int32_t wrist;
+      int32_t encoders[8];
 
-      int32_t capture_top_rise;
-      int32_t capture_top_fall;
-      int32_t capture_bottom_fall_delay;
-      int32_t capture_wrist_rise;
-      int32_t capture_shooter_angle_rise;
+      uint16_t analogs[8];
 
-      uint16_t battery_voltage;
-      uint16_t left_drive_hall;
-      uint16_t right_drive_hall;
-
-      int8_t top_rise_count;
-
-      int8_t top_fall_count;
-
-      int8_t bottom_rise_count;
-
-      int8_t bottom_fall_delay_count;
-      int8_t bottom_fall_count;
-
-      int8_t wrist_rise_count;
-
-      int8_t shooter_angle_rise_count;
-
-      struct {
-        uint8_t wrist_hall_effect : 1;
-        uint8_t angle_adjust_bottom_hall_effect : 1;
-        uint8_t top_disc : 1;
-        uint8_t bottom_disc : 1;
-        uint8_t loader_top : 1;
-        uint8_t loader_bottom : 1;
-      };
+      uint32_t digitals;
     } main;
     
     struct {
-      union {
-        struct {
-        };
-        uint16_t booleans;
-      };
     } bot3;
   };
 } __attribute__((aligned(8)));
diff --git a/bbb_cape/src/cape/encoder.c b/bbb_cape/src/cape/encoder.c
new file mode 100644
index 0000000..45b7d16
--- /dev/null
+++ b/bbb_cape/src/cape/encoder.c
@@ -0,0 +1,119 @@
+#include "cape/encoder.h"
+
+#include <STM32F2XX.h>
+
+#include "cape/util.h"
+
+// Here is where each encoder is hooked up:
+// 0: PC6,PC7 TIM8
+// 1: PC0,PC1 EXTI0,EXTI1
+// 2: PA0,PA1 TIM5(32)
+// 3: PA2,PA3 EXTI2,EXTI3
+// 4: PA8,PB0 TIM1
+// 5: PA5,PB3 TIM2(32)
+// 6: PA6,PB5 TIM3
+// 7: PB6,PB7 TIM4
+
+volatile int32_t encoder1_value = 0;
+volatile int32_t encoder3_value = 0;
+
+// 1.A: an edge on encoder 1's A input (PC0).
+void EXTI0_IRQHandler(void) {
+  EXTI->PR = EXTI_PR_PR0;
+  uint32_t inputs = GPIOC->IDR;  // encoder 1 is wired to port C, not port A
+  // This looks like a weird way to XOR the 2 inputs, but it compiles down to
+  // just 2 instructions, which is hard to beat.
+  if (((inputs >> 1) ^ inputs) & (1 << 0)) {
+    ++encoder1_value;
+  } else {
+    --encoder1_value;
+  }
+}
+
+// 1.B: an edge on encoder 1's B input (PC1); A != B here counts down.
+void EXTI1_IRQHandler(void) {
+  EXTI->PR = EXTI_PR_PR1;
+  uint32_t inputs = GPIOC->IDR;  // encoder 1 is wired to port C, not port A
+  if (((inputs >> 1) ^ inputs) & (1 << 0)) {
+    --encoder1_value;
+  } else {
+    ++encoder1_value;
+  }
+}
+
+// 3.A: an edge on encoder 3's A input (PA2); A != B here counts up.
+void EXTI2_IRQHandler(void) {
+  EXTI->PR = EXTI_PR_PR2;
+  uint32_t inputs = GPIOA->IDR;  // encoder 3 is wired to port A, not port C
+  if (((inputs >> 1) ^ inputs) & (1 << 2)) {
+    ++encoder3_value;
+  } else {
+    --encoder3_value;
+  }
+}
+
+// 3.B: an edge on encoder 3's B input (PA3); A != B here counts down.
+void EXTI3_IRQHandler(void) {
+  EXTI->PR = EXTI_PR_PR3;
+  uint32_t inputs = GPIOA->IDR;  // encoder 3 is wired to port A, not port C
+  if (((inputs >> 1) ^ inputs) & (1 << 2)) {
+    --encoder3_value;
+  } else {
+    ++encoder3_value;
+  }
+}
+
+static void encoder_setup(TIM_TypeDef *timer) {
+  // Puts timer into 4x quadrature mode, counting its input 1 and 2 pins.
+  const uint16_t direct_inputs = TIM_CCMR1_CC1S_0 | TIM_CCMR1_CC2S_0;
+  timer->CR1 = TIM_CR1_UDIS;
+  timer->SMCR = 3;  // 4x quadrature encoder mode
+  timer->CCMR1 = direct_inputs;  // input pin N -> timer input N, for N=1,2
+  timer->EGR = TIM_EGR_UG;
+  timer->CR1 |= TIM_CR1_CEN;  // start counting
+}
+
+void encoder_init(void) {
+  SYSCFG->EXTICR[0] =  // EXTI0,1 -> port C (encoder 1), EXTI2,3 -> port A (3)
+      SYSCFG_EXTICR1_EXTI0_PC |
+      SYSCFG_EXTICR1_EXTI1_PC |
+      SYSCFG_EXTICR1_EXTI2_PA |
+      SYSCFG_EXTICR1_EXTI3_PA;
+  EXTI->IMR |= EXTI_IMR_MR0 | EXTI_IMR_MR1 | EXTI_IMR_MR2 | EXTI_IMR_MR3;
+  EXTI->RTSR |= EXTI_RTSR_TR0 | EXTI_RTSR_TR1 | EXTI_RTSR_TR2 | EXTI_RTSR_TR3;
+  EXTI->FTSR |= EXTI_FTSR_TR0 | EXTI_FTSR_TR1 | EXTI_FTSR_TR2 | EXTI_FTSR_TR3;
+  NVIC_EnableIRQ(EXTI0_IRQn);
+  NVIC_EnableIRQ(EXTI1_IRQn);
+  NVIC_EnableIRQ(EXTI2_IRQn);
+  NVIC_EnableIRQ(EXTI3_IRQn);
+  // Encoder 4 (PA8,PB0) on TIM1.
+  gpio_setup_alt(GPIOA, 8, 1);
+  gpio_setup_alt(GPIOB, 0, 1);  // NOTE(review): confirm AF1 here is TIM1_CH2
+  RCC->APB2ENR |= RCC_APB2ENR_TIM1EN;
+  encoder_setup(TIM1);
+  // Encoder 5 (PA5,PB3) on TIM2.
+  gpio_setup_alt(GPIOA, 5, 1);
+  gpio_setup_alt(GPIOB, 3, 1);
+  RCC->APB1ENR |= RCC_APB1ENR_TIM2EN;
+  encoder_setup(TIM2);
+  // Encoder 6 (PA6,PB5) on TIM3.
+  gpio_setup_alt(GPIOA, 6, 2);
+  gpio_setup_alt(GPIOB, 5, 2);
+  RCC->APB1ENR |= RCC_APB1ENR_TIM3EN;
+  encoder_setup(TIM3);
+  // Encoder 7 (PB6,PB7) on TIM4.
+  gpio_setup_alt(GPIOB, 6, 2);
+  gpio_setup_alt(GPIOB, 7, 2);
+  RCC->APB1ENR |= RCC_APB1ENR_TIM4EN;
+  encoder_setup(TIM4);
+  // Encoder 2 (PA0,PA1) on TIM5.
+  gpio_setup_alt(GPIOA, 0, 2);
+  gpio_setup_alt(GPIOA, 1, 2);
+  RCC->APB1ENR |= RCC_APB1ENR_TIM5EN;
+  encoder_setup(TIM5);
+  // Encoder 0 (PC6,PC7) on TIM8.
+  gpio_setup_alt(GPIOC, 6, 3);
+  gpio_setup_alt(GPIOC, 7, 3);
+  RCC->APB2ENR |= RCC_APB2ENR_TIM8EN;
+  encoder_setup(TIM8);
+}
diff --git a/bbb_cape/src/cape/encoder.h b/bbb_cape/src/cape/encoder.h
new file mode 100644
index 0000000..5a69835
--- /dev/null
+++ b/bbb_cape/src/cape/encoder.h
@@ -0,0 +1,80 @@
+#ifndef CAPE_ENCODER_H_
+#define CAPE_ENCODER_H_
+
+#include <stdint.h>
+#include <limits.h>
+
+#include <STM32F2XX.h>
+
+void encoder_init(void);
+
+// Updates a signed 32-bit counter with a new 16-bit value. Assumes that the
+// value changes by less than half its range between updates, so a bigger jump
+// means the 16-bit value wrapped (forwards if it dropped, backwards if it
+// rose). new is 32 bits so it doesn't have to get masked, but the value passed
+// in must be <= UINT16_MAX. Useful for 16-bit encoder counters.
+static inline void counter_update_s32_u16(int32_t *restrict counter,
+                                          uint32_t new) {
+  static const uint16_t kHalf = 0xFFFF / 2;
+  uint16_t old = *counter & 0xFFFF;
+  int32_t counter_top = *counter ^ old;  // *counter with the low 16 bits clear
+  int32_t delta = (int32_t)new - (int32_t)old;
+  int32_t new_counter;
+  if (__builtin_expect(delta < -kHalf, 0)) {
+    new_counter = counter_top + 0x10000;  // wrapped forwards past 0xFFFF
+  } else if (__builtin_expect(delta > kHalf, 0)) {
+    new_counter = counter_top - 0x10000;  // wrapped backwards past 0
+  } else {
+    new_counter = counter_top;
+  }
+  *counter = new_counter | new;
+}
+
+// Brings an unsigned 64-bit counter up to date with a new 16-bit value.
+// Assumes that the value will not wrap more than once between updates.
+// new is 32 bits so that no masking is needed, but callers must pass a value
+// that is <= UINT16_MAX.
+// Useful for 16-bit timers being used for absolute timings.
+static inline void counter_update_u64_u16(uint64_t *restrict counter,
+                                          uint32_t new) {
+  const uint16_t previous_low = *counter & 0xFFFF;
+  // Everything above the low 16 bits, which new is about to replace.
+  uint64_t upper = *counter - previous_low;
+  if (__builtin_expect(new < previous_low, 0)) {
+    // A smaller value means the 16-bit value wrapped around once.
+    upper += 0x10000;
+  }
+  *counter = upper | new;
+}
+
+// Returns the current count for one encoder.
+// number is the 0-indexed number on the silkscreen; anything outside 0-7
+// returns INT32_MAX.
+static inline int32_t encoder_read(int number) {
+  // Running totals for the encoders whose timer counts get extended from 16
+  // bits by counter_update_s32_u16.
+  static int32_t value0, value4, value6, value7;
+  extern volatile int32_t encoder1_value, encoder3_value;
+  int32_t *total;
+  TIM_TypeDef *timer;
+  switch (number) {
+    // These two get counted by the EXTI interrupt handlers.
+    case 1: return encoder1_value;
+    case 3: return encoder3_value;
+    // These two timers' counts get returned without any extending.
+    case 2: return TIM5->CNT;
+    case 5: return TIM2->CNT;
+    // The rest need their running total brought up to date first.
+    case 0: total = &value0; timer = TIM8; break;
+    case 4: total = &value4; timer = TIM1; break;
+    case 6: total = &value6; timer = TIM3; break;
+    case 7: total = &value7; timer = TIM4; break;
+    default:
+      return INT32_MAX;
+  }
+  // Fold the timer's current 16-bit count into the running total.
+  counter_update_s32_u16(total, timer->CNT);
+  return *total;
+}
+
+#endif  // CAPE_ENCODER_H_
diff --git a/bbb_cape/src/cape/fill_packet.c b/bbb_cape/src/cape/fill_packet.c
index e8516a9..537c527 100644
--- a/bbb_cape/src/cape/fill_packet.c
+++ b/bbb_cape/src/cape/fill_packet.c
@@ -7,10 +7,38 @@
 #include "cape/uart_dma.h"
 #include "cape/uart_common.h"
 #include "cape/cows.h"
+#include "cape/encoder.h"
+#include "cape/crc.h"
+#include "cape/bootloader_handoff.h"
+
+#define TIMESTAMP_TIM TIM6
+#define RCC_APB1ENR_TIMESTAMP_TIMEN RCC_APB1ENR_TIM6EN
 
 static uint8_t buffer1[DATA_STRUCT_SEND_SIZE] __attribute__((aligned(4)));
 static uint8_t buffer2[DATA_STRUCT_SEND_SIZE] __attribute__((aligned(4)));
 
+static uint32_t flash_checksum;
+// These aren't really integers; they're (4-byte) variables whose addresses mark
+// various locations.
+extern uint8_t __etext, __data_start__, __data_end__;
+
+static inline void do_fill_packet(struct DataStruct *packet) {
+  // Writes the timestamp, flash checksum, and encoder values into packet.
+  // The running 64-bit time, extended from the timer's 16-bit count on each
+  // call.
+  static uint64_t timestamp = 0;
+  counter_update_u64_u16(&timestamp, TIMESTAMP_TIM->CNT);
+  packet->timestamp = timestamp;
+
+  // Calculated once by fill_packet_start.
+  packet->flash_checksum = flash_checksum;
+
+  // The index into encoders matches the number encoder_read takes.
+  for (int encoder = 0; encoder < 8; ++encoder) {
+    packet->main.encoders[encoder] = encoder_read(encoder);
+  }
+}
+
 // Fills the new packet with data.
 void uart_dma_callback(uint8_t *buffer) {
   struct {
@@ -22,23 +50,28 @@
                 The_size_of_the_data_is_wrong);
   struct DataStruct *packet = &data.packet;
 
-  CRC->CR = 1;  // reset it
-  uint32_t *p1;
-  memcpy(&p1, &packet, sizeof(void *));
-  {
-    uint32_t *restrict p = p1;
-    for (; p < (uint32_t *)(packet + 1); ++p) {
-      CRC->DR = *p;
-    }
-  }
-  data.checksum = CRC->DR;
+  do_fill_packet(packet);
+
+  uint32_t *p;
+  memcpy(&p, &packet, sizeof(void *));
+  data.checksum = crc_calculate(p, sizeof(*packet) / 4);
 
   memset(buffer, 0, 4);
   cows_stuff(&data, sizeof(data), buffer + 4);
 }
 
 void fill_packet_start(void) {
-  RCC->AHB1ENR |= RCC_AHB1ENR_CRCEN;
+  RCC->APB1ENR |= RCC_APB1ENR_TIMESTAMP_TIMEN;
+  TIMESTAMP_TIM->CR1 = TIM_CR1_UDIS;
+  TIMESTAMP_TIM->EGR = TIM_EGR_UG;
+  TIMESTAMP_TIM->CR1 |= TIM_CR1_CEN;
+
+  crc_init();
+  encoder_init();
+
+  uint8_t *flash_end = &__etext + (&__data_start__ - &__data_end__) + 8;
+  flash_checksum = crc_calculate((void *)MAIN_FLASH_START,
+                                 (size_t)(flash_end - MAIN_FLASH_START) / 4);
 
   uart_common_configure(3000000);
   uart_dma_configure(DATA_STRUCT_SEND_SIZE, buffer1, buffer2);
diff --git a/bbb_cape/src/cape/main.c b/bbb_cape/src/cape/main.c
index 02f8223..32b87da 100644
--- a/bbb_cape/src/cape/main.c
+++ b/bbb_cape/src/cape/main.c
@@ -1,6 +1,6 @@
 #include <STM32F2XX.h>
 
-#include "cape/bootloader_handoff.h"
+#include "cape/fill_packet.h"
 
 // The startup asm code defines this to the start of our exception vector table.
 extern uint32_t _vectors;
@@ -9,4 +9,6 @@
   // Change the vector table offset to use our vector table instead of the
   // bootloader's.
   SCB->VTOR = (uint32_t)&_vectors;
+
+  fill_packet_start();
 }
diff --git a/bbb_cape/src/cape/peripherial_usage.notes b/bbb_cape/src/cape/peripherial_usage.notes
index 5f361f7..b0dac40 100644
--- a/bbb_cape/src/cape/peripherial_usage.notes
+++ b/bbb_cape/src/cape/peripherial_usage.notes
@@ -24,8 +24,14 @@
 [encoders]
 encoder
   TIM1,TIM2,TIM3,TIM4,TIM5,TIM8
+  EXTI0,EXTI1,EXTI2,EXTI3
+  EXTI0_IRQ:0,EXTI1_IRQ:0,EXTI2_IRQ:0,EXTI3_IRQ:0
+
 
 [sensor packet sending]
 fill_packet
   TIM6
+
+[utilities]
+crc
   CRC
diff --git a/bbb_cape/src/cape/uart_byte.c b/bbb_cape/src/cape/uart_byte.c
index 76297e3..6bafdb6 100644
--- a/bbb_cape/src/cape/uart_byte.c
+++ b/bbb_cape/src/cape/uart_byte.c
@@ -4,8 +4,11 @@
 #include <STM32F2XX.h>
 
 #define TIMEOUT_TIM TIM7
+#define RCC_APB1ENR_TIMEOUT_TIMEN RCC_APB1ENR_TIM7EN
 
 void uart_byte_configure(void) {
+  // Turn on the timeout timer's clock before writing any of its registers.
+  RCC->APB1ENR |= RCC_APB1ENR_TIMEOUT_TIMEN;
   TIMEOUT_TIM->CR1 = TIM_CR1_UDIS;
 }
 
diff --git a/bbb_cape/src/cape/uart_byte.h b/bbb_cape/src/cape/uart_byte.h
index 7aa7f43..7a82e76 100644
--- a/bbb_cape/src/cape/uart_byte.h
+++ b/bbb_cape/src/cape/uart_byte.h
@@ -7,7 +7,7 @@
 void uart_byte_configure(void);
 
 // Spins until 1 byte is received or some amount of time. The timeout is
-// timeout_count*timeout_divider/30MHz.
+// timeout_count*(timeout_divider+1)/30MHz.
 // The result is <0 for timeout or the received byte.
 int uart_byte_receive(uint16_t timeout_count, uint16_t timeout_divider);
 
diff --git a/bbb_cape/src/cape/uart_common.c b/bbb_cape/src/cape/uart_common.c
index 77c796f..783c154 100644
--- a/bbb_cape/src/cape/uart_common.c
+++ b/bbb_cape/src/cape/uart_common.c
@@ -1,10 +1,17 @@
 #include "cape/uart_common.h"
 #include "cape/uart_common_private.h"
 
+#include "cape/util.h"
+
 #define FPCLK 60000000
 
+// The UART is on PA9 and PA10.
 void uart_common_configure(int baud) {
+  gpio_setup_alt(GPIOA, 9, 7);
+  gpio_setup_alt(GPIOA, 10, 7);
+  GPIOA->OSPEEDR |= GPIO_OSPEEDER_OSPEEDR9;  // we want to go FAST!
   RCC->APB2ENR |= RCC_APB2ENR_USART1EN;
+
   // baud = 60MHz / (8 * (2 - OVER8) * (mantissa / fraction))
   int fraction = 8;  // the biggest it can be with OVER8=0
   int mantissa = FPCLK * (16 /* 8 * (2 - OVER8) */ / fraction) / baud;
diff --git a/bbb_cape/src/cape/util.h b/bbb_cape/src/cape/util.h
index dac30d5..b27cc77 100644
--- a/bbb_cape/src/cape/util.h
+++ b/bbb_cape/src/cape/util.h
@@ -1,6 +1,8 @@
 #ifndef CAPE_UTIL_H_
 #define CAPE_UTIL_H_
 
+#include <STM32F2XX.h>
+
 #define ALIAS_WEAK(f) __attribute__ ((weak, alias (#f)))
 
 // MSG has to be separated_with_spaces.
@@ -11,4 +13,26 @@
   __asm__ __volatile__("" ::: "memory");
 }
 
+// Sets the number_bits-wide field at index shift in variable to value. The
+// total left shift is number_bits*shift bits. variable is evaluated twice.
+// The arithmetic is unsigned so fields reaching bit 31 don't overflow.
+#define SET_BITS(variable, number_bits, value, shift) do { \
+  (variable) = (((variable) & \
+      ~(((1u << (number_bits)) - 1u) << ((shift) * (number_bits)))) | \
+      ((unsigned)(value) << ((shift) * (number_bits)))); \
+} while (0)
+
+// A convenient way to switch a GPIO pin over to some alternate function
+// without missing part or messing up which bits need setting to what: sets
+// the pin's mode and the pin's field in whichever AFR register holds it.
+// pin is the 0-indexed pin number.
+// afr is 0-0xF for the various alternate functions.
+static inline void gpio_setup_alt(GPIO_TypeDef *port, int pin, int afr) {
+  // 8 pins fit in each AFR register: AFR[0] has 0-7 and AFR[1] has the rest.
+  const int afr_index = pin < 8 ? 0 : 1;
+  const int afr_slot = pin < 8 ? pin : pin - 8;
+  SET_BITS(port->MODER, 2, 2 /* alternate function */, pin);
+  SET_BITS(port->AFR[afr_index], 4, afr, afr_slot);
+}
+
 #endif  // CAPE_UTIL_H_