got sending packets actually working
diff --git a/bbb_cape/src/bbb/crc.cc b/bbb_cape/src/bbb/crc.cc
index 214f86f..b16eb94 100644
--- a/bbb_cape/src/bbb/crc.cc
+++ b/bbb_cape/src/bbb/crc.cc
@@ -32,11 +32,13 @@
 
   uint32_t r = 0xFFFFFFFF;
 
-  for (size_t i = 0; i < length; ++i) {
-    r = (r << 8) ^ table[(r >> 24) ^ data[i]];
+  for (size_t i = 0; i < (length / 4); ++i) {
+    for (int ii = 3; ii >= 0; --ii) {
+      r = (r << 8) ^ table[(r >> 24) ^ data[i * 4 + ii]];
+    }
   }
 
-  return ~r;
+  return r;
 }
 
 }  // namespace cape
diff --git a/bbb_cape/src/bbb/uart_reader.cc b/bbb_cape/src/bbb/uart_reader.cc
index 0a3f3c6..7b5e116 100644
--- a/bbb_cape/src/bbb/uart_reader.cc
+++ b/bbb_cape/src/bbb/uart_reader.cc
@@ -14,6 +14,8 @@
 #include "bbb_cape/src/cape/cows.h"
 #include "bbb/crc.h"
 
+#define PACKET_SIZE (DATA_STRUCT_SEND_SIZE - 4)
+
 // This is the code for receiving data from the cape via UART.
 // NOTE: In order for this to work, you MUST HAVE
 // "capemgr.enable_partno=BB_UART1"
@@ -32,9 +34,10 @@
 
 UartReader::UartReader(int32_t baud_rate)
     : baud_rate_(baud_rate),
-      buf_(new AlignedChar[DATA_STRUCT_SEND_SIZE]),
+      buf_(new AlignedChar[PACKET_SIZE]),
+      unstuffed_data_(new AlignedChar[PACKET_SIZE - 4]),
       fd_(open(device, O_RDWR | O_NOCTTY)) {
-  static_assert((DATA_STRUCT_SEND_SIZE % 4) == 0,
+  static_assert((PACKET_SIZE % 4) == 0,
                 "We can't do checksums of lengths that aren't multiples of 4.");
 
   if (fd_ < 0) {
@@ -71,7 +74,7 @@
     options.c_oflag = 0;
     options.c_lflag = 0;
     options.c_cc[VMIN] = 0;
-    options.c_cc[VTIME] = 1;
+    options.c_cc[VTIME] = 10;
     if (tcsetattr(fd_, TCSANOW, &options) != 0) {
       LOG(FATAL, "tcsetattr(%d, TCSANOW, %p) failed with %d: %s\n",
           fd_, &options, errno, strerror(errno));
@@ -117,91 +120,107 @@
 
 UartReader::~UartReader() {
-  delete buf_;
+  delete[] buf_;  // allocated with new[] in the constructor
+  delete[] unstuffed_data_;  // likewise an array allocation
   if (fd_ > 0) close(fd_);
 }
 
+// TODO(brians): Figure out why this (sometimes?) gets confused right after
+// flashing the cape.
 bool UartReader::FindPacket() {
   // How many 0 bytes we've found at the front so far.
   int zeros_found = 0;
-  // How many bytes of the packet we've read in (or -1 if we don't know where
-  // the packet is).
-  int packet_bytes = -1;
   while (true) {
-    size_t already_read = ::std::max(packet_bytes, 0);
+    size_t already_read = ::std::max(0, packet_bytes_);  // -1 counts as 0
     ssize_t new_bytes =
-        read(fd_, buf_ + already_read, DATA_STRUCT_SEND_SIZE - already_read);
-    LOG(DEBUG, "read %zd, wanted %d\n", new_bytes,
-        DATA_STRUCT_SEND_SIZE - already_read);
-    for (int i = 0; i < new_bytes; ++i) {
-      LOG(DEBUG, "%x\n", buf_[i]);
-    }
+        read(fd_, buf_ + already_read, PACKET_SIZE - already_read);
     if (new_bytes < 0) {
       if (errno == EINTR) continue;
-      LOG(WARNING, "read(%d, %p, %zd) failed with %d: %s\n",
-          fd_, buf_ + already_read, DATA_STRUCT_SEND_SIZE - already_read,
+      LOG(FATAL, "read(%d, %p, %zd) failed with %d: %s\n",
+          fd_, buf_ + already_read, PACKET_SIZE - already_read,
           errno, strerror(errno));
       return false;
     }
 
-    if (packet_bytes != -1) {  // we think there's a packet at the beginning of
-                               // our buffer
-      for (int to_check = packet_bytes; packet_bytes + new_bytes; ++to_check) {
-        // We shouldn't find any 0s in the middle of what should be a packet.
+    if (packet_bytes_ == -1) {  // still hunting for the 4-zero start marker
+      for (size_t to_check = already_read; to_check < already_read + new_bytes;
+           ++to_check) {
         if (buf_[to_check] == 0) {
-          packet_bytes = -1;
-          memmove(buf_, buf_ + to_check, new_bytes - to_check);
-          new_bytes -= to_check;
-          break;
-        }
-      }
-      if (packet_bytes != -1) {
-        packet_bytes += new_bytes;
-        if (packet_bytes == DATA_STRUCT_SEND_SIZE) return true;
-      }
-    }
-    // This can't just be an else because the above code might set it to -1 if
-    // it finds 0s in the middle of a packet.
-    if (packet_bytes == -1) {
-      // Find the beginning of the packet (aka look for four zero bytes).
-      for (ssize_t checked = 0; checked < new_bytes; ++checked) {
-        if (buf_[checked] == 0) {
           ++zeros_found;
           if (zeros_found == 4) {
-            packet_bytes = new_bytes - checked - 1;
-            memmove(buf_, buf_ + checked + 1, packet_bytes);
-            break;
+            packet_bytes_ = 0;  // packet data starts right after the zeros
+            zeros_found = 0;
+            new_bytes -= to_check + 1;  // discard through the 4th zero
+            memmove(buf_, buf_ + to_check + 1, new_bytes);  // slide rest to front
+            to_check = 0;  // NOTE(review): ++to_check then skips index 0
           }
         } else {
           zeros_found = 0;
         }
       }
     }
+    if (packet_bytes_ != -1) {  // if we decided that these are good bytes
+      packet_bytes_ += new_bytes;
+      if (packet_bytes_ == PACKET_SIZE) return true;  // buf_ is full
+    }
   }
 }
 
-bool UartReader::GetPacket(DataStruct *packet) {
-  if (!FindPacket()) return false;
-
-  uint32_t unstuffed = cows_unstuff(reinterpret_cast<uint32_t *>(buf_),
-                                    DATA_STRUCT_SEND_SIZE / 4,
-                                    reinterpret_cast<uint32_t *>(packet));
+bool UartReader::ProcessPacket() {  // unstuffs buf_, then checks the checksum
+  uint32_t unstuffed =
+      cows_unstuff(reinterpret_cast<uint32_t *>(buf_), PACKET_SIZE,  // bytes
+                   reinterpret_cast<uint32_t *>(unstuffed_data_));
   if (unstuffed == 0) {
     LOG(WARNING, "invalid packet\n");
-  } else if (unstuffed != sizeof(packet)) {
+    return false;
+  } else if (unstuffed != (PACKET_SIZE - 4) / 4) {  // compared as a word count
     LOG(WARNING, "packet is %" PRIu32 " words instead of %" PRIu32 "\n",
-        unstuffed, DATA_STRUCT_SEND_SIZE / 4);
+        unstuffed, (PACKET_SIZE - 4) / 4);
+    return false;
   }
 
   // Make sure the checksum checks out.
-  uint32_t checksum;
-  memcpy(&checksum, buf_ + DATA_STRUCT_SEND_SIZE - 4, 4);
-  if (cape::CalculateChecksum(reinterpret_cast<uint8_t *>(packet),
-                              sizeof(DataStruct)) != checksum) {
-    LOG(WARNING, "Rejecting packet due to checksum failure.\n");
+  uint32_t sent_checksum;
+  memcpy(&sent_checksum, unstuffed_data_ + PACKET_SIZE - 8, 4);  // last 4 bytes
+  uint32_t calculated_checksum = cape::CalculateChecksum(
+      reinterpret_cast<uint8_t *>(unstuffed_data_), PACKET_SIZE - 8);  // the rest
+  if (sent_checksum != calculated_checksum) {
+    LOG(WARNING, "sent checksum: %" PRIx32 " vs calculated: %" PRIx32"\n",
+        sent_checksum, calculated_checksum);
     return false;
   }
 
   return true;
 }
 
+bool UartReader::GetPacket(DataStruct *packet) {
+  static_assert(sizeof(*packet) <= PACKET_SIZE - 8,
+                "output data type is too big");
+  // FindPacket() fills buf_ with the next PACKET_SIZE candidate bytes.
+  if (!FindPacket()) return false;
+
+  if (!ProcessPacket()) {  // bad packet: try to resync inside buf_
+    packet_bytes_ = -1;  // default: no known packet start
+    int zeros = 0;
+    for (int i = 0; i < PACKET_SIZE; ++i) {
+      if (buf_[i] == 0) {
+        ++zeros;
+        if (zeros == 4) {
+          LOG(INFO, "found another packet start at %d\n", i);
+          packet_bytes_ = PACKET_SIZE - (i + 1);  // bytes kept after the marker
+          memmove(buf_, buf_ + i + 1, packet_bytes_);
+          return false;  // the next FindPacket() resumes from these bytes
+        }
+      } else {
+        zeros = 0;
+      }
+    }
+    return false;
+  } else {
+    packet_bytes_ = -1;  // packet consumed; search for a new start next time
+  }
+  memcpy(packet, unstuffed_data_, sizeof(*packet));  // leading bytes only
+
+  return true;
+}
+
 }  // namespace bbb
diff --git a/bbb_cape/src/bbb/uart_reader.h b/bbb_cape/src/bbb/uart_reader.h
index 9d5a510..ea0cae0 100644
--- a/bbb_cape/src/bbb/uart_reader.h
+++ b/bbb_cape/src/bbb/uart_reader.h
@@ -3,6 +3,8 @@
 
 #include <stdint.h>
 
+#include <memory>
+
 #define DATA_STRUCT_NAME DataStruct
 #include "cape/data_struct.h"
 #undef DATA_STRUCT_NAME
@@ -24,11 +26,22 @@
   // packet is invalid in some way.
   bool FindPacket();
 
-  typedef char __attribute__((aligned(8))) AlignedChar;
+  // Processes a packet currently in buf_ and leaves the result in
+  // unstuffed_data_.
+  // Returns true if it succeeds or false if there was something wrong with the
+  // data.
+  bool ProcessPacket();
+
+  typedef char __attribute__((aligned(4))) AlignedChar;
 
   const int32_t baud_rate_;
   AlignedChar *const buf_;
+  AlignedChar *const unstuffed_data_;
   const int fd_;
+
+  // How many bytes of the packet we've read in (or -1 if we don't know where
+  // the packet is).
+  int packet_bytes_ = -1;
 };
 
 }  // namespace bbb
diff --git a/bbb_cape/src/bbb/uart_reader_main.cc b/bbb_cape/src/bbb/uart_reader_main.cc
index d0fd028..aadd902 100644
--- a/bbb_cape/src/bbb/uart_reader_main.cc
+++ b/bbb_cape/src/bbb/uart_reader_main.cc
@@ -23,8 +23,7 @@
 #endif
 
   //::bbb::UartReader receiver(3000000);
-  ::bbb::UartReader receiver(300000);
-  //::bbb::UartReader receiver(19200);
+  ::bbb::UartReader receiver(30000);
 
   Time last_packet_time = Time::Now();
   while (true) {
diff --git a/bbb_cape/src/cape/cows.c b/bbb_cape/src/cape/cows.c
index 16ed1fc..bad2b30 100644
--- a/bbb_cape/src/cape/cows.c
+++ b/bbb_cape/src/cape/cows.c
@@ -51,7 +51,7 @@
     for (uint32_t i = 1; i < code; ++i) {
       destination[destination_index++] = source[source_index++];
     }
-    if (code != UINT32_MAX && source_index != source_length) {
+    if (code != UINT32_MAX && source_index != source_length / 4) {
       destination[destination_index++] = 0;
     }
   }
diff --git a/bbb_cape/src/cape/fill_packet.c b/bbb_cape/src/cape/fill_packet.c
index eb7d565..4f1c717 100644
--- a/bbb_cape/src/cape/fill_packet.c
+++ b/bbb_cape/src/cape/fill_packet.c
@@ -58,11 +58,11 @@
                 The_size_of_the_data_is_wrong);
   struct DataStruct *packet = &data.packet;
 
-  do_fill_packet(packet);
+  //do_fill_packet(packet);
 
   uint32_t *p;
   memcpy(&p, &packet, sizeof(void *));
-  data.checksum = crc_calculate(p, sizeof(*packet) / 4);
+  data.checksum = crc_calculate(p, (sizeof(data) - 4) / 4);
 
   ((uint32_t *)buffer)[0] = 0;
   cows_stuff(&data, sizeof(data), buffer + 4);
@@ -87,19 +87,6 @@
   //gyro_init();
 
   //uart_common_configure(3000000);
-  uart_common_configure(300000);
-  //uart_common_configure(19200);
-#if 0
-  //for (int i = 0; i < 5; ++i) uart_byte_send(255);
-  for (int i = 0; i < 10; ++i) uart_byte_send(i + 20);
-  //uart_byte_send('a');
-  //uart_byte_send('b');
-  //uart_byte_send('c');
-  //uart_byte_send('d');
-  led_set(LED_DB, 1);
-  (void)buffer1;
-  (void)buffer2;
-#else
+  uart_common_configure(30000);
   uart_dma_configure(DATA_STRUCT_SEND_SIZE, buffer1, buffer2);
-#endif
 }
diff --git a/bbb_cape/src/cape/uart_common.c b/bbb_cape/src/cape/uart_common.c
index 0cb7507..7b05102 100644
--- a/bbb_cape/src/cape/uart_common.c
+++ b/bbb_cape/src/cape/uart_common.c
@@ -26,7 +26,7 @@
   UART->CR1 =
       //USART_CR1_M /* 9th bit for the parity */ |
       //USART_CR1_PCE /* enable parity (even by default) */ |
-      USART_CR1_TE /* enable transmitter */ |
-      USART_CR1_RE /* enable receiver */;
+      //USART_CR1_OVER8 /* support going faster */ |
+      0;
   UART->CR1 |= USART_CR1_UE;  // enable it
 }
diff --git a/bbb_cape/src/cape/uart_dma.c b/bbb_cape/src/cape/uart_dma.c
index 8493632..ed576b4 100644
--- a/bbb_cape/src/cape/uart_dma.c
+++ b/bbb_cape/src/cape/uart_dma.c
@@ -46,28 +46,30 @@
   uart_dma_callback(buffer1);
 
   UART->CR3 = USART_CR3_DMAT;
+  UART->CR1 |= USART_CR1_TE;
 
   RCC->AHB1ENR |= RCC_AHB1ENR_DMAEN;
-  DMA_Stream->CR = DMA_CHANNEL_NUMBER << 25 |
-      DMA_SxCR_DBM /* enable double buffer mode */ |
-      2 << 16 /* priority */ |
-      //2 << 13 /* memory data size = 32 bits */ |
-      0 << 13 /* memory data size = 8 bits */ |
-      0 << 11 /* peripherial data size = 8 bits */ |
-      DMA_SxCR_MINC /* increment memory address */ |
-      1 << 6 /* memory to peripherial */ |
-      DMA_SxCR_HTIE | DMA_SxCR_DMEIE |
-      DMA_SxCR_TCIE | DMA_SxCR_TEIE;
+  DMA_Stream->CR = 0;
+  while (DMA_Stream->CR & DMA_SxCR_EN);  // make sure it's disabled
   DMA_Stream->PAR = (uint32_t)&UART->DR;
   DMA_Stream->M0AR = (uint32_t)buffer1;
   DMA_Stream->M1AR = (uint32_t)buffer2;
   // This is measured in chunks of PSIZE bytes, not MSIZE.
   DMA_Stream->NDTR = bytes;
-  DMA_FCR = 0xF << DMA_SR_SHIFT;
+  DMA_Stream->CR = DMA_CHANNEL_NUMBER << 25 |
+      DMA_SxCR_DBM /* enable double buffer mode */ |
+      2 << 16 /* priority */ |
+      2 << 13 /* memory data size = 32 bits */ |
+      0 << 11 /* peripherial data size = 8 bits */ |
+      DMA_SxCR_MINC /* increment memory address */ |
+      1 << 6 /* memory to peripherial */ |
+      //DMA_SxCR_PFCTRL /* peripherial controls flow */ |
+      DMA_SxCR_TCIE | DMA_SxCR_TEIE;
   DMA_Stream->FCR =
       DMA_SxFCR_DMDIS /* disable direct mode (enable the FIFO) */ |
-      //1 /* 1/2 full threshold */;
-      3 /* 100% full threshold */;
+      1 /* 1/2 full threshold */;
+  UART->SR = ~USART_SR_TC;
+  DMA_FCR = 0xF << DMA_SR_SHIFT;
   DMA_Stream->CR |= DMA_SxCR_EN;  // enable it
   NVIC_SetPriority(DMA_Stream_IRQn, 8);
   NVIC_EnableIRQ(DMA_Stream_IRQn);