Squashed 'third_party/apriltag/' content from commit 3e8e974d0

git-subtree-dir: third_party/apriltag
git-subtree-split: 3e8e974d0d8d6ab318abf56d87506d15d7f2cc35
Signed-off-by: Austin Schuh <austin.linux@gmail.com>
Change-Id: I04ba3cb2106b6813a1013d57aa8074c26f856598
diff --git a/common/debug_print.h b/common/debug_print.h
new file mode 100644
index 0000000..77e0d78
--- /dev/null
+++ b/common/debug_print.h
@@ -0,0 +1,41 @@
+/* Copyright (C) 2013-2016, The Regents of The University of Michigan.
+All rights reserved.
+This software was developed in the APRIL Robotics Lab under the
+direction of Edwin Olson, ebolson@umich.edu. This software may be
+available under alternative licensing terms; contact the address above.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+1. Redistributions of source code must retain the above copyright notice, this
+   list of conditions and the following disclaimer.
+2. Redistributions in binary form must reproduce the above copyright notice,
+   this list of conditions and the following disclaimer in the documentation
+   and/or other materials provided with the distribution.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+The views and conclusions contained in the software and documentation are those
+of the authors and should not be interpreted as representing official policies,
+either expressed or implied, of the Regents of The University of Michigan.
+*/
+#pragma once
+
+#if !defined(NDEBUG) || defined(_DEBUG)
+
+#include <string.h>
+#include <stdio.h>
+#define DEBUG 1
+
+#else
+#define DEBUG 0
+#endif
+
+#define debug_print(fmt, ...) \
+        do { if (DEBUG) fprintf(stderr, "%s:%d:%s(): " fmt, strrchr("/"__FILE__,'/')+1, \
+                                __LINE__, __func__, ##__VA_ARGS__); fflush(stderr);} while (0)
diff --git a/common/doubles.h b/common/doubles.h
new file mode 100644
index 0000000..fd5fc82
--- /dev/null
+++ b/common/doubles.h
@@ -0,0 +1,32 @@
+/* Copyright (C) 2013-2016, The Regents of The University of Michigan.
+All rights reserved.
+This software was developed in the APRIL Robotics Lab under the
+direction of Edwin Olson, ebolson@umich.edu. This software may be
+available under alternative licensing terms; contact the address above.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+1. Redistributions of source code must retain the above copyright notice, this
+   list of conditions and the following disclaimer.
+2. Redistributions in binary form must reproduce the above copyright notice,
+   this list of conditions and the following disclaimer in the documentation
+   and/or other materials provided with the distribution.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+The views and conclusions contained in the software and documentation are those
+of the authors and should not be interpreted as representing official policies,
+either expressed or implied, of the Regents of The University of Michigan.
+*/
+
+#pragma once
+
+#define TNAME double
+#include "doubles_floats_impl.h"
+#undef TNAME
diff --git a/common/doubles_floats_impl.h b/common/doubles_floats_impl.h
new file mode 100644
index 0000000..1817e08
--- /dev/null
+++ b/common/doubles_floats_impl.h
@@ -0,0 +1,1036 @@
+/* Copyright (C) 2013-2016, The Regents of The University of Michigan.
+All rights reserved.
+This software was developed in the APRIL Robotics Lab under the
+direction of Edwin Olson, ebolson@umich.edu. This software may be
+available under alternative licensing terms; contact the address above.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+1. Redistributions of source code must retain the above copyright notice, this
+   list of conditions and the following disclaimer.
+2. Redistributions in binary form must reproduce the above copyright notice,
+   this list of conditions and the following disclaimer in the documentation
+   and/or other materials provided with the distribution.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+The views and conclusions contained in the software and documentation are those
+of the authors and should not be interpreted as representing official policies,
+either expressed or implied, of the Regents of The University of Michigan.
+*/
+
+#include <stdio.h>
+#include <math.h>
+#include <string.h>
+#include <float.h>
+
+#include "matd.h"
+#include "math_util.h"
+
+// XXX Write unit tests for me!
+// XXX Rewrite matd_coords in terms of this.
+
+/*
+  This file provides conversions between the following formats:
+
+  quaternion (TNAME[4], { w, x, y, z})
+
+  xyt  (translation in x, y, and rotation in radians.)
+
+  xytcov  (xyt as a TNAME[3] followed by covariance TNAME[9])
+
+  xy, xyz  (translation in x, y, and z)
+
+  mat44 (4x4 rigid-body transformation matrix, row-major
+  order. Conventions: We assume points are projected via right
+  multiplication. E.g., p' = Mp.) Note: some functions really do rely
+  on it being a RIGID, scale=1 transform.
+
+  angleaxis (TNAME[4], { angle-rads, x, y, z }
+
+  xyzrpy (translation x, y, z, euler angles)
+
+  Roll Pitch Yaw are evaluated in the order: roll, pitch, then yaw. I.e.,
+  rollPitchYawToMatrix(rpy) = rotateZ(rpy[2]) * rotateY(rpy[1]) * Rotatex(rpy[0])
+*/
+
+#define TRRFN(root, suffix) root ## suffix
+#define TRFN(root, suffix) TRRFN(root, suffix)
+#define TFN(suffix) TRFN(TNAME, suffix)
+
+// if V is null, returns null.
+static inline TNAME *TFN(s_dup)(const TNAME *v, int len)
+{
+    if (!v)
+        return NULL;
+
+    TNAME *r = (TNAME*)malloc(len * sizeof(TNAME));
+    memcpy(r, v, len * sizeof(TNAME));
+    return r;
+}
+
+static inline void TFN(s_print)(const TNAME *a, int len, const char *fmt)
+{
+    for (int i = 0; i < len; i++)
+        printf(fmt, a[i]);
+    printf("\n");
+}
+
+static inline void TFN(s_print_mat)(const TNAME *a, int nrows, int ncols, const char *fmt)
+{
+    for (int i = 0; i < nrows * ncols; i++) {
+        printf(fmt, a[i]);
+        if ((i % ncols) == (ncols - 1))
+            printf("\n");
+    }
+}
+
+static inline void TFN(s_print_mat44)(const TNAME *a, const char *fmt)
+{
+    for (int i = 0; i < 4 * 4; i++) {
+        printf(fmt, a[i]);
+        if ((i % 4) == 3)
+            printf("\n");
+    }
+}
+
+static inline void TFN(s_add)(const TNAME *a, const TNAME *b, int len, TNAME *r)
+{
+    for (int i = 0; i < len; i++)
+        r[i] = a[i] + b[i];
+}
+
+static inline void TFN(s_subtract)(const TNAME *a, const TNAME *b, int len, TNAME *r)
+{
+    for (int i = 0; i < len; i++)
+        r[i] = a[i] - b[i];
+}
+
+static inline void TFN(s_scale)(TNAME s, const TNAME *v, int len, TNAME *r)
+{
+    for (int i = 0; i < len; i++)
+        r[i] = s * v[i];
+}
+
+static inline TNAME TFN(s_dot)(const TNAME *a, const TNAME *b, int len)
+{
+    TNAME acc = 0;
+    for (int i = 0; i < len; i++)
+        acc += a[i] * b[i];
+    return acc;
+}
+
+static inline TNAME TFN(s_distance)(const TNAME *a, const TNAME *b, int len)
+{
+    TNAME acc = 0;
+    for (int i = 0; i < len; i++)
+        acc += (a[i] - b[i])*(a[i] - b[i]);
+    return (TNAME)sqrt(acc);
+}
+
+static inline TNAME TFN(s_squared_distance)(const TNAME *a, const TNAME *b, int len)
+{
+    TNAME acc = 0;
+    for (int i = 0; i < len; i++)
+        acc += (a[i] - b[i])*(a[i] - b[i]);
+    return acc;
+}
+
+static inline TNAME TFN(s_squared_magnitude)(const TNAME *v, int len)
+{
+    TNAME acc = 0;
+    for (int i = 0; i < len; i++)
+        acc += v[i]*v[i];
+    return acc;
+}
+
+static inline TNAME TFN(s_magnitude)(const TNAME *v, int len)
+{
+    TNAME acc = 0;
+    for (int i = 0; i < len; i++)
+        acc += v[i]*v[i];
+    return (TNAME)sqrt(acc);
+}
+
+static inline void TFN(s_normalize)(const TNAME *v, int len, TNAME *r)
+{
+    TNAME mag = TFN(s_magnitude)(v, len);
+    for (int i = 0; i < len; i++)
+        r[i] = v[i] / mag;
+}
+
+static inline void TFN(s_normalize_self)(TNAME *v, int len)
+{
+    TNAME mag = TFN(s_magnitude)(v, len);
+    for (int i = 0; i < len; i++)
+        v[i] /= mag;
+}
+
+static inline void TFN(s_scale_self)(TNAME *v, int len, double scale)
+{
+    for (int i = 0; i < len; i++)
+        v[i] = (TNAME)(v[i] * scale);
+}
+
+static inline void TFN(s_quat_rotate)(const TNAME q[4], const TNAME v[3], TNAME r[3])
+{
+    TNAME t2, t3, t4, t5, t6, t7, t8, t9, t10;
+
+    t2 = q[0]*q[1];
+    t3 = q[0]*q[2];
+    t4 = q[0]*q[3];
+    t5 = -q[1]*q[1];
+    t6 = q[1]*q[2];
+    t7 = q[1]*q[3];
+    t8 = -q[2]*q[2];
+    t9 = q[2]*q[3];
+    t10 = -q[3]*q[3];
+
+    r[0] = 2*((t8+t10)*v[0] + (t6-t4)*v[1]  + (t3+t7)*v[2]) + v[0];
+    r[1] = 2*((t4+t6)*v[0]  + (t5+t10)*v[1] + (t9-t2)*v[2]) + v[1];
+    r[2] = 2*((t7-t3)*v[0]  + (t2+t9)*v[1]  + (t5+t8)*v[2]) + v[2];
+}
+
+static inline void TFN(s_quat_multiply)(const TNAME a[4], const TNAME b[4], TNAME r[4])
+{
+    r[0] = a[0]*b[0] - a[1]*b[1] - a[2]*b[2] - a[3]*b[3];
+    r[1] = a[0]*b[1] + a[1]*b[0] + a[2]*b[3] - a[3]*b[2];
+    r[2] = a[0]*b[2] - a[1]*b[3] + a[2]*b[0] + a[3]*b[1];
+    r[3] = a[0]*b[3] + a[1]*b[2] - a[2]*b[1] + a[3]*b[0];
+}
+
+static inline void TFN(s_quat_inverse)(const TNAME q[4], TNAME r[4])
+{
+    TNAME mag = TFN(s_magnitude)(q, 4);
+    r[0] = q[0]/mag;
+    r[1] = -q[1]/mag;
+    r[2] = -q[2]/mag;
+    r[3] = -q[3]/mag;
+}
+
+static inline void TFN(s_copy)(const TNAME *src, TNAME *dst, int n)
+{
+    memcpy(dst, src, n * sizeof(TNAME));
+}
+
+static inline void TFN(s_xyt_copy)(const TNAME xyt[3], TNAME r[3])
+{
+    TFN(s_copy)(xyt, r, 3);
+}
+
+static inline void TFN(s_xyt_to_mat44)(const TNAME xyt[3], TNAME r[16])
+{
+    TNAME s = (TNAME)sin(xyt[2]), c = (TNAME)cos(xyt[2]);
+    memset(r, 0, sizeof(TNAME)*16);
+    r[0] = c;
+    r[1] = -s;
+    r[3] = xyt[0];
+    r[4] = s;
+    r[5] = c;
+    r[7] = xyt[1];
+    r[10] = 1;
+    r[15] = 1;
+}
+
+static inline void TFN(s_xyt_transform_xy)(const TNAME xyt[3], const TNAME xy[2], TNAME r[2])
+{
+    TNAME s = (TNAME)sin(xyt[2]), c = (TNAME)cos(xyt[2]);
+    r[0] = c*xy[0] - s*xy[1] + xyt[0];
+    r[1] = s*xy[0] + c*xy[1] + xyt[1];
+}
+
+static inline void TFN(s_mat_transform_xyz)(const TNAME M[16], const TNAME xyz[3], TNAME r[3])
+{
+    r[0] = M[0]*xyz[0] + M[1]*xyz[1] + M[2]*xyz[2]  + M[3];
+    r[1] = M[4]*xyz[0] + M[5]*xyz[1] + M[6]*xyz[2]  + M[7];
+    r[2] = M[8]*xyz[0] + M[9]*xyz[1] + M[10]*xyz[2] + M[11];
+}
+
+static inline void TFN(s_quat_to_angleaxis)(const TNAME _q[4], TNAME r[4])
+{
+    TNAME q[4];
+    TFN(s_normalize)(_q, 4, q);
+
+    // be polite: return an angle from [-pi, pi]
+    // use atan2 to be 4-quadrant safe
+    TNAME mag = TFN(s_magnitude)(&q[1], 3);
+    r[0] = (TNAME)mod2pi(2 * atan2(mag, q[0]));
+    if (mag != 0) {
+        r[1] = q[1] / mag;
+        r[2] = q[2] / mag;
+        r[3] = q[3] / mag;
+    } else {
+        r[1] = 1;
+        r[2] = 0;
+        r[3] = 0;
+    }
+}
+
+static inline void TFN(s_angleaxis_to_quat)(const TNAME aa[4], TNAME q[4])
+{
+    TNAME rad = aa[0];
+    q[0] = (TNAME)cos(rad / 2.0);
+    TNAME s = (TNAME)sin(rad / 2.0);
+
+    TNAME v[3] = { aa[1], aa[2], aa[3] };
+    TFN(s_normalize)(v, 3, v);
+
+    q[1] = s * v[0];
+    q[2] = s * v[1];
+    q[3] = s * v[2];
+}
+
+static inline void TFN(s_quat_to_mat44)(const TNAME q[4], TNAME r[16])
+{
+    TNAME w = q[0], x = q[1], y = q[2], z = q[3];
+
+    r[0] = w*w + x*x - y*y - z*z;
+    r[1] = 2*x*y - 2*w*z;
+    r[2] = 2*x*z + 2*w*y;
+    r[3] = 0;
+
+    r[4] = 2*x*y + 2*w*z;
+    r[5] = w*w - x*x + y*y - z*z;
+    r[6] = 2*y*z - 2*w*x;
+    r[7] = 0;
+
+    r[8] = 2*x*z - 2*w*y;
+    r[9] = 2*y*z + 2*w*x;
+    r[10] = w*w - x*x - y*y + z*z;
+    r[11] = 0;
+
+    r[12] = 0;
+    r[13] = 0;
+    r[14] = 0;
+    r[15] = 1;
+}
+
+/* Returns the skew-symmetric matrix V such that V*w = v x w (cross product).
+   Sometimes denoted [v]_x or \hat{v}.
+   [  0 -v3  v2
+     v3   0 -v1
+    -v2  v1   0]
+ */
+static inline void TFN(s_cross_matrix)(const TNAME v[3], TNAME V[9])
+{
+    V[0] = 0;
+    V[1] = -v[2];
+    V[2] = v[1];
+    V[3] = v[2];
+    V[4] = 0;
+    V[5] = -v[0];
+    V[6] = -v[1];
+    V[7] = v[0];
+    V[8] = 0;
+}
+
+static inline void TFN(s_angleaxis_to_mat44)(const TNAME aa[4], TNAME r[16])
+{
+    TNAME q[4];
+
+    TFN(s_angleaxis_to_quat)(aa, q);
+    TFN(s_quat_to_mat44)(q, r);
+}
+
+static inline void TFN(s_quat_xyz_to_mat44)(const TNAME q[4], const TNAME xyz[3], TNAME r[16])
+{
+    TFN(s_quat_to_mat44)(q, r);
+
+    if (xyz != NULL) {
+        r[3] = xyz[0];
+        r[7] = xyz[1];
+        r[11] = xyz[2];
+    }
+}
+
+static inline void TFN(s_rpy_to_quat)(const TNAME rpy[3], TNAME quat[4])
+{
+    TNAME roll = rpy[0], pitch = rpy[1], yaw = rpy[2];
+
+    TNAME halfroll = roll / 2;
+    TNAME halfpitch = pitch / 2;
+    TNAME halfyaw = yaw / 2;
+
+    TNAME sin_r2 = (TNAME)sin(halfroll);
+    TNAME sin_p2 = (TNAME)sin(halfpitch);
+    TNAME sin_y2 = (TNAME)sin(halfyaw);
+
+    TNAME cos_r2 = (TNAME)cos(halfroll);
+    TNAME cos_p2 = (TNAME)cos(halfpitch);
+    TNAME cos_y2 = (TNAME)cos(halfyaw);
+
+    quat[0] = cos_r2 * cos_p2 * cos_y2 + sin_r2 * sin_p2 * sin_y2;
+    quat[1] = sin_r2 * cos_p2 * cos_y2 - cos_r2 * sin_p2 * sin_y2;
+    quat[2] = cos_r2 * sin_p2 * cos_y2 + sin_r2 * cos_p2 * sin_y2;
+    quat[3] = cos_r2 * cos_p2 * sin_y2 - sin_r2 * sin_p2 * cos_y2;
+}
+
+// Reference: "A tutorial on SE(3) transformation parameterizations and
+// on-manifold optimization" by Jose-Luis Blanco
+static inline void TFN(s_quat_to_rpy)(const TNAME q[4], TNAME rpy[3])
+{
+    const TNAME qr = q[0];
+    const TNAME qx = q[1];
+    const TNAME qy = q[2];
+    const TNAME qz = q[3];
+
+    TNAME disc = qr*qy - qx*qz;
+
+    if (fabs(disc+0.5) < DBL_EPSILON) {         // near -1/2
+        rpy[0] = 0;
+        rpy[1] = (TNAME)(-M_PI/2);
+        rpy[2] = (TNAME)(2 * atan2(qx, qr));
+    }
+    else if (fabs(disc-0.5) < DBL_EPSILON) {    // near  1/2
+        rpy[0] = 0;
+        rpy[1] = (TNAME)(M_PI/2);
+        rpy[2] = (TNAME)(-2 * atan2(qx, qr));
+    }
+    else {
+        // roll
+        TNAME roll_a = 2 * (qr*qx + qy*qz);
+        TNAME roll_b = 1 - 2 * (qx*qx + qy*qy);
+        rpy[0] = (TNAME)atan2(roll_a, roll_b);
+
+        // pitch
+        rpy[1] = (TNAME)asin(2*disc);
+
+        // yaw
+        TNAME yaw_a = 2 * (qr*qz + qx*qy);
+        TNAME yaw_b = 1 - 2 * (qy*qy + qz*qz);
+        rpy[2] = (TNAME)atan2(yaw_a, yaw_b);
+    }
+}
+
+static inline void TFN(s_rpy_to_mat44)(const TNAME rpy[3], TNAME M[16])
+{
+    TNAME q[4];
+    TFN(s_rpy_to_quat)(rpy, q);
+    TFN(s_quat_to_mat44)(q, M);
+}
+
+
+static inline void TFN(s_xyzrpy_to_mat44)(const TNAME xyzrpy[6], TNAME M[16])
+{
+    TFN(s_rpy_to_mat44)(&xyzrpy[3], M);
+    M[3] = xyzrpy[0];
+    M[7] = xyzrpy[1];
+    M[11] = xyzrpy[2];
+}
+
+static inline void TFN(s_mat44_transform_xyz)(const TNAME M[16], const TNAME in[3], TNAME out[3])
+{
+    for (int i = 0; i < 3; i++)
+        out[i] = M[4*i + 0]*in[0] + M[4*i + 1]*in[1] + M[4*i + 2]*in[2] + M[4*i + 3];
+}
+
+// out = (upper 3x3 of M) * in
+static inline void TFN(s_mat44_rotate_vector)(const TNAME M[16], const TNAME in[3], TNAME out[3])
+{
+    for (int i = 0; i < 3; i++)
+        out[i] = M[4*i + 0]*in[0] + M[4*i + 1]*in[1] + M[4*i + 2]*in[2];
+}
+
+static inline void TFN(s_mat44_to_xyt)(const TNAME M[16], TNAME xyt[3])
+{
+    // c -s
+    // s  c
+    xyt[0] = M[3];
+    xyt[1] = M[7];
+    xyt[2] = (TNAME)atan2(M[4], M[0]);
+}
+
+static inline void TFN(s_mat_to_xyz)(const TNAME M[16], TNAME xyz[3])
+{
+    xyz[0] = M[3];
+    xyz[1] = M[7];
+    xyz[2] = M[11];
+}
+
+static inline void TFN(s_mat_to_quat)(const TNAME M[16], TNAME q[4])
+{
+    double T = M[0] + M[5] + M[10] + 1.0;
+    double S;
+
+    if (T > 0.0000001) {
+        S = sqrt(T) * 2;
+        q[0] = (TNAME)(0.25 * S);
+        q[1] = (TNAME)((M[9] - M[6]) / S);
+        q[2] = (TNAME)((M[2] - M[8]) / S);
+        q[3] = (TNAME)((M[4] - M[1]) / S);
+    } else if (M[0] > M[5] && M[0] > M[10]) {   // Column 0:
+        S = sqrt(1.0 + M[0] - M[5] - M[10]) * 2;
+        q[0] = (TNAME)((M[9] - M[6]) / S);
+        q[1] = (TNAME)(0.25 * S);
+        q[2] = (TNAME)((M[4] + M[1]) / S);
+        q[3] = (TNAME)((M[2] + M[8]) / S);
+    } else if (M[5] > M[10]) {                  // Column 1:
+        S = sqrt(1.0 + M[5] - M[0] - M[10]) * 2;
+        q[0] = (TNAME)((M[2] - M[8]) / S);
+        q[1] = (TNAME)((M[4] + M[1]) / S);
+        q[2] = (TNAME)(0.25 * S);
+        q[3] = (TNAME)((M[9] + M[6]) / S);
+    } else {                                    // Column 2:
+        S = sqrt(1.0 + M[10] - M[0] - M[5]);
+        q[0] = (TNAME)((M[4] - M[1]) / S);
+        q[1] = (TNAME)((M[2] + M[8]) / S);
+        q[2] = (TNAME)((M[9] + M[6]) / S);
+        q[3] = (TNAME)(0.25 * S);
+    }
+
+    TFN(s_normalize)(q, 4, q);
+}
+
+static inline void TFN(s_quat_xyz_to_xyt)(const TNAME q[4], const TNAME xyz[3], TNAME xyt[3])
+{
+    TNAME M[16];
+    TFN(s_quat_xyz_to_mat44)(q, xyz, M);
+    TFN(s_mat44_to_xyt)(M, xyt);
+}
+
+// xytr = xyta * xytb;
+static inline void TFN(s_xyt_mul)(const TNAME xyta[3], const TNAME xytb[3], TNAME xytr[3])
+{
+    TNAME xa = xyta[0], ya = xyta[1], ta = xyta[2];
+    TNAME s = (TNAME)sin(ta), c = (TNAME)cos(ta);
+
+    xytr[0] = c*xytb[0] - s*xytb[1] + xa;
+    xytr[1] = s*xytb[0] + c*xytb[1] + ya;
+    xytr[2] = ta + xytb[2];
+}
+
+static inline void TFN(s_xytcov_copy)(const TNAME xyta[3], const TNAME Ca[9],
+                                      TNAME xytr[3], TNAME Cr[9])
+{
+    memcpy(xytr, xyta, 3 * sizeof(TNAME));
+    memcpy(Cr, Ca, 9 * sizeof(TNAME));
+}
+
+static inline void TFN(s_xytcov_mul)(const TNAME xyta[3], const TNAME Ca[9],
+                                      const TNAME xytb[3], const TNAME Cb[9],
+                                      TNAME xytr[3], TNAME Cr[9])
+{
+    TNAME xa = xyta[0], ya = xyta[1], ta = xyta[2];
+    TNAME xb = xytb[0], yb = xytb[1];
+
+    TNAME sa = (TNAME)sin(ta), ca = (TNAME)cos(ta);
+
+    TNAME P11 = Ca[0], P12 = Ca[1], P13 = Ca[2];
+    TNAME              P22 = Ca[4], P23 = Ca[5];
+    TNAME                           P33 = Ca[8];
+
+    TNAME Q11 = Cb[0], Q12 = Cb[1], Q13 = Cb[2];
+    TNAME              Q22 = Cb[4], Q23 = Cb[5];
+    TNAME                           Q33 = Cb[8];
+
+    TNAME JA13 = -sa*xb - ca*yb;
+    TNAME JA23 = ca*xb - sa*yb;
+    TNAME JB11 = ca;
+    TNAME JB12 = -sa;
+    TNAME JB21 = sa;
+    TNAME JB22 = ca;
+
+    Cr[0] = P33*JA13*JA13 + 2*P13*JA13 + Q11*JB11*JB11 + 2*Q12*JB11*JB12 + Q22*JB12*JB12 + P11;
+    Cr[1] = P12 + JA23*(P13 + JA13*P33) + JA13*P23 + JB21*(JB11*Q11 + JB12*Q12) + JB22*(JB11*Q12 + JB12*Q22);
+    Cr[2] = P13 + JA13*P33 + JB11*Q13 + JB12*Q23;
+    Cr[3] = Cr[1];
+    Cr[4] = P33*JA23*JA23 + 2*P23*JA23 + Q11*JB21*JB21 + 2*Q12*JB21*JB22 + Q22*JB22*JB22 + P22;
+    Cr[5] = P23 + JA23*P33 + JB21*Q13 + JB22*Q23;
+    Cr[6] = Cr[2];
+    Cr[7] = Cr[5];
+    Cr[8] = P33 + Q33;
+
+    xytr[0] = ca*xb - sa*yb + xa;
+    xytr[1] = sa*xb + ca*yb + ya;
+    xytr[2] = xyta[2] + xytb[2];
+
+/*
+  // the code above is just an unrolling of the following:
+
+        TNAME JA[][] = new TNAME[][] { { 1, 0, -sa*xb - ca*yb },
+                                         { 0, 1, ca*xb - sa*yb },
+                                         { 0, 0, 1 } };
+        TNAME JB[][] = new TNAME[][] { { ca, -sa, 0 },
+                                         { sa, ca, 0 },
+                                         { 0,  0,  1 } };
+
+        newge.P = LinAlg.add(LinAlg.matrixABCt(JA, P, JA),
+                             LinAlg.matrixABCt(JB, ge.P, JB));
+*/
+}
+
+
+static inline void TFN(s_xyt_inv)(const TNAME xyta[3], TNAME xytr[3])
+{
+    TNAME s = (TNAME)sin(xyta[2]), c = (TNAME)cos(xyta[2]);
+    xytr[0] = -s*xyta[1] - c*xyta[0];
+    xytr[1] = -c*xyta[1] + s*xyta[0];
+    xytr[2] = -xyta[2];
+}
+
+static inline void TFN(s_xytcov_inv)(const TNAME xyta[3], const TNAME Ca[9],
+                                      TNAME xytr[3], TNAME Cr[9])
+{
+    TNAME x = xyta[0], y = xyta[1], theta = xyta[2];
+    TNAME s = (TNAME)sin(theta), c = (TNAME)cos(theta);
+
+    TNAME J11 = -c, J12 = -s, J13 = -c*y + s*x;
+    TNAME J21 = s,  J22 = -c, J23 = s*y + c*x;
+
+    TNAME P11 = Ca[0], P12 = Ca[1], P13 = Ca[2];
+    TNAME              P22 = Ca[4], P23 = Ca[5];
+    TNAME                           P33 = Ca[8];
+
+    Cr[0] = P11*J11*J11 + 2*P12*J11*J12 + 2*P13*J11*J13 +
+        P22*J12*J12 + 2*P23*J12*J13 + P33*J13*J13;
+    Cr[1] = J21*(J11*P11 + J12*P12 + J13*P13) +
+        J22*(J11*P12 + J12*P22 + J13*P23) +
+        J23*(J11*P13 + J12*P23 + J13*P33);
+    Cr[2] = - J11*P13 - J12*P23 - J13*P33;
+    Cr[3] = Cr[1];
+    Cr[4] = P11*J21*J21 + 2*P12*J21*J22 + 2*P13*J21*J23 +
+        P22*J22*J22 + 2*P23*J22*J23 + P33*J23*J23;
+    Cr[5] = - J21*P13 - J22*P23 - J23*P33;
+    Cr[6] = Cr[2];
+    Cr[7] = Cr[5];
+    Cr[8] = P33;
+
+    /*
+    // the code above is just an unrolling of the following:
+
+    TNAME J[][] = new TNAME[][] { { -c, -s, -c*y + s*x },
+                                    { s,  -c,  s*y + c*x },
+                                    { 0,   0,     -1     } };
+    ge.P = LinAlg.matrixABCt(J, P, J);
+    */
+
+    xytr[0] = -s*y - c*x;
+    xytr[1] = -c*y + s*x;
+    xytr[2] = -xyta[2];
+}
+
+// xytr = inv(xyta) * xytb
+static inline void TFN(s_xyt_inv_mul)(const TNAME xyta[3], const TNAME xytb[3], TNAME xytr[3])
+{
+    TNAME theta = xyta[2];
+    TNAME ca = (TNAME)cos(theta);
+    TNAME sa = (TNAME)sin(theta);
+    TNAME dx = xytb[0] - xyta[0];
+    TNAME dy = xytb[1] - xyta[1];
+
+    xytr[0] = ca*dx + sa*dy;
+    xytr[1] = -sa*dx + ca*dy;
+    xytr[2]= xytb[2] - xyta[2];
+}
+
+static inline void TFN(s_mat_add)(const TNAME *A, int Arows, int Acols,
+                                   const TNAME *B, int Brows, int Bcols,
+                                   TNAME *R, int Rrows, int Rcols)
+{
+    assert(Arows == Brows);
+    assert(Arows == Rrows);
+    assert(Bcols == Bcols);
+    assert(Bcols == Rcols);
+
+    for (int i = 0; i < Arows; i++)
+        for (int j = 0; j < Bcols; j++)
+            R[i*Acols + j] = A[i*Acols + j] + B[i*Acols + j];
+}
+
+// matrix should be in row-major order, allocated in a single packed
+// array. (This is compatible with matd.)
+static inline void TFN(s_mat_AB)(const TNAME *A, int Arows, int Acols,
+                                  const TNAME *B, int Brows, int Bcols,
+                                  TNAME *R, int Rrows, int Rcols)
+{
+    assert(Acols == Brows);
+    assert(Rrows == Arows);
+    assert(Bcols == Rcols);
+
+    for (int Rrow = 0; Rrow < Rrows; Rrow++) {
+        for (int Rcol = 0; Rcol < Rcols; Rcol++) {
+            TNAME acc = 0;
+            for (int i = 0; i < Acols; i++)
+                acc += A[Rrow*Acols + i] * B[i*Bcols + Rcol];
+            R[Rrow*Rcols + Rcol] = acc;
+        }
+    }
+}
+
+// matrix should be in row-major order, allocated in a single packed
+// array. (This is compatible with matd.)
+static inline void TFN(s_mat_ABt)(const TNAME *A, int Arows, int Acols,
+                                  const TNAME *B, int Brows, int Bcols,
+                                  TNAME *R, int Rrows, int Rcols)
+{
+    assert(Acols == Bcols);
+    assert(Rrows == Arows);
+    assert(Brows == Rcols);
+
+    for (int Rrow = 0; Rrow < Rrows; Rrow++) {
+        for (int Rcol = 0; Rcol < Rcols; Rcol++) {
+            TNAME acc = 0;
+            for (int i = 0; i < Acols; i++)
+                acc += A[Rrow*Acols + i] * B[Rcol*Bcols + i];
+            R[Rrow*Rcols + Rcol] = acc;
+        }
+    }
+}
+
+static inline void TFN(s_mat_ABC)(const TNAME *A, int Arows, int Acols,
+                                  const TNAME *B, int Brows, int Bcols,
+                                  const TNAME *C, int Crows, int Ccols,
+                                  TNAME *R, int Rrows, int Rcols)
+{
+    TNAME *tmp = malloc(sizeof(TNAME)*Arows*Bcols);
+
+    TFN(s_mat_AB)(A, Arows, Acols, B, Brows, Bcols, tmp, Arows, Bcols);
+    TFN(s_mat_AB)(tmp, Arows, Bcols, C, Crows, Ccols, R, Rrows, Rcols);
+    free(tmp);
+}
+
+static inline void TFN(s_mat_Ab)(const TNAME *A, int Arows, int Acols,
+                                  const TNAME *B, int Blength,
+                                  TNAME *R, int Rlength)
+{
+    assert(Acols == Blength);
+    assert(Arows == Rlength);
+
+    for (int Ridx = 0; Ridx < Rlength; Ridx++) {
+        TNAME acc = 0;
+        for (int i = 0; i < Blength; i++)
+            acc += A[Ridx*Acols + i] * B[i];
+        R[Ridx] = acc;
+    }
+}
+
+static inline void TFN(s_mat_AtB)(const TNAME *A, int Arows, int Acols,
+                                   const TNAME *B, int Brows, int Bcols,
+                                   TNAME *R, int Rrows, int Rcols)
+{
+    assert(Arows == Brows);
+    assert(Rrows == Acols);
+    assert(Bcols == Rcols);
+
+    for (int Rrow = 0; Rrow < Rrows; Rrow++) {
+        for (int Rcol = 0; Rcol < Rcols; Rcol++) {
+            TNAME acc = 0;
+            for (int i = 0; i < Acols; i++)
+                acc += A[i*Acols + Rrow] * B[i*Bcols + Rcol];
+            R[Rrow*Rcols + Rcol] = acc;
+        }
+    }
+}
+
+static inline void TFN(s_quat_slerp)(const TNAME q0[4], const TNAME _q1[4], TNAME r[4], TNAME w)
+{
+    TNAME dot = TFN(s_dot)(q0, _q1, 4);
+
+    TNAME q1[4];
+    memcpy(q1, _q1, sizeof(TNAME) * 4);
+
+    if (dot < 0) {
+        // flip sign on one of them so we don't spin the "wrong
+        // way" around. This doesn't change the rotation that the
+        // quaternion represents.
+        dot = -dot;
+        for (int i = 0; i < 4; i++)
+            q1[i] *= -1;
+    }
+
+    // if large dot product (1), slerp will scale both q0 and q1
+    // by 0, and normalization will blow up.
+    if (dot > 0.95) {
+
+        for (int i = 0; i < 4; i++)
+            r[i] = q0[i]*(1-w) + q1[i]*w;
+
+    } else {
+        TNAME angle = (TNAME)acos(dot);
+
+        TNAME w0 = (TNAME)sin(angle*(1-w)), w1 = (TNAME)sin(angle*w);
+
+        for (int i = 0; i < 4; i++)
+            r[i] = q0[i]*w0 + q1[i]*w1;
+
+        TFN(s_normalize)(r, 4, r);
+    }
+}
+
+static inline void TFN(s_cross_product)(const TNAME v1[3], const TNAME v2[3], TNAME r[3])
+{
+    r[0] = v1[1]*v2[2] - v1[2]*v2[1];
+    r[1] = v1[2]*v2[0] - v1[0]*v2[2];
+    r[2] = v1[0]*v2[1] - v1[1]*v2[0];
+}
+
+////////////////////
+static inline void TFN(s_mat44_identity)(TNAME out[16])
+{
+    memset(out, 0, 16 * sizeof(TNAME));
+    out[0] = 1;
+    out[5] = 1;
+    out[10] = 1;
+    out[15] = 1;
+}
+
+static inline void TFN(s_mat44_translate)(const TNAME txyz[3], TNAME out[16])
+{
+    TFN(s_mat44_identity)(out);
+
+    for (int i = 0; i < 3; i++)
+        out[4*i + 3] += txyz[i];
+}
+
+static inline void TFN(s_mat44_scale)(const TNAME sxyz[3], TNAME out[16])
+{
+    TFN(s_mat44_identity)(out);
+
+    for (int i = 0; i < 3; i++)
+        out[4*i + i] = sxyz[i];
+}
+
+static inline void TFN(s_mat44_rotate_z)(TNAME rad, TNAME out[16])
+{
+    TFN(s_mat44_identity)(out);
+    TNAME s = (TNAME)sin(rad), c = (TNAME)cos(rad);
+    out[0*4 + 0] = c;
+    out[0*4 + 1] = -s;
+    out[1*4 + 0] = s;
+    out[1*4 + 1] = c;
+}
+
+static inline void TFN(s_mat44_rotate_y)(TNAME rad, TNAME out[16])
+{
+    TFN(s_mat44_identity)(out);
+    TNAME s = (TNAME)sin(rad), c = (TNAME)cos(rad);
+    out[0*4 + 0] = c;
+    out[0*4 + 2] = s;
+    out[2*4 + 0] = -s;
+    out[2*4 + 2] = c;
+}
+
+static inline void TFN(s_mat44_rotate_x)(TNAME rad, TNAME out[16])
+{
+    TFN(s_mat44_identity)(out);
+    TNAME s = (TNAME)sin(rad), c = (TNAME)cos(rad);
+    out[1*4 + 1] = c;
+    out[1*4 + 2] = -s;
+    out[2*4 + 1] = s;
+    out[2*4 + 2] = c;
+}
+
+// out = out * translate(txyz)
+static inline void TFN(s_mat44_translate_self)(const TNAME txyz[3], TNAME out[16])
+{
+    TNAME tmp[16], prod[16];
+    TFN(s_mat44_translate(txyz, tmp));
+    TFN(s_mat_AB)(out, 4, 4, tmp, 4, 4, prod, 4, 4);
+    memcpy(out, prod, sizeof(TNAME)*16);
+}
+
+static inline void TFN(s_mat44_scale_self)(const TNAME sxyz[3], TNAME out[16])
+{
+    TNAME tmp[16], prod[16];
+    TFN(s_mat44_scale(sxyz, tmp));
+    TFN(s_mat_AB)(out, 4, 4, tmp, 4, 4, prod, 4, 4);
+    memcpy(out, prod, sizeof(TNAME)*16);
+}
+
+static inline void TFN(s_mat44_rotate_z_self)(TNAME rad, TNAME out[16])
+{
+    TNAME tmp[16], prod[16];
+    TFN(s_mat44_rotate_z(rad, tmp));
+    TFN(s_mat_AB)(out, 4, 4, tmp, 4, 4, prod, 4, 4);
+    memcpy(out, prod, sizeof(TNAME)*16);
+}
+
+// out = inv(M)*in. Note: this assumes that mat44 is a rigid-body transformation.
+static inline void TFN(s_mat44_inv)(const TNAME M[16], TNAME out[16])
+{
+// NB: M = T*R,  inv(M) = inv(R) * inv(T)
+
+    // transpose of upper-left corner
+    for (int i = 0; i < 3; i++)
+        for (int j = 0; j < 3; j++)
+            out[4*i + j] = M[4*j + i];
+
+    out[4*0 + 3] = 0;
+    out[4*1 + 3] = 0;
+    out[4*2 + 3] = 0;
+
+    for (int i = 0; i < 3; i++)
+        for (int j = 0; j < 3; j++)
+            out[4*i + 3] -= out[4*i + j] * M[4*j + 3];
+
+    out[4*3 + 0] = 0;
+    out[4*3 + 1] = 0;
+    out[4*3 + 2] = 0;
+    out[4*3 + 3] = 1;
+
+/*    TNAME tmp[16];
+    TFN(s_mat_AB)(M, 4, 4, out, 4, 4, tmp, 4, 4);
+    printf("identity: ");
+    TFN(s_print_mat)(tmp, 4, 4, "%15f"); */
+}
+
+// out = inv(M)*in
+static inline void TFN(s_mat44_inv_transform_xyz)(const TNAME M[16], const TNAME in[3], TNAME out[3])
+{
+    TNAME T[16];
+    TFN(s_mat44_inv)(M, T);
+
+    TFN(s_mat44_transform_xyz)(T, in, out);
+}
+
+// out = (upper 3x3 of inv(M)) * in
+static inline void TFN(s_mat44_inv_rotate_vector)(const TNAME M[16], const TNAME in[3], TNAME out[3])
+{
+    TNAME T[16];
+    TFN(s_mat44_inv)(M, T);
+
+    TFN(s_mat44_rotate_vector)(T, in, out);
+}
+
+static inline void TFN(s_elu_to_mat44)(const TNAME eye[3], const TNAME lookat[3], const TNAME _up[3],
+                                       TNAME M[16])
+{
+    TNAME f[3];
+    TFN(s_subtract)(lookat, eye, 3, f);
+    TFN(s_normalize)(f, 3, f);
+
+    TNAME up[3];
+
+    // remove any component of 'up' that isn't perpendicular to the look direction.
+    TFN(s_normalize)(_up, 3, up);
+
+    TNAME up_dot = TFN(s_dot)(f, up, 3);
+    for (int i = 0; i < 3; i++)
+        up[i] -= up_dot*f[i];
+
+    TFN(s_normalize_self)(up, 3);
+
+    TNAME s[3], u[3];
+    TFN(s_cross_product)(f, up, s);
+    TFN(s_cross_product)(s, f, u);
+
+    TNAME R[16] = {  s[0],  s[1],  s[2], 0,
+                     u[0],  u[1],  u[2], 0,
+                    -f[0], -f[1], -f[2], 0,
+                     0,     0,     0,    1};
+
+    TNAME T[16] = {1, 0, 0,  -eye[0],
+                    0, 1, 0, -eye[1],
+                    0, 0, 1, -eye[2],
+                    0, 0, 0, 1};
+
+    // M is the extrinsics matrix [R | t] where t = -R*c
+    TNAME tmp[16];
+    TFN(s_mat_AB)(R, 4, 4, T, 4, 4, tmp, 4, 4);
+    TFN(s_mat44_inv)(tmp, M);
+}
+
+// Computes the cholesky factorization of A, putting the lower
+// triangular matrix into R.
+static inline void TFN(s_mat33_chol)(const TNAME *A, int Arows, int Acols,
+                                     TNAME *R, int Brows, int Bcols)
+{
+    assert(Arows == Brows);
+    assert(Bcols == Bcols);
+
+    // A[0] = R[0]*R[0]
+    R[0] = (TNAME)sqrt(A[0]);
+
+    // A[1] = R[0]*R[3];
+    R[3] = A[1] / R[0];
+
+    // A[2] = R[0]*R[6];
+    R[6] = A[2] / R[0];
+
+    // A[4] = R[3]*R[3] + R[4]*R[4]
+    R[4] = (TNAME)sqrt(A[4] - R[3]*R[3]);
+
+    // A[5] = R[3]*R[6] + R[4]*R[7]
+    R[7] = (A[5] - R[3]*R[6]) / R[4];
+
+    // A[8] = R[6]*R[6] + R[7]*R[7] + R[8]*R[8]
+    R[8] = (TNAME)sqrt(A[8] - R[6]*R[6] - R[7]*R[7]);
+
+    R[1] = 0;
+    R[2] = 0;
+    R[5] = 0;
+}
+
+static inline void TFN(s_mat33_lower_tri_inv)(const TNAME *A, int Arows, int Acols,
+                                              TNAME *R, int Rrows, int Rcols)
+{
+    // A[0]*R[0] = 1
+    R[0] = 1 / A[0];
+
+    // A[3]*R[0] + A[4]*R[3] = 0
+    R[3] = -A[3]*R[0] / A[4];
+
+    // A[4]*R[4] = 1
+    R[4] = 1 / A[4];
+
+    // A[6]*R[0] + A[7]*R[3] + A[8]*R[6] = 0
+    R[6] = (-A[6]*R[0] - A[7]*R[3]) / A[8];
+
+    // A[7]*R[4] + A[8]*R[7] = 0
+    R[7] = -A[7]*R[4] / A[8];
+
+    // A[8]*R[8] = 1
+    R[8] = 1 / A[8];
+}
+
+
+static inline void TFN(s_mat33_sym_solve)(const TNAME *A, int Arows, int Acols,
+                                          const TNAME *B, int Brows, int Bcols,
+                                          TNAME *R, int Rrows, int Rcols)
+{
+    assert(Arows == Acols);
+    assert(Acols == 3);
+    assert(Brows == 3);
+    assert(Bcols == 1);
+    assert(Rrows == 3);
+    assert(Rcols == 1);
+
+    TNAME L[9];
+    TFN(s_mat33_chol)(A, 3, 3, L, 3, 3);
+
+    TNAME M[9];
+    TFN(s_mat33_lower_tri_inv)(L, 3, 3, M, 3, 3);
+
+    double tmp[3];
+    tmp[0] = M[0]*B[0];
+    tmp[1] = M[3]*B[0] + M[4]*B[1];
+    tmp[2] = M[6]*B[0] + M[7]*B[1] + M[8]*B[2];
+
+    R[0] = (TNAME)(M[0]*tmp[0] + M[3]*tmp[1] + M[6]*tmp[2]);
+    R[1] = (TNAME)(M[4]*tmp[1] + M[7]*tmp[2]);
+    R[2] = (TNAME)(M[8]*tmp[2]);
+}
+
+/*
+// solve Ax = B. Assumes A is symmetric; uses cholesky factorization
+static inline void TFN(s_mat_solve_chol)(const TNAME *A, int Arows, int Acols,
+                                         const TNAME *B, int Brows, int Bcols,
+                                         TNAME *R, int Rrows, int Rcols)
+{
+    assert(Arows == Acols);
+    assert(Arows == Brows);
+    assert(Acols == Rrows);
+    assert(Bcols == Rcols);
+
+    //
+}
+*/
+#undef TRRFN
+#undef TRFN
+#undef TFN
diff --git a/common/floats.h b/common/floats.h
new file mode 100644
index 0000000..22f839f
--- /dev/null
+++ b/common/floats.h
@@ -0,0 +1,32 @@
+/* Copyright (C) 2013-2016, The Regents of The University of Michigan.
+All rights reserved.
+This software was developed in the APRIL Robotics Lab under the
+direction of Edwin Olson, ebolson@umich.edu. This software may be
+available under alternative licensing terms; contact the address above.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+1. Redistributions of source code must retain the above copyright notice, this
+   list of conditions and the following disclaimer.
+2. Redistributions in binary form must reproduce the above copyright notice,
+   this list of conditions and the following disclaimer in the documentation
+   and/or other materials provided with the distribution.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+The views and conclusions contained in the software and documentation are those
+of the authors and should not be interpreted as representing official policies,
+either expressed or implied, of the Regents of The University of Michigan.
+*/
+
+#pragma once
+
+#define TNAME float
+#include "doubles_floats_impl.h"
+#undef TNAME
diff --git a/common/g2d.c b/common/g2d.c
new file mode 100644
index 0000000..4645f20
--- /dev/null
+++ b/common/g2d.c
@@ -0,0 +1,919 @@
+/* Copyright (C) 2013-2016, The Regents of The University of Michigan.
+All rights reserved.
+This software was developed in the APRIL Robotics Lab under the
+direction of Edwin Olson, ebolson@umich.edu. This software may be
+available under alternative licensing terms; contact the address above.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+1. Redistributions of source code must retain the above copyright notice, this
+   list of conditions and the following disclaimer.
+2. Redistributions in binary form must reproduce the above copyright notice,
+   this list of conditions and the following disclaimer in the documentation
+   and/or other materials provided with the distribution.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+The views and conclusions contained in the software and documentation are those
+of the authors and should not be interpreted as representing official policies,
+either expressed or implied, of the Regents of The University of Michigan.
+*/
+
+#include <assert.h>
+#include <math.h>
+#include <stdio.h>
+#include <string.h>
+
+#include "g2d.h"
+#include "common/math_util.h"
+
+#ifdef _WIN32
+static inline long int random(void)
+{
+        return rand();
+}
+#endif
+
+double g2d_distance(const double a[2], const double b[2])
+{
+    return sqrtf(sq(a[0]-b[0]) + sq(a[1]-b[1]));
+}
+
+zarray_t *g2d_polygon_create_empty()
+{
+    return zarray_create(sizeof(double[2]));
+}
+
+void g2d_polygon_add(zarray_t *poly, double v[2])
+{
+    zarray_add(poly, v);
+}
+
+zarray_t *g2d_polygon_create_data(double v[][2], int sz)
+{
+    zarray_t *points = g2d_polygon_create_empty();
+
+    for (int i = 0; i < sz; i++)
+        g2d_polygon_add(points, v[i]);
+
+    return points;
+}
+
+zarray_t *g2d_polygon_create_zeros(int sz)
+{
+    zarray_t *points = zarray_create(sizeof(double[2]));
+
+    double z[2] = { 0, 0 };
+
+    for (int i = 0; i < sz; i++)
+        zarray_add(points, z);
+
+    return points;
+}
+
+void g2d_polygon_make_ccw(zarray_t *poly)
+{
+    // Step one: we want the points in counter-clockwise order.
+    // If the points are in clockwise order, we'll reverse them.
+    double total_theta = 0;
+    double last_theta = 0;
+
+    // Count the angle accumulated going around the polygon. If
+    // the sum is +2pi, it's CCW. Otherwise, we'll get -2pi.
+    int sz = zarray_size(poly);
+
+    for (int i = 0; i <= sz; i++) {
+        double p0[2], p1[2];
+        zarray_get(poly, i % sz, &p0);
+        zarray_get(poly, (i+1) % sz, &p1);
+
+        double this_theta = atan2(p1[1]-p0[1], p1[0]-p0[0]);
+
+        if (i > 0) {
+            double dtheta = mod2pi(this_theta-last_theta);
+            total_theta += dtheta;
+        }
+
+        last_theta = this_theta;
+    }
+
+    int ccw = (total_theta > 0);
+
+    // reverse order if necessary.
+    if (!ccw) {
+        for (int i = 0; i < sz / 2; i++) {
+            double a[2], b[2];
+
+            zarray_get(poly, i, a);
+            zarray_get(poly, sz-1-i, b);
+            zarray_set(poly, i, b, NULL);
+            zarray_set(poly, sz-1-i, a, NULL);
+        }
+    }
+}
+
+int g2d_polygon_contains_point_ref(const zarray_t *poly, double q[2])
+{
+    // use winding. If the point is inside the polygon, we'll wrap
+    // around it (accumulating 6.28 radians). If we're outside the
+    // polygon, we'll accumulate zero.
+    int psz = zarray_size(poly);
+
+    double acc_theta = 0;
+
+    double last_theta;
+
+    for (int i = 0; i <= psz; i++) {
+        double p[2];
+
+        zarray_get(poly, i % psz, &p);
+
+        double this_theta = atan2(q[1]-p[1], q[0]-p[0]);
+
+        if (i != 0)
+            acc_theta += mod2pi(this_theta - last_theta);
+
+        last_theta = this_theta;
+    }
+
+    return acc_theta > M_PI;
+}
+
+/*
+// sort by x coordinate, ascending
+static int g2d_convex_hull_sort(const void *_a, const void *_b)
+{
+    double *a = (double*) _a;
+    double *b = (double*) _b;
+
+    if (a[0] < b[0])
+        return -1;
+    if (a[0] == b[0])
+        return 0;
+    return 1;
+}
+*/
+
+/*
+zarray_t *g2d_convex_hull2(const zarray_t *points)
+{
+    zarray_t *hull = zarray_copy(points);
+
+    zarray_sort(hull, g2d_convex_hull_sort);
+
+    int hsz = zarray_size(hull);
+    int hout = 0;
+
+    for (int hin = 1; hin < hsz; hin++) {
+        double *p;
+        zarray_get_volatile(hull, i, &p);
+
+        // Everything to the right of hin is already convex. We now
+        // add one point, p, which begins "connected" by two
+        // (coincident) edges from the last right-most point to p.
+        double *last;
+        zarray_get_volatile(hull, hout, &last);
+
+        // We now remove points from the convex hull by moving
+    }
+
+    return hull;
+}
+*/
+
+// creates and returns a zarray(double[2]). The resulting polygon is
+// CCW and implicitly closed. Unnecessary colinear points are omitted.
+zarray_t *g2d_convex_hull(const zarray_t *points)
+{
+    zarray_t *hull = zarray_create(sizeof(double[2]));
+
+    // gift-wrap algorithm.
+
+    // step 1: find left most point.
+    int insz = zarray_size(points);
+
+    // must have at least 2 points. (XXX need 3?)
+    assert(insz >= 2);
+
+    double *pleft = NULL;
+    for (int i = 0; i < insz; i++) {
+        double *p;
+        zarray_get_volatile(points, i, &p);
+
+        if (pleft == NULL || p[0] < pleft[0])
+            pleft = p;
+    }
+
+    // cannot be NULL since there must be at least one point.
+    assert(pleft != NULL);
+
+    zarray_add(hull, pleft);
+
+    // step 2. gift wrap. Keep searching for points that make the
+    // smallest-angle left-hand turn. This implementation is carefully
+    // written to use only addition/subtraction/multiply. No division
+    // or sqrts. This guarantees exact results for integer-coordinate
+    // polygons (no rounding/precision problems).
+    double *p = pleft;
+
+    while (1) {
+        assert(p != NULL);
+
+        double *q = NULL;
+        double n0 = 0, n1 = 0; // the normal to the line (p, q) (not
+                       // necessarily unit length).
+
+        // Search for the point q for which the line (p,q) is most "to
+        // the right of" the other points. (i.e., every time we find a
+        // point that is to the right of our current line, we change
+        // lines.)
+        for (int i = 0; i < insz; i++) {
+            double *thisq;
+            zarray_get_volatile(points, i, &thisq);
+
+            if (thisq == p)
+                continue;
+
+            // the first time we find another point, we initialize our
+            // value of q, forming the line (p,q)
+            if (q == NULL) {
+                q = thisq;
+                n0 = q[1] - p[1];
+                n1 = -q[0] + p[0];
+            } else {
+                // we already have a line (p,q). is point thisq RIGHT OF line (p, q)?
+                double e0 = thisq[0] - p[0], e1 = thisq[1] - p[1];
+                double dot = e0*n0 + e1*n1;
+
+                if (dot > 0) {
+                    // it is. change our line.
+                    q = thisq;
+                    n0 = q[1] - p[1];
+                    n1 = -q[0] + p[0];
+                }
+            }
+        }
+
+        // we must have elected *some* line, so long as there are at
+        // least 2 points in the polygon.
+        assert(q != NULL);
+
+        // loop completed?
+        if (q == pleft)
+            break;
+
+        int colinear = 0;
+
+        // is this new point colinear with the last two?
+        if (zarray_size(hull) > 1) {
+            double *o;
+            zarray_get_volatile(hull, zarray_size(hull) - 2, &o);
+
+            double e0 = o[0] - p[0];
+            double e1 = o[1] - p[1];
+
+            if (n0*e0 + n1*e1 == 0)
+                colinear = 1;
+        }
+
+        // if it is colinear, overwrite the last one.
+        if (colinear)
+            zarray_set(hull, zarray_size(hull)-1, q, NULL);
+        else
+            zarray_add(hull, q);
+
+        p = q;
+    }
+
+    return hull;
+}
+
+// Find point p on the boundary of poly that is closest to q.
+void g2d_polygon_closest_boundary_point(const zarray_t *poly, const double q[2], double *p)
+{
+    int psz = zarray_size(poly);
+    double min_dist = HUGE_VALF;
+
+    for (int i = 0; i < psz; i++) {
+        double *p0, *p1;
+
+        zarray_get_volatile(poly, i, &p0);
+        zarray_get_volatile(poly, (i+1) % psz, &p1);
+
+        g2d_line_segment_t seg;
+        g2d_line_segment_init_from_points(&seg, p0, p1);
+
+        double thisp[2];
+        g2d_line_segment_closest_point(&seg, q, thisp);
+
+        double dist = g2d_distance(q, thisp);
+        if (dist < min_dist) {
+            memcpy(p, thisp, sizeof(double[2]));
+            min_dist = dist;
+        }
+    }
+}
+
+int g2d_polygon_contains_point(const zarray_t *poly, double q[2])
+{
+    // use winding. If the point is inside the polygon, we'll wrap
+    // around it (accumulating 6.28 radians). If we're outside the
+    // polygon, we'll accumulate zero.
+    int psz = zarray_size(poly);
+    assert(psz > 0);
+
+    int last_quadrant;
+    int quad_acc = 0;
+
+    for (int i = 0; i <= psz; i++) {
+        double *p;
+
+        zarray_get_volatile(poly, i % psz, &p);
+
+        // p[0] < q[0]       p[1] < q[1]    quadrant
+        //     0                 0              0
+        //     0                 1              3
+        //     1                 0              1
+        //     1                 1              2
+
+        // p[1] < q[1]       p[0] < q[0]    quadrant
+        //     0                 0              0
+        //     0                 1              1
+        //     1                 0              3
+        //     1                 1              2
+
+        int quadrant;
+        if (p[0] < q[0])
+            quadrant = (p[1] < q[1]) ? 2 : 1;
+        else
+            quadrant = (p[1] < q[1]) ? 3 : 0;
+
+        if (i > 0) {
+            int dquadrant = quadrant - last_quadrant;
+
+            // encourage a jump table by mapping to small positive integers.
+            switch (dquadrant) {
+                case -3:
+                case 1:
+                    quad_acc ++;
+                    break;
+                case -1:
+                case 3:
+                    quad_acc --;
+                    break;
+                case 0:
+                    break;
+                case -2:
+                case 2:
+                {
+                    // get the previous point.
+                    double *p0;
+                    zarray_get_volatile(poly, i-1, &p0);
+
+                    // Consider the points p0 and p (the points around the
+                    //polygon that we are tracing) and the query point q.
+                    //
+                    // If we've moved diagonally across quadrants, we want
+                    // to measure whether we have rotated +PI radians or
+                    // -PI radians. We can test this by computing the dot
+                    // product of vector (p0-q) with the vector
+                    // perpendicular to vector (p-q)
+                    double nx = p[1] - q[1];
+                    double ny = -p[0] + q[0];
+
+                    double dot = nx*(p0[0]-q[0]) + ny*(p0[1]-q[1]);
+                    if (dot < 0)
+                        quad_acc -= 2;
+                    else
+                        quad_acc += 2;
+
+                    break;
+                }
+            }
+        }
+
+        last_quadrant = quadrant;
+    }
+
+    int v = (quad_acc >= 2) || (quad_acc <= -2);
+
+    #if 0
+    if (v != g2d_polygon_contains_point_ref(poly, q)) {
+        printf("FAILURE %d %d\n", v, quad_acc);
+        exit(-1);
+    }
+    #endif
+
+    return v;
+}
+
+void g2d_line_init_from_points(g2d_line_t *line, const double p0[2], const double p1[2])
+{
+    line->p[0] = p0[0];
+    line->p[1] = p0[1];
+    line->u[0] = p1[0]-p0[0];
+    line->u[1] = p1[1]-p0[1];
+    double mag = sqrtf(sq(line->u[0]) + sq(line->u[1]));
+
+    line->u[0] /= mag;
+    line->u[1] /= mag;
+}
+
+double g2d_line_get_coordinate(const g2d_line_t *line, const double q[2])
+{
+    return (q[0]-line->p[0])*line->u[0] + (q[1]-line->p[1])*line->u[1];
+}
+
+// Compute intersection of two line segments. If they intersect,
+// result is stored in p and 1 is returned. Otherwise, zero is
+// returned. p may be NULL.
+int g2d_line_intersect_line(const g2d_line_t *linea, const g2d_line_t *lineb, double *p)
+{
+    // this implementation is many times faster than the original,
+    // mostly due to avoiding a general-purpose LU decomposition in
+    // Matrix.inverse().
+    double m00, m01, m10, m11;
+    double i00, i01;
+    double b00, b10;
+
+    m00 = linea->u[0];
+    m01= -lineb->u[0];
+    m10 = linea->u[1];
+    m11= -lineb->u[1];
+
+    // determinant of m
+    double det = m00*m11-m01*m10;
+
+    // parallel lines?
+    if (fabs(det) < 0.00000001)
+        return 0;
+
+    // inverse of m
+    i00 = m11/det;
+    i01 = -m01/det;
+
+    b00 = lineb->p[0] - linea->p[0];
+    b10 = lineb->p[1] - linea->p[1];
+
+    double x00; //, x10;
+    x00 = i00*b00+i01*b10;
+
+    if (p != NULL) {
+        p[0] = linea->u[0]*x00 + linea->p[0];
+        p[1] = linea->u[1]*x00 + linea->p[1];
+    }
+
+    return 1;
+}
+
+
+void g2d_line_segment_init_from_points(g2d_line_segment_t *seg, const double p0[2], const double p1[2])
+{
+    g2d_line_init_from_points(&seg->line, p0, p1);
+    seg->p1[0] = p1[0];
+    seg->p1[1] = p1[1];
+}
+
+// Find the point p on segment seg that is closest to point q.
+void g2d_line_segment_closest_point(const g2d_line_segment_t *seg, const double *q, double *p)
+{
+    double a = g2d_line_get_coordinate(&seg->line, seg->line.p);
+    double b = g2d_line_get_coordinate(&seg->line, seg->p1);
+    double c = g2d_line_get_coordinate(&seg->line, q);
+
+    if (a < b)
+        c = dclamp(c, a, b);
+    else
+        c = dclamp(c, b, a);
+
+    p[0] = seg->line.p[0] + c * seg->line.u[0];
+    p[1] = seg->line.p[1] + c * seg->line.u[1];
+}
+
+// Compute intersection of two line segments. If they intersect,
+// result is stored in p and 1 is returned. Otherwise, zero is
+// returned. p may be NULL.
+int g2d_line_segment_intersect_segment(const g2d_line_segment_t *sega, const g2d_line_segment_t *segb, double *p)
+{
+    double tmp[2];
+
+    if (!g2d_line_intersect_line(&sega->line, &segb->line, tmp))
+        return 0;
+
+    double a = g2d_line_get_coordinate(&sega->line, sega->line.p);
+    double b = g2d_line_get_coordinate(&sega->line, sega->p1);
+    double c = g2d_line_get_coordinate(&sega->line, tmp);
+
+    // does intersection lie on the first line?
+    if ((c<a && c<b) || (c>a && c>b))
+        return 0;
+
+    a = g2d_line_get_coordinate(&segb->line, segb->line.p);
+    b = g2d_line_get_coordinate(&segb->line, segb->p1);
+    c = g2d_line_get_coordinate(&segb->line, tmp);
+
+    // does intersection lie on second line?
+    if ((c<a && c<b) || (c>a && c>b))
+        return 0;
+
+    if (p != NULL) {
+        p[0] = tmp[0];
+        p[1] = tmp[1];
+    }
+
+    return 1;
+}
+
+// Compute intersection of a line segment and a line. If they
+// intersect, result is stored in p and 1 is returned. Otherwise, zero
+// is returned. p may be NULL.
+int g2d_line_segment_intersect_line(const g2d_line_segment_t *seg, const g2d_line_t *line, double *p)
+{
+    double tmp[2];
+
+    if (!g2d_line_intersect_line(&seg->line, line, tmp))
+        return 0;
+
+    double a = g2d_line_get_coordinate(&seg->line, seg->line.p);
+    double b = g2d_line_get_coordinate(&seg->line, seg->p1);
+    double c = g2d_line_get_coordinate(&seg->line, tmp);
+
+    // does intersection lie on the first line?
+    if ((c<a && c<b) || (c>a && c>b))
+        return 0;
+
+    if (p != NULL) {
+        p[0] = tmp[0];
+        p[1] = tmp[1];
+    }
+
+    return 1;
+}
+
+// do the edges of polya and polyb collide? (Does NOT test for containment).
+int g2d_polygon_intersects_polygon(const zarray_t *polya, const zarray_t *polyb)
+{
+    // do any of the line segments collide? If so, the answer is no.
+
+    // dumb N^2 method.
+    for (int ia = 0; ia < zarray_size(polya); ia++) {
+        double pa0[2], pa1[2];
+        zarray_get(polya, ia, pa0);
+        zarray_get(polya, (ia+1)%zarray_size(polya), pa1);
+
+        g2d_line_segment_t sega;
+        g2d_line_segment_init_from_points(&sega, pa0, pa1);
+
+        for (int ib = 0; ib < zarray_size(polyb); ib++) {
+            double pb0[2], pb1[2];
+            zarray_get(polyb, ib, pb0);
+            zarray_get(polyb, (ib+1)%zarray_size(polyb), pb1);
+
+            g2d_line_segment_t segb;
+            g2d_line_segment_init_from_points(&segb, pb0, pb1);
+
+            if (g2d_line_segment_intersect_segment(&sega, &segb, NULL))
+                return 1;
+        }
+    }
+
+    return 0;
+}
+
+// does polya completely contain polyb?
+int g2d_polygon_contains_polygon(const zarray_t *polya, const zarray_t *polyb)
+{
+    // do any of the line segments collide? If so, the answer is no.
+    if (g2d_polygon_intersects_polygon(polya, polyb))
+        return 0;
+
+    // if none of the edges cross, then the polygon is either fully
+    // contained or fully outside.
+    double p[2];
+    zarray_get(polyb, 0, p);
+
+    return g2d_polygon_contains_point(polya, p);
+}
+
+// compute a point that is inside the polygon. (It may not be *far* inside though)
+void g2d_polygon_get_interior_point(const zarray_t *poly, double *p)
+{
+    // take the first three points, which form a triangle. Find the middle point
+    double a[2], b[2], c[2];
+
+    zarray_get(poly, 0, a);
+    zarray_get(poly, 1, b);
+    zarray_get(poly, 2, c);
+
+    p[0] = (a[0]+b[0]+c[0])/3;
+    p[1] = (a[1]+b[1]+c[1])/3;
+}
+
+int g2d_polygon_overlaps_polygon(const zarray_t *polya, const zarray_t *polyb)
+{
+    // do any of the line segments collide? If so, the answer is yes.
+    if (g2d_polygon_intersects_polygon(polya, polyb))
+        return 1;
+
+    // if none of the edges cross, then the polygon is either fully
+    // contained or fully outside.
+    double p[2];
+    g2d_polygon_get_interior_point(polyb, p);
+
+    if (g2d_polygon_contains_point(polya, p))
+        return 1;
+
+    g2d_polygon_get_interior_point(polya, p);
+
+    if (g2d_polygon_contains_point(polyb, p))
+        return 1;
+
+    return 0;
+}
+
+static int double_sort_up(const void *_a, const void *_b)
+{
+    double a = *((double*) _a);
+    double b = *((double*) _b);
+
+    if (a < b)
+        return -1;
+
+    if (a == b)
+        return 0;
+
+    return 1;
+}
+
+// Compute the crossings of the polygon along line y, storing them in
+// the array x. X must be allocated to be at least as long as
+// zarray_size(poly). X will be sorted, ready for
+// rasterization. Returns the number of intersections (and elements
+// written to x).
+/*
+  To rasterize, do something like this:
+
+  double res = 0.099;
+  for (double y = y0; y < y1; y += res) {
+  double xs[zarray_size(poly)];
+
+  int xsz = g2d_polygon_rasterize(poly, y, xs);
+  int xpos = 0;
+  int inout = 0; // start off "out"
+
+  for (double x = x0; x < x1; x += res) {
+      while (x > xs[xpos] && xpos < xsz) {
+        xpos++;
+        inout ^= 1;
+      }
+
+    if (inout)
+       printf("y");
+    else
+       printf(" ");
+  }
+  printf("\n");
+*/
+
+// returns the number of x intercepts
+int g2d_polygon_rasterize(const zarray_t *poly, double y, double *x)
+{
+    int sz = zarray_size(poly);
+
+    g2d_line_t line;
+    if (1) {
+        double p0[2] = { 0, y };
+        double p1[2] = { 1, y };
+
+        g2d_line_init_from_points(&line, p0, p1);
+    }
+
+    int xpos = 0;
+
+    for (int i = 0; i < sz; i++) {
+        g2d_line_segment_t seg;
+        double *p0, *p1;
+        zarray_get_volatile(poly, i, &p0);
+        zarray_get_volatile(poly, (i+1)%sz, &p1);
+
+        g2d_line_segment_init_from_points(&seg, p0, p1);
+
+        double q[2];
+        if (g2d_line_segment_intersect_line(&seg, &line, q))
+            x[xpos++] = q[0];
+    }
+
+    qsort(x, xpos, sizeof(double), double_sort_up);
+
+    return xpos;
+}
+
+/*
+  /---(1,5)
+  (-2,4)-/        |
+  \          |
+  \        (1,2)--(2,2)\
+  \                     \
+  \                      \
+  (0,0)------------------(4,0)
+*/
+#if 0
+
+#include "timeprofile.h"
+
+int main(int argc, char *argv[])
+{
+    timeprofile_t *tp = timeprofile_create();
+
+    zarray_t *polya = g2d_polygon_create_data((double[][2]) {
+            { 0, 0},
+            { 4, 0},
+            { 2, 2},
+            { 1, 2},
+            { 1, 5},
+            { -2,4} }, 6);
+
+    zarray_t *polyb = g2d_polygon_create_data((double[][2]) {
+            { .1, .1},
+            { .5, .1},
+            { .1, .5 } }, 3);
+
+    zarray_t *polyc = g2d_polygon_create_data((double[][2]) {
+            { 3, 0},
+            { 5, 0},
+            { 5, 1} }, 3);
+
+    zarray_t *polyd = g2d_polygon_create_data((double[][2]) {
+            { 5, 5},
+            { 6, 6},
+            { 5, 6} }, 3);
+
+/*
+  5      L---K
+  4      |I--J
+  3      |H-G
+  2      |E-F
+  1      |D--C
+  0      A---B
+  01234
+*/
+    zarray_t *polyE = g2d_polygon_create_data((double[][2]) {
+            {0,0}, {4,0}, {4, 1}, {1,1},
+                                  {1,2}, {3,2}, {3,3}, {1,3},
+                                                       {1,4}, {4,4}, {4,5}, {0,5}}, 12);
+
+    srand(0);
+
+    timeprofile_stamp(tp, "begin");
+
+    if (1) {
+        int niters = 100000;
+
+        for (int i = 0; i < niters; i++) {
+            double q[2];
+            q[0] = 10.0f * random() / RAND_MAX - 2;
+            q[1] = 10.0f * random() / RAND_MAX - 2;
+
+            g2d_polygon_contains_point(polyE, q);
+        }
+
+        timeprofile_stamp(tp, "fast");
+
+        for (int i = 0; i < niters; i++) {
+            double q[2];
+            q[0] = 10.0f * random() / RAND_MAX - 2;
+            q[1] = 10.0f * random() / RAND_MAX - 2;
+
+            g2d_polygon_contains_point_ref(polyE, q);
+        }
+
+        timeprofile_stamp(tp, "slow");
+
+        for (int i = 0; i < niters; i++) {
+            double q[2];
+            q[0] = 10.0f * random() / RAND_MAX - 2;
+            q[1] = 10.0f * random() / RAND_MAX - 2;
+
+            int v0 = g2d_polygon_contains_point(polyE, q);
+            int v1 = g2d_polygon_contains_point_ref(polyE, q);
+            assert(v0 == v1);
+        }
+
+        timeprofile_stamp(tp, "both");
+        timeprofile_display(tp);
+    }
+
+    if (1) {
+        zarray_t *poly = polyE;
+
+        double res = 0.399;
+        for (double y = 5.2; y >= -.5; y -= res) {
+            double xs[zarray_size(poly)];
+
+            int xsz = g2d_polygon_rasterize(poly, y, xs);
+            int xpos = 0;
+            int inout = 0; // start off "out"
+            for (double x = -3; x < 6; x += res) {
+                while (x > xs[xpos] && xpos < xsz) {
+                    xpos++;
+                    inout ^= 1;
+                }
+
+                if (inout)
+                    printf("y");
+                else
+                    printf(" ");
+            }
+            printf("\n");
+
+            for (double x = -3; x < 6; x += res) {
+                double q[2] = {x, y};
+                if (g2d_polygon_contains_point(poly, q))
+                    printf("X");
+                else
+                    printf(" ");
+            }
+            printf("\n");
+        }
+    }
+
+
+
+/*
+// CW order
+double p[][2] =  { { 0, 0},
+{ -2, 4},
+{1, 5},
+{1, 2},
+{2, 2},
+{4, 0} };
+*/
+
+     double q[2] = { 10, 10 };
+     printf("0==%d\n", g2d_polygon_contains_point(polya, q));
+
+     q[0] = 1; q[1] = 1;
+     printf("1==%d\n", g2d_polygon_contains_point(polya, q));
+
+     q[0] = 3; q[1] = .5;
+     printf("1==%d\n", g2d_polygon_contains_point(polya, q));
+
+     q[0] = 1.2; q[1] = 2.1;
+     printf("0==%d\n", g2d_polygon_contains_point(polya, q));
+
+     printf("0==%d\n", g2d_polygon_contains_polygon(polya, polyb));
+
+     printf("0==%d\n", g2d_polygon_contains_polygon(polya, polyc));
+
+     printf("0==%d\n", g2d_polygon_contains_polygon(polya, polyd));
+
+     ////////////////////////////////////////////////////////
+     // Test convex hull
+     if (1) {
+         zarray_t *hull = g2d_convex_hull(polyE);
+
+         for (int k = 0; k < zarray_size(hull); k++) {
+             double *h;
+             zarray_get_volatile(hull, k, &h);
+
+             printf("%15f, %15f\n", h[0], h[1]);
+         }
+     }
+
+     for (int i = 0; i < 100000; i++) {
+         zarray_t *points = zarray_create(sizeof(double[2]));
+
+         for (int j = 0; j < 100; j++) {
+             double q[2];
+             q[0] = 10.0f * random() / RAND_MAX - 2;
+             q[1] = 10.0f * random() / RAND_MAX - 2;
+
+             zarray_add(points, q);
+         }
+
+         zarray_t *hull = g2d_convex_hull(points);
+         for (int j = 0; j < zarray_size(points); j++) {
+             double *q;
+             zarray_get_volatile(points, j, &q);
+
+             int on_edge;
+
+             double p[2];
+             g2d_polygon_closest_boundary_point(hull, q, p);
+             if (g2d_distance(q, p) < .00001)
+                 on_edge = 1;
+
+             assert(on_edge || g2d_polygon_contains_point(hull, q));
+         }
+
+         zarray_destroy(hull);
+         zarray_destroy(points);
+     }
+}
+#endif
diff --git a/common/g2d.h b/common/g2d.h
new file mode 100644
index 0000000..21c21ac
--- /dev/null
+++ b/common/g2d.h
@@ -0,0 +1,124 @@
+/* Copyright (C) 2013-2016, The Regents of The University of Michigan.
+All rights reserved.
+This software was developed in the APRIL Robotics Lab under the
+direction of Edwin Olson, ebolson@umich.edu. This software may be
+available under alternative licensing terms; contact the address above.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+1. Redistributions of source code must retain the above copyright notice, this
+   list of conditions and the following disclaimer.
+2. Redistributions in binary form must reproduce the above copyright notice,
+   this list of conditions and the following disclaimer in the documentation
+   and/or other materials provided with the distribution.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+The views and conclusions contained in the software and documentation are those
+of the authors and should not be interpreted as representing official policies,
+either expressed or implied, of the Regents of The University of Michigan.
+*/
+
+#pragma once
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "zarray.h"
+
+// This library tries to avoid needless proliferation of types.
+//
+// A point is a double[2]. (Note that when passing a double[2] as an
+// argument, it is passed by pointer, not by value.)
+//
+// A polygon is a zarray_t of double[2]. (Note that in this case, the
+// zarray contains the actual vertex data, and not merely a pointer to
+// some other data. IMPORTANT: A polygon must be specified in CCW
+// order.  It is implicitly closed (do not list the same point at the
+// beginning at the end.
+//
+// Where sensible, it is assumed that objects should be allocated
+// sparingly; consequently "init" style methods, rather than "create"
+// methods are used.
+
+////////////////////////////////////////////////////////////////////
+// Lines
+
+typedef struct
+{
+    // Internal representation: a point that the line goes through (p) and
+    // the direction of the line (u).
+    double p[2];
+    double u[2]; // always a unit vector
+} g2d_line_t;
+
+// initialize a line object.
+void g2d_line_init_from_points(g2d_line_t *line, const double p0[2], const double p1[2]);
+
+// The line defines a one-dimensional coordinate system whose origin
+// is p. Where is q? (If q is not on the line, the point nearest q is
+// returned.
+double g2d_line_get_coordinate(const g2d_line_t *line, const double q[2]);
+
+// Intersect two lines. The intersection, if it exists, is written to
+// p (if not NULL), and 1 is returned. Else, zero is returned.
+int g2d_line_intersect_line(const g2d_line_t *linea, const g2d_line_t *lineb, double *p);
+
+////////////////////////////////////////////////////////////////////
+// Line Segments. line.p is always one endpoint; p1 is the other
+// endpoint.
+typedef struct
+{
+    g2d_line_t line;
+    double p1[2];
+} g2d_line_segment_t;
+
+void g2d_line_segment_init_from_points(g2d_line_segment_t *seg, const double p0[2], const double p1[2]);
+
+// Intersect two segments. The intersection, if it exists, is written
+// to p (if not NULL), and 1 is returned. Else, zero is returned.
+int g2d_line_segment_intersect_segment(const g2d_line_segment_t *sega, const g2d_line_segment_t *segb, double *p);
+
+void g2d_line_segment_closest_point(const g2d_line_segment_t *seg, const double *q, double *p);
+double g2d_line_segment_closest_point_distance(const g2d_line_segment_t *seg, const double *q);
+
+////////////////////////////////////////////////////////////////////
+// Polygons
+
+zarray_t *g2d_polygon_create_data(double v[][2], int sz);
+
+zarray_t *g2d_polygon_create_zeros(int sz);
+
+zarray_t *g2d_polygon_create_empty();
+
+void g2d_polygon_add(zarray_t *poly, double v[2]);
+
+// Takes a polygon in either CW or CCW and modifies it (if necessary)
+// to be CCW.
+void g2d_polygon_make_ccw(zarray_t *poly);
+
+// Return 1 if point q lies within poly.
+int g2d_polygon_contains_point(const zarray_t *poly, double q[2]);
+
+// Do the edges of the polygons cross? (Does not test for containment).
+int g2d_polygon_intersects_polygon(const zarray_t *polya, const zarray_t *polyb);
+
+// Does polya completely contain polyb?
+int g2d_polygon_contains_polygon(const zarray_t *polya, const zarray_t *polyb);
+
+// Is there some point which is in both polya and polyb?
+int g2d_polygon_overlaps_polygon(const zarray_t *polya, const zarray_t *polyb);
+
+// returns the number of points written to x. see comments.
+int g2d_polygon_rasterize(const zarray_t *poly, double y, double *x);
+
+#ifdef __cplusplus
+}
+#endif
diff --git a/common/getopt.c b/common/getopt.c
new file mode 100644
index 0000000..e645089
--- /dev/null
+++ b/common/getopt.c
@@ -0,0 +1,548 @@
+/* Copyright (C) 2013-2016, The Regents of The University of Michigan.
+All rights reserved.
+This software was developed in the APRIL Robotics Lab under the
+direction of Edwin Olson, ebolson@umich.edu. This software may be
+available under alternative licensing terms; contact the address above.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+1. Redistributions of source code must retain the above copyright notice, this
+   list of conditions and the following disclaimer.
+2. Redistributions in binary form must reproduce the above copyright notice,
+   this list of conditions and the following disclaimer in the documentation
+   and/or other materials provided with the distribution.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+The views and conclusions contained in the software and documentation are those
+of the authors and should not be interpreted as representing official policies,
+either expressed or implied, of the Regents of The University of Michigan.
+*/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <assert.h>
+#include <string.h>
+#include <ctype.h>
+#include <errno.h>
+
+#include "zhash.h"
+#include "zarray.h"
+#include "getopt.h"
+#include "common/math_util.h"
+
+#define GOO_BOOL_TYPE 1
+#define GOO_STRING_TYPE 2
+
+typedef struct getopt_option getopt_option_t;
+
+struct getopt_option
+{
+	char *sname;
+	char *lname;
+	char *svalue;
+
+	char *help;
+	int type;
+
+	int spacer;
+
+    int was_specified;
+};
+
+struct getopt
+{
+    zhash_t  *lopts;
+    zhash_t  *sopts;
+    zarray_t   *extraargs;
+    zarray_t   *options;
+};
+
+getopt_t *getopt_create()
+{
+    getopt_t *gopt = (getopt_t*) calloc(1, sizeof(getopt_t));
+
+    gopt->lopts     = zhash_create(sizeof(char*), sizeof(getopt_option_t*), zhash_str_hash, zhash_str_equals);
+    gopt->sopts     = zhash_create(sizeof(char*), sizeof(getopt_option_t*), zhash_str_hash, zhash_str_equals);
+    gopt->options   = zarray_create(sizeof(getopt_option_t*));
+    gopt->extraargs = zarray_create(sizeof(char*));
+
+    return gopt;
+}
+
+void getopt_option_destroy(getopt_option_t *goo)
+{
+    free(goo->sname);
+    free(goo->lname);
+    free(goo->svalue);
+    free(goo->help);
+    memset(goo, 0, sizeof(getopt_option_t));
+    free(goo);
+}
+
+void getopt_destroy(getopt_t *gopt)
+{
+    // free the extra arguments and container
+    zarray_vmap(gopt->extraargs, free);
+    zarray_destroy(gopt->extraargs);
+
+    // deep free of the getopt_option structs. Also frees key/values, so
+    // after this loop, hash tables will no longer work
+    zarray_vmap(gopt->options, getopt_option_destroy);
+    zarray_destroy(gopt->options);
+
+    // free tables
+    zhash_destroy(gopt->lopts);
+    zhash_destroy(gopt->sopts);
+
+    memset(gopt, 0, sizeof(getopt_t));
+    free(gopt);
+}
+
+static void getopt_modify_string(char **str, char *newvalue)
+{
+    char *old = *str;
+    *str = newvalue;
+    if (old != NULL)
+        free(old);
+}
+
+static char *get_arg_assignment(char *arg)
+{
+    // not an arg starting with "--"?
+    if (!str_starts_with(arg, "--")) {
+        return NULL;
+    }
+
+    int eq_index = str_indexof(arg, "=");
+
+    // no assignment?
+    if (eq_index == -1) {
+        return NULL;
+    }
+
+    // no quotes allowed before '=' in "--key=value" option specification.
+    // quotes can be used in value string, or by extra arguments
+    for (int i = 0; i < eq_index; i++) {
+        if (arg[i] == '\'' || arg[i] == '"') {
+            return NULL;
+        }
+    }
+
+    return &arg[eq_index];
+}
+
+// returns 1 if no error
+int getopt_parse(getopt_t *gopt, int argc, char *argv[], int showErrors)
+{
+    int okay = 1;
+    zarray_t *toks = zarray_create(sizeof(char*));
+
+    // take the input stream and chop it up into tokens
+    for (int i = 1; i < argc; i++) {
+
+        char *arg = strdup(argv[i]);
+        char *eq  = get_arg_assignment(arg);
+
+        // no equal sign? Push the whole thing.
+        if (eq == NULL) {
+            zarray_add(toks, &arg);
+        } else {
+            // there was an equal sign. Push the part
+            // before and after the equal sign
+            char *val = strdup(&eq[1]);
+            eq[0] = 0;
+            zarray_add(toks, &arg);
+
+            // if the part after the equal sign is
+            // enclosed by quotation marks, strip them.
+            if (val[0]=='\"') {
+                size_t last = strlen(val) - 1;
+                if (val[last]=='\"')
+                    val[last] = 0;
+                char *valclean = strdup(&val[1]);
+                zarray_add(toks, &valclean);
+                free(val);
+            } else {
+                zarray_add(toks, &val);
+            }
+        }
+    }
+
+    // now loop over the elements and evaluate the arguments
+    unsigned int i = 0;
+
+    char *tok = NULL;
+
+    while (i < zarray_size(toks)) {
+
+        // rather than free statement throughout this while loop
+        if (tok != NULL)
+            free(tok);
+
+        zarray_get(toks, i, &tok);
+
+        if (!strncmp(tok,"--", 2)) {
+            char *optname = &tok[2];
+            getopt_option_t *goo = NULL;
+            zhash_get(gopt->lopts, &optname, &goo);
+            if (goo == NULL) {
+                okay = 0;
+                if (showErrors)
+                    printf("Unknown option --%s\n", optname);
+                i++;
+                continue;
+            }
+
+            goo->was_specified = 1;
+
+            if (goo->type == GOO_BOOL_TYPE) {
+                if ((i+1) < zarray_size(toks)) {
+                    char *val = NULL;
+                    zarray_get(toks, i+1, &val);
+
+                    if (!strcmp(val,"true")) {
+                        i+=2;
+                        getopt_modify_string(&goo->svalue, val);
+                        continue;
+                    }
+                    if (!strcmp(val,"false")) {
+                        i+=2;
+                        getopt_modify_string(&goo->svalue, val);
+                        continue;
+                    }
+                }
+                getopt_modify_string(&goo->svalue, strdup("true"));
+                i++;
+                continue;
+            }
+
+            if (goo->type == GOO_STRING_TYPE) {
+                // TODO: check whether next argument is an option, denoting missing argument
+                if ((i+1) < zarray_size(toks)) {
+                    char *val = NULL;
+                    zarray_get(toks, i+1, &val);
+                    i+=2;
+                    getopt_modify_string(&goo->svalue, val);
+                    continue;
+                }
+
+                okay = 0;
+                if (showErrors)
+                    printf("Option %s requires a string argument.\n",optname);
+            }
+        }
+
+        if (!strncmp(tok,"-",1) && strncmp(tok,"--",2)) {
+            size_t len = strlen(tok);
+            int pos;
+            for (pos = 1; pos < len; pos++) {
+                char sopt[2];
+                sopt[0] = tok[pos];
+                sopt[1] = 0;
+                char *sopt_ptr = (char*) &sopt;
+                getopt_option_t *goo = NULL;
+                zhash_get(gopt->sopts, &sopt_ptr, &goo);
+
+                if (goo==NULL) {
+                    // is the argument a numerical literal that happens to be negative?
+                    if (pos==1 && isdigit(tok[pos])) {
+                        zarray_add(gopt->extraargs, &tok);
+                        tok = NULL;
+                        break;
+                    } else {
+                        okay = 0;
+                        if (showErrors)
+                            printf("Unknown option -%c\n", tok[pos]);
+                        i++;
+                        continue;
+                    }
+                }
+
+                goo->was_specified = 1;
+
+                if (goo->type == GOO_BOOL_TYPE) {
+                    getopt_modify_string(&goo->svalue, strdup("true"));
+                    continue;
+                }
+
+                if (goo->type == GOO_STRING_TYPE) {
+                    if ((i+1) < zarray_size(toks)) {
+                        char *val = NULL;
+                        zarray_get(toks, i+1, &val);
+                        // TODO: allow negative numerical values for short-name options ?
+                        if (val[0]=='-')
+                        {
+                            okay = 0;
+                            if (showErrors)
+                                printf("Ran out of arguments for option block %s\n", tok);
+                        }
+                        i++;
+                        getopt_modify_string(&goo->svalue, val);
+                        continue;
+                    }
+
+                    okay = 0;
+                    if (showErrors)
+                        printf("Option -%c requires a string argument.\n", tok[pos]);
+                }
+            }
+            i++;
+            continue;
+        }
+
+        // it's not an option-- it's an argument.
+        zarray_add(gopt->extraargs, &tok);
+        tok = NULL;
+        i++;
+    }
+    if (tok != NULL)
+        free(tok);
+
+    zarray_destroy(toks);
+
+    return okay;
+}
+
+void getopt_add_spacer(getopt_t *gopt, const char *s)
+{
+    getopt_option_t *goo = (getopt_option_t*) calloc(1, sizeof(getopt_option_t));
+    goo->spacer = 1;
+    goo->help = strdup(s);
+    zarray_add(gopt->options, &goo);
+}
+
+void getopt_add_bool(getopt_t *gopt, char sopt, const char *lname, int def, const char *help)
+{
+    char sname[2];
+    sname[0] = sopt;
+    sname[1] = 0;
+    char *sname_ptr = (char*) &sname;
+
+    if (strlen(lname) < 1) { // must have long name
+        fprintf (stderr, "getopt_add_bool(): must supply option name\n");
+        exit (EXIT_FAILURE);
+    }
+
+    if (sopt == '-') { // short name cannot be '-' (no way to reference)
+        fprintf (stderr, "getopt_add_bool(): invalid option character: '%c'\n", sopt);
+        exit (EXIT_FAILURE);
+    }
+
+    if (zhash_contains(gopt->lopts, &lname)) {
+        fprintf (stderr, "getopt_add_bool(): duplicate option name: --%s\n", lname);
+        exit (EXIT_FAILURE);
+    }
+
+    if (sopt != '\0' && zhash_contains(gopt->sopts, &sname_ptr)) {
+        fprintf (stderr, "getopt_add_bool(): duplicate option: -%s ('%s')\n", sname, lname);
+        exit (EXIT_FAILURE);
+    }
+
+    getopt_option_t *goo = (getopt_option_t*) calloc(1, sizeof(getopt_option_t));
+    goo->sname=strdup(sname);
+    goo->lname=strdup(lname);
+    goo->svalue=strdup(def ? "true" : "false");
+    goo->type=GOO_BOOL_TYPE;
+    goo->help=strdup(help);
+
+    zhash_put(gopt->lopts, &goo->lname, &goo, NULL, NULL);
+    zhash_put(gopt->sopts, &goo->sname, &goo, NULL, NULL);
+    zarray_add(gopt->options, &goo);
+}
+
+void getopt_add_int(getopt_t *gopt, char sopt, const char *lname, const char *def, const char *help)
+{
+    getopt_add_string(gopt, sopt, lname, def, help);
+}
+
+void
+getopt_add_double (getopt_t *gopt, char sopt, const char *lname, const char *def, const char *help)
+{
+    getopt_add_string (gopt, sopt, lname, def, help);
+}
+
+void getopt_add_string(getopt_t *gopt, char sopt, const char *lname, const char *def, const char *help)
+{
+    char sname[2];
+    sname[0] = sopt;
+    sname[1] = 0;
+    char *sname_ptr = (char*) &sname;
+
+    if (strlen(lname) < 1) { // must have long name
+        fprintf (stderr, "getopt_add_string(): must supply option name\n");
+        exit (EXIT_FAILURE);
+    }
+
+    if (sopt == '-') { // short name cannot be '-' (no way to reference)
+        fprintf (stderr, "getopt_add_string(): invalid option character: '%c'\n", sopt);
+        exit (EXIT_FAILURE);
+    }
+
+    if (zhash_contains(gopt->lopts, &lname)) {
+        fprintf (stderr, "getopt_add_string(): duplicate option name: --%s\n", lname);
+        exit (EXIT_FAILURE);
+    }
+
+    if (sopt != '\0' && zhash_contains(gopt->sopts, &sname_ptr)) {
+        fprintf (stderr, "getopt_add_string(): duplicate option: -%s ('%s')\n", sname, lname);
+        exit (EXIT_FAILURE);
+    }
+
+    getopt_option_t *goo = (getopt_option_t*) calloc(1, sizeof(getopt_option_t));
+    goo->sname=strdup(sname);
+    goo->lname=strdup(lname);
+    goo->svalue=strdup(def);
+    goo->type=GOO_STRING_TYPE;
+    goo->help=strdup(help);
+
+    zhash_put(gopt->lopts, &goo->lname, &goo, NULL, NULL);
+    zhash_put(gopt->sopts, &goo->sname, &goo, NULL, NULL);
+    zarray_add(gopt->options, &goo);
+}
+
+const char *getopt_get_string(getopt_t *gopt, const char *lname)
+{
+    getopt_option_t *goo = NULL;
+    zhash_get(gopt->lopts, &lname, &goo);
+    // could return null, but this would be the only
+    // method that doesn't assert on a missing key
+    assert (goo != NULL);
+    return goo->svalue;
+}
+
+int getopt_get_int(getopt_t *getopt, const char *lname)
+{
+    const char *v = getopt_get_string(getopt, lname);
+    assert(v != NULL);
+
+    errno = 0;
+    char *endptr = (char *) v;
+    long val = strtol(v, &endptr, 10);
+
+    if (errno != 0) {
+        fprintf (stderr, "--%s argument: strtol failed: %s\n", lname, strerror(errno));
+        exit (EXIT_FAILURE);
+    }
+
+    if (endptr == v) {
+        fprintf (stderr, "--%s argument cannot be parsed as an int\n", lname);
+        exit (EXIT_FAILURE);
+    }
+
+    return (int) val;
+}
+
+int getopt_get_bool(getopt_t *getopt, const char *lname)
+{
+    const char *v = getopt_get_string(getopt, lname);
+    assert (v!=NULL);
+    int val = !strcmp(v, "true");
+    return val;
+}
+
+double getopt_get_double (getopt_t *getopt, const char *lname)
+{
+    const char *v = getopt_get_string (getopt, lname);
+    assert (v!=NULL);
+
+    errno = 0;
+    char *endptr = (char *) v;
+    double d = strtod (v, &endptr);
+
+    if (errno != 0) {
+        fprintf (stderr, "--%s argument: strtod failed: %s\n", lname, strerror(errno));
+        exit (EXIT_FAILURE);
+    }
+
+    if (endptr == v) {
+        fprintf (stderr, "--%s argument cannot be parsed as a double\n", lname);
+        exit (EXIT_FAILURE);
+    }
+
+    return d;
+}
+
+int getopt_was_specified(getopt_t *getopt, const char *lname)
+{
+    getopt_option_t *goo = NULL;
+    zhash_get(getopt->lopts, &lname, &goo);
+    if (goo == NULL)
+        return 0;
+
+    return goo->was_specified;
+}
+
+const zarray_t *getopt_get_extra_args(getopt_t *gopt)
+{
+    return gopt->extraargs;
+}
+
+void getopt_do_usage(getopt_t * gopt)
+{
+    char * usage = getopt_get_usage(gopt);
+    printf("%s", usage);
+    free(usage);
+}
+
+char * getopt_get_usage(getopt_t *gopt)
+{
+    string_buffer_t * sb = string_buffer_create();
+
+    int leftmargin=2;
+    int longwidth=12;
+    int valuewidth=10;
+
+    for (unsigned int i = 0; i < zarray_size(gopt->options); i++) {
+        getopt_option_t *goo = NULL;
+        zarray_get(gopt->options, i, &goo);
+
+        if (goo->spacer)
+            continue;
+
+        longwidth = max(longwidth, (int) strlen(goo->lname));
+
+        if (goo->type == GOO_STRING_TYPE)
+            valuewidth = max(valuewidth, (int) strlen(goo->svalue));
+    }
+
+    for (unsigned int i = 0; i < zarray_size(gopt->options); i++) {
+        getopt_option_t *goo = NULL;
+        zarray_get(gopt->options, i, &goo);
+
+        if (goo->spacer)
+        {
+            if (goo->help==NULL || strlen(goo->help)==0)
+                string_buffer_appendf(sb,"\n");
+            else
+                string_buffer_appendf(sb,"\n%*s%s\n\n", leftmargin, "", goo->help);
+            continue;
+        }
+
+        string_buffer_appendf(sb,"%*s", leftmargin, "");
+
+        if (goo->sname[0]==0)
+            string_buffer_appendf(sb,"     ");
+        else
+            string_buffer_appendf(sb,"-%c | ", goo->sname[0]);
+
+        string_buffer_appendf(sb,"--%*s ", -longwidth, goo->lname);
+
+        string_buffer_appendf(sb," [ %s ]", goo->svalue); // XXX: displays current value rather than default value
+
+        string_buffer_appendf(sb,"%*s", (int) (valuewidth-strlen(goo->svalue)), "");
+
+        string_buffer_appendf(sb," %s   ", goo->help);
+        string_buffer_appendf(sb,"\n");
+    }
+
+    char * usage = string_buffer_to_string(sb);
+    string_buffer_destroy(sb);
+    return usage;
+}
diff --git a/common/getopt.h b/common/getopt.h
new file mode 100644
index 0000000..69dbb05
--- /dev/null
+++ b/common/getopt.h
@@ -0,0 +1,64 @@
+/* Copyright (C) 2013-2016, The Regents of The University of Michigan.
+All rights reserved.
+This software was developed in the APRIL Robotics Lab under the
+direction of Edwin Olson, ebolson@umich.edu. This software may be
+available under alternative licensing terms; contact the address above.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+1. Redistributions of source code must retain the above copyright notice, this
+   list of conditions and the following disclaimer.
+2. Redistributions in binary form must reproduce the above copyright notice,
+   this list of conditions and the following disclaimer in the documentation
+   and/or other materials provided with the distribution.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+The views and conclusions contained in the software and documentation are those
+of the authors and should not be interpreted as representing official policies,
+either expressed or implied, of the Regents of The University of Michigan.
+*/
+
+#pragma once
+
+#include "zarray.h"
+#include "string_util.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef struct getopt getopt_t;
+
+getopt_t *getopt_create();
+void getopt_destroy(getopt_t *gopt);
+
+// Parse args. Returns 1 on success
+int getopt_parse(getopt_t *gopt, int argc, char *argv[], int showErrors);
+void getopt_do_usage(getopt_t *gopt);
+
+// Returns a string containing the usage. Must be freed by caller
+char * getopt_get_usage(getopt_t *gopt);
+
+void getopt_add_spacer(getopt_t *gopt, const char *s);
+void getopt_add_bool(getopt_t *gopt, char sopt, const char *lname, int def, const char *help);
+void getopt_add_int(getopt_t *gopt, char sopt, const char *lname, const char *def, const char *help);
+void getopt_add_string(getopt_t *gopt, char sopt, const char *lname, const char *def, const char *help);
+void getopt_add_double(getopt_t *gopt, char sopt, const char *lname, const char *def, const char *help);
+
+const char *getopt_get_string(getopt_t *gopt, const char *lname);
+int getopt_get_int(getopt_t *getopt, const char *lname);
+int getopt_get_bool(getopt_t *getopt, const char *lname);
+double getopt_get_double(getopt_t *getopt, const char *lname);
+int getopt_was_specified(getopt_t *gopt, const char *lname);
+const zarray_t *getopt_get_extra_args(getopt_t *gopt);
+
+#ifdef __cplusplus
+}
+#endif
diff --git a/common/homography.c b/common/homography.c
new file mode 100644
index 0000000..48e7f02
--- /dev/null
+++ b/common/homography.c
@@ -0,0 +1,479 @@
+/* Copyright (C) 2013-2016, The Regents of The University of Michigan.
+All rights reserved.
+This software was developed in the APRIL Robotics Lab under the
+direction of Edwin Olson, ebolson@umich.edu. This software may be
+available under alternative licensing terms; contact the address above.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+1. Redistributions of source code must retain the above copyright notice, this
+   list of conditions and the following disclaimer.
+2. Redistributions in binary form must reproduce the above copyright notice,
+   this list of conditions and the following disclaimer in the documentation
+   and/or other materials provided with the distribution.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+The views and conclusions contained in the software and documentation are those
+of the authors and should not be interpreted as representing official policies,
+either expressed or implied, of the Regents of The University of Michigan.
+*/
+
+#include <math.h>
+
+#include "common/matd.h"
+#include "common/zarray.h"
+#include "common/homography.h"
+#include "common/math_util.h"
+
+// correspondences is a list of float[4]s, consisting of the points x
+// and y concatenated. We will compute a homography such that y = Hx
+matd_t *homography_compute(zarray_t *correspondences, int flags)
+{
+    // compute centroids of both sets of points (yields a better
+    // conditioned information matrix)
+    double x_cx = 0, x_cy = 0;
+    double y_cx = 0, y_cy = 0;
+
+    for (int i = 0; i < zarray_size(correspondences); i++) {
+        float *c;
+        zarray_get_volatile(correspondences, i, &c);
+
+        x_cx += c[0];
+        x_cy += c[1];
+        y_cx += c[2];
+        y_cy += c[3];
+    }
+
+    int sz = zarray_size(correspondences);
+    x_cx /= sz;
+    x_cy /= sz;
+    y_cx /= sz;
+    y_cy /= sz;
+
+    // NB We don't normalize scale; it seems implausible that it could
+    // possibly make any difference given the dynamic range of IEEE
+    // doubles.
+
+    matd_t *A = matd_create(9,9);
+    for (int i = 0; i < zarray_size(correspondences); i++) {
+        float *c;
+        zarray_get_volatile(correspondences, i, &c);
+
+        // (below world is "x", and image is "y")
+        double worldx = c[0] - x_cx;
+        double worldy = c[1] - x_cy;
+        double imagex = c[2] - y_cx;
+        double imagey = c[3] - y_cy;
+
+        double a03 = -worldx;
+        double a04 = -worldy;
+        double a05 = -1;
+        double a06 = worldx*imagey;
+        double a07 = worldy*imagey;
+        double a08 = imagey;
+
+        MATD_EL(A, 3, 3) += a03*a03;
+        MATD_EL(A, 3, 4) += a03*a04;
+        MATD_EL(A, 3, 5) += a03*a05;
+        MATD_EL(A, 3, 6) += a03*a06;
+        MATD_EL(A, 3, 7) += a03*a07;
+        MATD_EL(A, 3, 8) += a03*a08;
+        MATD_EL(A, 4, 4) += a04*a04;
+        MATD_EL(A, 4, 5) += a04*a05;
+        MATD_EL(A, 4, 6) += a04*a06;
+        MATD_EL(A, 4, 7) += a04*a07;
+        MATD_EL(A, 4, 8) += a04*a08;
+        MATD_EL(A, 5, 5) += a05*a05;
+        MATD_EL(A, 5, 6) += a05*a06;
+        MATD_EL(A, 5, 7) += a05*a07;
+        MATD_EL(A, 5, 8) += a05*a08;
+        MATD_EL(A, 6, 6) += a06*a06;
+        MATD_EL(A, 6, 7) += a06*a07;
+        MATD_EL(A, 6, 8) += a06*a08;
+        MATD_EL(A, 7, 7) += a07*a07;
+        MATD_EL(A, 7, 8) += a07*a08;
+        MATD_EL(A, 8, 8) += a08*a08;
+
+        double a10 = worldx;
+        double a11 = worldy;
+        double a12 = 1;
+        double a16 = -worldx*imagex;
+        double a17 = -worldy*imagex;
+        double a18 = -imagex;
+
+        MATD_EL(A, 0, 0) += a10*a10;
+        MATD_EL(A, 0, 1) += a10*a11;
+        MATD_EL(A, 0, 2) += a10*a12;
+        MATD_EL(A, 0, 6) += a10*a16;
+        MATD_EL(A, 0, 7) += a10*a17;
+        MATD_EL(A, 0, 8) += a10*a18;
+        MATD_EL(A, 1, 1) += a11*a11;
+        MATD_EL(A, 1, 2) += a11*a12;
+        MATD_EL(A, 1, 6) += a11*a16;
+        MATD_EL(A, 1, 7) += a11*a17;
+        MATD_EL(A, 1, 8) += a11*a18;
+        MATD_EL(A, 2, 2) += a12*a12;
+        MATD_EL(A, 2, 6) += a12*a16;
+        MATD_EL(A, 2, 7) += a12*a17;
+        MATD_EL(A, 2, 8) += a12*a18;
+        MATD_EL(A, 6, 6) += a16*a16;
+        MATD_EL(A, 6, 7) += a16*a17;
+        MATD_EL(A, 6, 8) += a16*a18;
+        MATD_EL(A, 7, 7) += a17*a17;
+        MATD_EL(A, 7, 8) += a17*a18;
+        MATD_EL(A, 8, 8) += a18*a18;
+
+        double a20 = -worldx*imagey;
+        double a21 = -worldy*imagey;
+        double a22 = -imagey;
+        double a23 = worldx*imagex;
+        double a24 = worldy*imagex;
+        double a25 = imagex;
+
+        MATD_EL(A, 0, 0) += a20*a20;
+        MATD_EL(A, 0, 1) += a20*a21;
+        MATD_EL(A, 0, 2) += a20*a22;
+        MATD_EL(A, 0, 3) += a20*a23;
+        MATD_EL(A, 0, 4) += a20*a24;
+        MATD_EL(A, 0, 5) += a20*a25;
+        MATD_EL(A, 1, 1) += a21*a21;
+        MATD_EL(A, 1, 2) += a21*a22;
+        MATD_EL(A, 1, 3) += a21*a23;
+        MATD_EL(A, 1, 4) += a21*a24;
+        MATD_EL(A, 1, 5) += a21*a25;
+        MATD_EL(A, 2, 2) += a22*a22;
+        MATD_EL(A, 2, 3) += a22*a23;
+        MATD_EL(A, 2, 4) += a22*a24;
+        MATD_EL(A, 2, 5) += a22*a25;
+        MATD_EL(A, 3, 3) += a23*a23;
+        MATD_EL(A, 3, 4) += a23*a24;
+        MATD_EL(A, 3, 5) += a23*a25;
+        MATD_EL(A, 4, 4) += a24*a24;
+        MATD_EL(A, 4, 5) += a24*a25;
+        MATD_EL(A, 5, 5) += a25*a25;
+    }
+
+    // make symmetric
+    for (int i = 0; i < 9; i++)
+        for (int j = i+1; j < 9; j++)
+            MATD_EL(A, j, i) = MATD_EL(A, i, j);
+
+    matd_t *H = matd_create(3,3);
+
+    if (flags & HOMOGRAPHY_COMPUTE_FLAG_INVERSE) {
+        // compute singular vector by (carefully) inverting the rank-deficient matrix.
+
+        if (1) {
+            matd_t *Ainv = matd_inverse(A);
+            double scale = 0;
+
+            for (int i = 0; i < 9; i++)
+                scale += sq(MATD_EL(Ainv, i, 0));
+            scale = sqrt(scale);
+
+            for (int i = 0; i < 3; i++)
+                for (int j = 0; j < 3; j++)
+                    MATD_EL(H, i, j) = MATD_EL(Ainv, 3*i+j, 0) / scale;
+
+            matd_destroy(Ainv);
+        } else {
+
+            matd_t *b = matd_create_data(9, 1, (double[]) { 1, 0, 0, 0, 0, 0, 0, 0, 0 });
+            matd_t *Ainv = NULL;
+
+            if (0) {
+                matd_plu_t *lu = matd_plu(A);
+                Ainv = matd_plu_solve(lu, b);
+                matd_plu_destroy(lu);
+            } else {
+                matd_chol_t *chol = matd_chol(A);
+                Ainv = matd_chol_solve(chol, b);
+                matd_chol_destroy(chol);
+            }
+
+            double scale = 0;
+
+            for (int i = 0; i < 9; i++)
+                scale += sq(MATD_EL(Ainv, i, 0));
+            scale = sqrt(scale);
+
+            for (int i = 0; i < 3; i++)
+                for (int j = 0; j < 3; j++)
+                    MATD_EL(H, i, j) = MATD_EL(Ainv, 3*i+j, 0) / scale;
+
+            matd_destroy(b);
+            matd_destroy(Ainv);
+        }
+
+    } else {
+        // compute singular vector using SVD. A bit slower, but more accurate.
+        matd_svd_t svd = matd_svd_flags(A, MATD_SVD_NO_WARNINGS);
+
+        for (int i = 0; i < 3; i++)
+            for (int j = 0; j < 3; j++)
+                MATD_EL(H, i, j) = MATD_EL(svd.U, 3*i+j, 8);
+
+        matd_destroy(svd.U);
+        matd_destroy(svd.S);
+        matd_destroy(svd.V);
+
+    }
+
+    matd_t *Tx = matd_identity(3);
+    MATD_EL(Tx,0,2) = -x_cx;
+    MATD_EL(Tx,1,2) = -x_cy;
+
+    matd_t *Ty = matd_identity(3);
+    MATD_EL(Ty,0,2) = y_cx;
+    MATD_EL(Ty,1,2) = y_cy;
+
+    matd_t *H2 = matd_op("M*M*M", Ty, H, Tx);
+
+    matd_destroy(A);
+    matd_destroy(Tx);
+    matd_destroy(Ty);
+    matd_destroy(H);
+
+    return H2;
+}
+
+
+// assuming that the projection matrix is:
+// [ fx 0  cx 0 ]
+// [  0 fy cy 0 ]
+// [  0  0  1 0 ]
+//
+// And that the homography is equal to the projection matrix times the
+// model matrix, recover the model matrix (which is returned). Note
+// that the third column of the model matrix is missing in the
+// expresison below, reflecting the fact that the homography assumes
+// all points are at z=0 (i.e., planar) and that the element of z is
+// thus omitted.  (3x1 instead of 4x1).
+//
+// [ fx 0  cx 0 ] [ R00  R01  TX ]    [ H00 H01 H02 ]
+// [  0 fy cy 0 ] [ R10  R11  TY ] =  [ H10 H11 H12 ]
+// [  0  0  1 0 ] [ R20  R21  TZ ] =  [ H20 H21 H22 ]
+//                [  0    0    1 ]
+//
+// fx*R00 + cx*R20 = H00   (note, H only known up to scale; some additional adjustments required; see code.)
+// fx*R01 + cx*R21 = H01
+// fx*TX  + cx*TZ  = H02
+// fy*R10 + cy*R20 = H10
+// fy*R11 + cy*R21 = H11
+// fy*TY  + cy*TZ  = H12
+// R20 = H20
+// R21 = H21
+// TZ  = H22
+
+matd_t *homography_to_pose(const matd_t *H, double fx, double fy, double cx, double cy)
+{
+    // Note that every variable that we compute is proportional to the scale factor of H.
+    double R20 = MATD_EL(H, 2, 0);
+    double R21 = MATD_EL(H, 2, 1);
+    double TZ  = MATD_EL(H, 2, 2);
+    double R00 = (MATD_EL(H, 0, 0) - cx*R20) / fx;
+    double R01 = (MATD_EL(H, 0, 1) - cx*R21) / fx;
+    double TX  = (MATD_EL(H, 0, 2) - cx*TZ)  / fx;
+    double R10 = (MATD_EL(H, 1, 0) - cy*R20) / fy;
+    double R11 = (MATD_EL(H, 1, 1) - cy*R21) / fy;
+    double TY  = (MATD_EL(H, 1, 2) - cy*TZ)  / fy;
+
+    // compute the scale by requiring that the rotation columns are unit length
+    // (Use geometric average of the two length vectors we have)
+    double length1 = sqrtf(R00*R00 + R10*R10 + R20*R20);
+    double length2 = sqrtf(R01*R01 + R11*R11 + R21*R21);
+    double s = 1.0 / sqrtf(length1 * length2);
+
+    // get sign of S by requiring the tag to be in front the camera;
+    // we assume camera looks in the -Z direction.
+    if (TZ > 0)
+        s *= -1;
+
+    R20 *= s;
+    R21 *= s;
+    TZ  *= s;
+    R00 *= s;
+    R01 *= s;
+    TX  *= s;
+    R10 *= s;
+    R11 *= s;
+    TY  *= s;
+
+    // now recover [R02 R12 R22] by noting that it is the cross product of the other two columns.
+    double R02 = R10*R21 - R20*R11;
+    double R12 = R20*R01 - R00*R21;
+    double R22 = R00*R11 - R10*R01;
+
+    // Improve rotation matrix by applying polar decomposition.
+    if (1) {
+        // do polar decomposition. This makes the rotation matrix
+        // "proper", but probably increases the reprojection error. An
+        // iterative alignment step would be superior.
+
+        matd_t *R = matd_create_data(3, 3, (double[]) { R00, R01, R02,
+                                                       R10, R11, R12,
+                                                       R20, R21, R22 });
+
+        matd_svd_t svd = matd_svd(R);
+        matd_destroy(R);
+
+        R = matd_op("M*M'", svd.U, svd.V);
+
+        matd_destroy(svd.U);
+        matd_destroy(svd.S);
+        matd_destroy(svd.V);
+
+        R00 = MATD_EL(R, 0, 0);
+        R01 = MATD_EL(R, 0, 1);
+        R02 = MATD_EL(R, 0, 2);
+        R10 = MATD_EL(R, 1, 0);
+        R11 = MATD_EL(R, 1, 1);
+        R12 = MATD_EL(R, 1, 2);
+        R20 = MATD_EL(R, 2, 0);
+        R21 = MATD_EL(R, 2, 1);
+        R22 = MATD_EL(R, 2, 2);
+
+        matd_destroy(R);
+    }
+
+    return matd_create_data(4, 4, (double[]) { R00, R01, R02, TX,
+                                               R10, R11, R12, TY,
+                                               R20, R21, R22, TZ,
+                                                0, 0, 0, 1 });
+}
+
+// Similar to above
+// Recover the model view matrix assuming that the projection matrix is:
+//
+// [ F  0  A  0 ]     (see glFrustrum)
+// [ 0  G  B  0 ]
+// [ 0  0  C  D ]
+// [ 0  0 -1  0 ]
+
+matd_t *homography_to_model_view(const matd_t *H, double F, double G, double A, double B, double C, double D)
+{
+    // Note that every variable that we compute is proportional to the scale factor of H.
+    double R20 = -MATD_EL(H, 2, 0);
+    double R21 = -MATD_EL(H, 2, 1);
+    double TZ  = -MATD_EL(H, 2, 2);
+    double R00 = (MATD_EL(H, 0, 0) - A*R20) / F;
+    double R01 = (MATD_EL(H, 0, 1) - A*R21) / F;
+    double TX  = (MATD_EL(H, 0, 2) - A*TZ)  / F;
+    double R10 = (MATD_EL(H, 1, 0) - B*R20) / G;
+    double R11 = (MATD_EL(H, 1, 1) - B*R21) / G;
+    double TY  = (MATD_EL(H, 1, 2) - B*TZ)  / G;
+
+    // compute the scale by requiring that the rotation columns are unit length
+    // (Use geometric average of the two length vectors we have)
+    double length1 = sqrtf(R00*R00 + R10*R10 + R20*R20);
+    double length2 = sqrtf(R01*R01 + R11*R11 + R21*R21);
+    double s = 1.0 / sqrtf(length1 * length2);
+
+    // get sign of S by requiring the tag to be in front of the camera
+    // (which is Z < 0) for our conventions.
+    if (TZ > 0)
+        s *= -1;
+
+    R20 *= s;
+    R21 *= s;
+    TZ  *= s;
+    R00 *= s;
+    R01 *= s;
+    TX  *= s;
+    R10 *= s;
+    R11 *= s;
+    TY  *= s;
+
+    // now recover [R02 R12 R22] by noting that it is the cross product of the other two columns.
+    double R02 = R10*R21 - R20*R11;
+    double R12 = R20*R01 - R00*R21;
+    double R22 = R00*R11 - R10*R01;
+
+    // TODO XXX: Improve rotation matrix by applying polar decomposition.
+
+    return matd_create_data(4, 4, (double[]) { R00, R01, R02, TX,
+        R10, R11, R12, TY,
+        R20, R21, R22, TZ,
+        0, 0, 0, 1 });
+}
+
+// Only uses the upper 3x3 matrix.
+/*
+static void matrix_to_quat(const matd_t *R, double q[4])
+{
+    // see: "from quaternion to matrix and back"
+
+    // trace: get the same result if R is 4x4 or 3x3:
+    double T = MATD_EL(R, 0, 0) + MATD_EL(R, 1, 1) + MATD_EL(R, 2, 2) + 1;
+    double S = 0;
+
+    double m0  = MATD_EL(R, 0, 0);
+    double m1  = MATD_EL(R, 1, 0);
+    double m2  = MATD_EL(R, 2, 0);
+    double m4  = MATD_EL(R, 0, 1);
+    double m5  = MATD_EL(R, 1, 1);
+    double m6  = MATD_EL(R, 2, 1);
+    double m8  = MATD_EL(R, 0, 2);
+    double m9  = MATD_EL(R, 1, 2);
+    double m10 = MATD_EL(R, 2, 2);
+
+    if (T > 0.0000001) {
+        S = sqrtf(T) * 2;
+        q[1] = -( m9 - m6 ) / S;
+        q[2] = -( m2 - m8 ) / S;
+        q[3] = -( m4 - m1 ) / S;
+        q[0] = 0.25 * S;
+    } else if ( m0 > m5 && m0 > m10 )  {	// Column 0:
+        S  = sqrtf( 1.0 + m0 - m5 - m10 ) * 2;
+        q[1] = -0.25 * S;
+        q[2] = -(m4 + m1 ) / S;
+        q[3] = -(m2 + m8 ) / S;
+        q[0] = (m9 - m6 ) / S;
+    } else if ( m5 > m10 ) {			// Column 1:
+        S  = sqrtf( 1.0 + m5 - m0 - m10 ) * 2;
+        q[1] = -(m4 + m1 ) / S;
+        q[2] = -0.25 * S;
+        q[3] = -(m9 + m6 ) / S;
+        q[0] = (m2 - m8 ) / S;
+    } else {
+        // Column 2:
+        S  = sqrtf( 1.0 + m10 - m0 - m5 ) * 2;
+        q[1] = -(m2 + m8 ) / S;
+        q[2] = -(m9 + m6 ) / S;
+        q[3] = -0.25 * S;
+        q[0] = (m4 - m1 ) / S;
+    }
+
+    double mag2 = 0;
+    for (int i = 0; i < 4; i++)
+        mag2 += q[i]*q[i];
+    double norm = 1.0 / sqrtf(mag2);
+    for (int i = 0; i < 4; i++)
+        q[i] *= norm;
+}
+*/
+
+// overwrites upper 3x3 area of matrix M. Doesn't touch any other elements of M.
+void quat_to_matrix(const double q[4], matd_t *M)
+{
+    double w = q[0], x = q[1], y = q[2], z = q[3];
+
+    MATD_EL(M, 0, 0) = w*w + x*x - y*y - z*z;
+    MATD_EL(M, 0, 1) = 2*x*y - 2*w*z;
+    MATD_EL(M, 0, 2) = 2*x*z + 2*w*y;
+
+    MATD_EL(M, 1, 0) = 2*x*y + 2*w*z;
+    MATD_EL(M, 1, 1) = w*w - x*x + y*y - z*z;
+    MATD_EL(M, 1, 2) = 2*y*z - 2*w*x;
+
+    MATD_EL(M, 2, 0) = 2*x*z - 2*w*y;
+    MATD_EL(M, 2, 1) = 2*y*z + 2*w*x;
+    MATD_EL(M, 2, 2) = w*w - x*x - y*y + z*z;
+}
diff --git a/common/homography.h b/common/homography.h
new file mode 100644
index 0000000..c92193f
--- /dev/null
+++ b/common/homography.h
@@ -0,0 +1,183 @@
+/* Copyright (C) 2013-2016, The Regents of The University of Michigan.
+All rights reserved.
+This software was developed in the APRIL Robotics Lab under the
+direction of Edwin Olson, ebolson@umich.edu. This software may be
+available under alternative licensing terms; contact the address above.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+1. Redistributions of source code must retain the above copyright notice, this
+   list of conditions and the following disclaimer.
+2. Redistributions in binary form must reproduce the above copyright notice,
+   this list of conditions and the following disclaimer in the documentation
+   and/or other materials provided with the distribution.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+The views and conclusions contained in the software and documentation are those
+of the authors and should not be interpreted as representing official policies,
+either expressed or implied, of the Regents of The University of Michigan.
+*/
+
+#pragma once
+
+#include "matd.h"
+#include "zarray.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+    /** Given a 3x3 homography matrix and the focal lengths of the
+     * camera, compute the pose of the tag. The focal lengths should
+     * be given in pixels. For example, if the camera's focal length
+     * is twice the width of the sensor, and the sensor is 600 pixels
+     * across, the focal length in pixels is 2*600. Note that the
+     * focal lengths in the fx and fy direction will be approximately
+     * equal for most lenses, and is not a function of aspect ratio.
+     *
+     * Theory: The homography matrix is the product of the camera
+     * projection matrix and the tag's pose matrix (the matrix that
+     * projects points from the tag's local coordinate system to the
+     * camera's coordinate frame).
+     *
+    * [ h00 h01 h02 h03] = [ fx   0     cx 0 ] [ R00 R01 R02 TX ]
+     * [ h10 h11 h12 h13] = [ 0    fy    cy 0 ] [ R10 R11 R12 TY ]
+     * [ h20 h21 h22 h23] = [ 0    0      s 0 ] [ R20 R21 R22 TZ ]
+     *                                          [ 0   0   0   1  ]
+     *
+     * fx is the focal length in the x direction of the camera
+     * (typically measured in pixels), fy is the focal length. cx and
+     * cy give the focal center (usually the middle of the image), and
+     * s is either +1 or -1, depending on the conventions you use. (We
+     * use 1.)
+
+     * When observing a tag, the points we project in world space all
+     * have z=0, so we can form a 3x3 matrix by eliminating the 3rd
+     * column of the pose matrix.
+     *
+     * [ h00 h01 h02 ] = [ fx   0    cx 0 ] [ R00 R01 TX ]
+     * [ h10 h11 h12 ] = [ 0    fy   cy 0 ] [ R10 R11 TY ]
+     * [ h20 h21 h22 ] = [ 0    0     s 0 ] [ R20 R21 TZ ]
+     *                                      [ 0   0   1  ]
+     *
+     * (note that these h's are different from the ones above.)
+     *
+     * We can multiply the right-hand side to yield a set of equations
+     * relating the values of h to the values of the pose matrix.
+     *
+     * There are two wrinkles. The first is that the homography matrix
+     * is known only up to scale. We recover the unknown scale by
+     * constraining the magnitude of the first two columns of the pose
+     * matrix to be 1. We use the geometric average scale. The sign of
+     * the scale factor is recovered by constraining the observed tag
+     * to be in front of the camera. Once scaled, we recover the first
+     * two colmuns of the rotation matrix. The third column is the
+     * cross product of these.
+     *
+     * The second wrinkle is that the computed rotation matrix might
+     * not be exactly orthogonal, so we perform a polar decomposition
+     * to find a good pure rotation approximation.
+     *
+     * Tagsize is the size of the tag in your desired units. I.e., if
+     * your tag measures 0.25m along the side, your tag size is
+     * 0.25. (The homography is computed in terms of *half* the tag
+     * size, i.e., that a tag is 2 units wide as it spans from -1 to
+     * +1, but this code makes the appropriate adjustment.)
+     *
+     * A note on signs:
+     *
+     * The code below incorporates no additional negative signs, but
+     * respects the sign of any parameters that you pass in. Flipping
+     * the signs allows you to modify the projection to suit a wide
+     * variety of conditions.
+     *
+     * In the "pure geometry" projection matrix, the image appears
+     * upside down; i.e., the x and y coordinates on the left hand
+     * side are the opposite of those on the right of the camera
+     * projection matrix. This would happen for all parameters
+     * positive: recall that points in front of the camera have
+     * negative Z values, which will cause the sign of all points to
+     * flip.
+     *
+     * However, most cameras flip things so that the image appears
+     * "right side up" as though you were looking through the lens
+     * directly. This means that the projected points should have the
+     * same sign as the points on the right of the camera projection
+     * matrix. To achieve this, flip fx and fy.
+     *
+     * One further complication: cameras typically put y=0 at the top
+     * of the image, instead of the bottom. Thus you generally want to
+     * flip y yet again (so it's now positive again).
+     *
+     * General advice: you probably want fx negative, fy positive, cx
+     * and cy positive, and s=1.
+     **/
+
+// correspondences is a list of float[4]s, consisting of the points x
+// and y concatenated. We will compute a homography such that y = Hx
+// Specifically, float [] { a, b, c, d } where x = [a b], y = [c d].
+
+
+#define HOMOGRAPHY_COMPUTE_FLAG_INVERSE 1
+#define HOMOGRAPHY_COMPUTE_FLAG_SVD 0
+
+matd_t *homography_compute(zarray_t *correspondences, int flags);
+
+//void homography_project(const matd_t *H, double x, double y, double *ox, double *oy);
+static inline void homography_project(const matd_t *H, double x, double y, double *ox, double *oy)
+{
+    double xx = MATD_EL(H, 0, 0)*x + MATD_EL(H, 0, 1)*y + MATD_EL(H, 0, 2);
+    double yy = MATD_EL(H, 1, 0)*x + MATD_EL(H, 1, 1)*y + MATD_EL(H, 1, 2);
+    double zz = MATD_EL(H, 2, 0)*x + MATD_EL(H, 2, 1)*y + MATD_EL(H, 2, 2);
+
+    *ox = xx / zz;
+    *oy = yy / zz;
+}
+
+// assuming that the projection matrix is:
+// [ fx 0  cx 0 ]
+// [  0 fy cy 0 ]
+// [  0  0  1 0 ]
+//
+// And that the homography is equal to the projection matrix times the model matrix,
+// recover the model matrix (which is returned). Note that the third column of the model
+// matrix is missing in the expresison below, reflecting the fact that the homography assumes
+// all points are at z=0 (i.e., planar) and that the element of z is thus omitted.
+// (3x1 instead of 4x1).
+//
+// [ fx 0  cx 0 ] [ R00  R01  TX ]    [ H00 H01 H02 ]
+// [  0 fy cy 0 ] [ R10  R11  TY ] =  [ H10 H11 H12 ]
+// [  0  0  1 0 ] [ R20  R21  TZ ] =  [ H20 H21 H22 ]
+//                [  0    0    1 ]
+//
+// fx*R00 + cx*R20 = H00   (note, H only known up to scale; some additional adjustments required; see code.)
+// fx*R01 + cx*R21 = H01
+// fx*TX  + cx*TZ  = H02
+// fy*R10 + cy*R20 = H10
+// fy*R11 + cy*R21 = H11
+// fy*TY  + cy*TZ  = H12
+// R20 = H20
+// R21 = H21
+// TZ  = H22
+matd_t *homography_to_pose(const matd_t *H, double fx, double fy, double cx, double cy);
+
+// Similar to above
+// Recover the model view matrix assuming that the projection matrix is:
+//
+// [ F  0  A  0 ]     (see glFrustrum)
+// [ 0  G  B  0 ]
+// [ 0  0  C  D ]
+// [ 0  0 -1  0 ]
+
+matd_t *homography_to_model_view(const matd_t *H, double F, double G, double A, double B, double C, double D);
+
+#ifdef __cplusplus
+}
+#endif
diff --git a/common/image_types.h b/common/image_types.h
new file mode 100644
index 0000000..58e5258
--- /dev/null
+++ b/common/image_types.h
@@ -0,0 +1,84 @@
+/* Copyright (C) 2013-2016, The Regents of The University of Michigan.
+All rights reserved.
+This software was developed in the APRIL Robotics Lab under the
+direction of Edwin Olson, ebolson@umich.edu. This software may be
+available under alternative licensing terms; contact the address above.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+1. Redistributions of source code must retain the above copyright notice, this
+   list of conditions and the following disclaimer.
+2. Redistributions in binary form must reproduce the above copyright notice,
+   this list of conditions and the following disclaimer in the documentation
+   and/or other materials provided with the distribution.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+The views and conclusions contained in the software and documentation are those
+of the authors and should not be interpreted as representing official policies,
+either expressed or implied, of the Regents of The University of Michigan.
+*/
+
+#pragma once
+
+#include <stdint.h>
+
+// to support conversions between different types, we define all image
+// types at once. Type-specific implementations can then #include this
+// file, assured that the basic types of each image are known.
+
+typedef struct image_u8 image_u8_t;
+struct image_u8
+{
+    const int32_t width;
+    const int32_t height;
+    const int32_t stride;
+
+    uint8_t *buf;
+};
+
+typedef struct image_u8x3 image_u8x3_t;
+struct image_u8x3
+{
+    const int32_t width;
+    const int32_t height;
+    const int32_t stride; // bytes per line
+
+    uint8_t *buf;
+};
+
+typedef struct image_u8x4 image_u8x4_t;
+struct image_u8x4
+{
+    const int32_t width;
+    const int32_t height;
+    const int32_t stride; // bytes per line
+
+    uint8_t *buf;
+};
+
+typedef struct image_f32 image_f32_t;
+struct image_f32
+{
+    const int32_t width;
+    const int32_t height;
+    const int32_t stride; // floats per line
+
+    float *buf; // indexed as buf[y*stride + x]
+};
+
+typedef struct image_u32 image_u32_t;
+struct image_u32
+{
+    const int32_t width;
+    const int32_t height;
+    const int32_t stride; // int32_ts per line
+
+    uint32_t *buf; // indexed as buf[y*stride + x]
+};
diff --git a/common/image_u8.c b/common/image_u8.c
new file mode 100644
index 0000000..eef6c04
--- /dev/null
+++ b/common/image_u8.c
@@ -0,0 +1,557 @@
+/* Copyright (C) 2013-2016, The Regents of The University of Michigan.
+All rights reserved.
+This software was developed in the APRIL Robotics Lab under the
+direction of Edwin Olson, ebolson@umich.edu. This software may be
+available under alternative licensing terms; contact the address above.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+1. Redistributions of source code must retain the above copyright notice, this
+   list of conditions and the following disclaimer.
+2. Redistributions in binary form must reproduce the above copyright notice,
+   this list of conditions and the following disclaimer in the documentation
+   and/or other materials provided with the distribution.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+The views and conclusions contained in the software and documentation are those
+of the authors and should not be interpreted as representing official policies,
+either expressed or implied, of the Regents of The University of Michigan.
+*/
+
+#include <assert.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <math.h>
+
+#include "common/image_u8.h"
+#include "common/pnm.h"
+#include "common/math_util.h"
+
+// least common multiple of 64 (sandy bridge cache line) and 24 (stride
+// needed for RGB in 8-wide vector processing)
+#define DEFAULT_ALIGNMENT_U8 96
+
+image_u8_t *image_u8_create_stride(unsigned int width, unsigned int height, unsigned int stride)
+{
+    uint8_t *buf = calloc(height*stride, sizeof(uint8_t));
+
+    // const initializer
+    image_u8_t tmp = { .width = width, .height = height, .stride = stride, .buf = buf };
+
+    image_u8_t *im = calloc(1, sizeof(image_u8_t));
+    memcpy(im, &tmp, sizeof(image_u8_t));
+    return im;
+}
+
+image_u8_t *image_u8_create(unsigned int width, unsigned int height)
+{
+    return image_u8_create_alignment(width, height, DEFAULT_ALIGNMENT_U8);
+}
+
+image_u8_t *image_u8_create_alignment(unsigned int width, unsigned int height, unsigned int alignment)
+{
+    int stride = width;
+
+    if ((stride % alignment) != 0)
+        stride += alignment - (stride % alignment);
+
+    return image_u8_create_stride(width, height, stride);
+}
+
+image_u8_t *image_u8_copy(const image_u8_t *in)
+{
+    uint8_t *buf = malloc(in->height*in->stride*sizeof(uint8_t));
+    memcpy(buf, in->buf, in->height*in->stride*sizeof(uint8_t));
+
+    // const initializer
+    image_u8_t tmp = { .width = in->width, .height = in->height, .stride = in->stride, .buf = buf };
+
+    image_u8_t *copy = calloc(1, sizeof(image_u8_t));
+    memcpy(copy, &tmp, sizeof(image_u8_t));
+    return copy;
+}
+
+void image_u8_destroy(image_u8_t *im)
+{
+    if (!im)
+        return;
+
+    free(im->buf);
+    free(im);
+}
+
+////////////////////////////////////////////////////////////
+// PNM file i/o
+image_u8_t *image_u8_create_from_pnm(const char *path)
+{
+    return image_u8_create_from_pnm_alignment(path, DEFAULT_ALIGNMENT_U8);
+}
+
+image_u8_t *image_u8_create_from_pnm_alignment(const char *path, int alignment)
+{
+    pnm_t *pnm = pnm_create_from_file(path);
+    if (pnm == NULL)
+        return NULL;
+
+    image_u8_t *im = NULL;
+
+    switch (pnm->format) {
+        case PNM_FORMAT_GRAY: {
+            im = image_u8_create_alignment(pnm->width, pnm->height, alignment);
+
+            if (pnm->max == 255) {
+                for (int y = 0; y < im->height; y++)
+                    memcpy(&im->buf[y*im->stride], &pnm->buf[y*im->width], im->width);
+            } else if (pnm->max == 65535) {
+                for (int y = 0; y < im->height; y++)
+                    for (int x = 0; x < im->width; x++)
+                        im->buf[y*im->stride + x] = pnm->buf[2*(y*im->width + x)];
+            } else {
+                assert(0);
+            }
+
+            break;
+        }
+
+        case PNM_FORMAT_RGB: {
+            im = image_u8_create_alignment(pnm->width, pnm->height, alignment);
+
+            if (pnm->max == 255) {
+                // Gray conversion for RGB is gray = (r + g + g + b)/4
+                for (int y = 0; y < im->height; y++) {
+                    for (int x = 0; x < im->width; x++) {
+                        uint8_t gray = (pnm->buf[y*im->width*3 + 3*x+0] +    // r
+                                        pnm->buf[y*im->width*3 + 3*x+1] +    // g
+                                        pnm->buf[y*im->width*3 + 3*x+1] +    // g
+                                        pnm->buf[y*im->width*3 + 3*x+2])     // b
+                            / 4;
+
+                        im->buf[y*im->stride + x] = gray;
+                    }
+                }
+            } else if (pnm->max == 65535) {
+                for (int y = 0; y < im->height; y++) {
+                    for (int x = 0; x < im->width; x++) {
+                        int r = pnm->buf[6*(y*im->width + x) + 0];
+                        int g = pnm->buf[6*(y*im->width + x) + 2];
+                        int b = pnm->buf[6*(y*im->width + x) + 4];
+
+                        im->buf[y*im->stride + x] = (r + g + g + b) / 4;
+                    }
+                }
+            } else {
+                assert(0);
+            }
+
+            break;
+        }
+
+        case PNM_FORMAT_BINARY: {
+            im = image_u8_create_alignment(pnm->width, pnm->height, alignment);
+
+            // image is padded to be whole bytes on each row.
+
+            // how many bytes per row on the input?
+            int pbmstride = (im->width + 7) / 8;
+
+            for (int y = 0; y < im->height; y++) {
+                for (int x = 0; x < im->width; x++) {
+                    int byteidx = y * pbmstride + x / 8;
+                    int bitidx = 7 - (x & 7);
+
+                    // ack, black is one according to pbm docs!
+                    if ((pnm->buf[byteidx] >> bitidx) & 1)
+                        im->buf[y*im->stride + x] = 0;
+                    else
+                        im->buf[y*im->stride + x] = 255;
+                }
+            }
+            break;
+        }
+    }
+
+    pnm_destroy(pnm);
+    return im;
+}
+
+image_u8_t *image_u8_create_from_f32(image_f32_t *fim)
+{
+    image_u8_t *im = image_u8_create(fim->width, fim->height);
+
+    for (int y = 0; y < fim->height; y++) {
+        for (int x = 0; x < fim->width; x++) {
+            float v = fim->buf[y*fim->stride + x];
+            im->buf[y*im->stride + x] = (int) (255 * v);
+        }
+    }
+
+    return im;
+}
+
+
+int image_u8_write_pnm(const image_u8_t *im, const char *path)
+{
+    FILE *f = fopen(path, "wb");
+    int res = 0;
+
+    if (f == NULL) {
+        res = -1;
+        goto finish;
+    }
+
+    // Only outputs to grayscale
+    fprintf(f, "P5\n%d %d\n255\n", im->width, im->height);
+
+    for (int y = 0; y < im->height; y++) {
+        if (im->width != fwrite(&im->buf[y*im->stride], 1, im->width, f)) {
+            res = -2;
+            goto finish;
+        }
+    }
+
+  finish:
+    if (f != NULL)
+        fclose(f);
+
+    return res;
+}
+
+void image_u8_draw_circle(image_u8_t *im, float x0, float y0, float r, int v)
+{
+    r = r*r;
+
+    for (int y = y0-r; y <= y0+r; y++) {
+        for (int x = x0-r; x <= x0+r; x++) {
+            float d = (x-x0)*(x-x0) + (y-y0)*(y-y0);
+            if (d > r)
+                continue;
+
+            if (x >= 0 && x < im->width && y >= 0 && y < im->height) {
+                int idx = y*im->stride + x;
+                im->buf[idx] = v;
+            }
+        }
+    }
+}
+
+void image_u8_draw_annulus(image_u8_t *im, float x0, float y0, float r0, float r1, int v)
+{
+    r0 = r0*r0;
+    r1 = r1*r1;
+
+    assert(r0 < r1);
+
+    for (int y = y0-r1; y <= y0+r1; y++) {
+        for (int x = x0-r1; x <= x0+r1; x++) {
+            float d = (x-x0)*(x-x0) + (y-y0)*(y-y0);
+            if (d < r0 || d > r1)
+                continue;
+
+            int idx = y*im->stride + x;
+            im->buf[idx] = v;
+        }
+    }
+}
+
+// only widths 1 and 3 supported (and 3 only badly)
+void image_u8_draw_line(image_u8_t *im, float x0, float y0, float x1, float y1, int v, int width)
+{
+    double dist = sqrtf((y1-y0)*(y1-y0) + (x1-x0)*(x1-x0));
+    double delta = 0.5 / dist;
+
+    // terrible line drawing code
+    for (float f = 0; f <= 1; f += delta) {
+        int x = ((int) (x1 + (x0 - x1) * f));
+        int y = ((int) (y1 + (y0 - y1) * f));
+
+        if (x < 0 || y < 0 || x >= im->width || y >= im->height)
+            continue;
+
+        int idx = y*im->stride + x;
+        im->buf[idx] = v;
+        if (width > 1) {
+            im->buf[idx+1] = v;
+            im->buf[idx+im->stride] = v;
+            im->buf[idx+1+im->stride] = v;
+        }
+    }
+}
+
+void image_u8_darken(image_u8_t *im)
+{
+    for (int y = 0; y < im->height; y++) {
+        for (int x = 0; x < im->width; x++) {
+            im->buf[im->stride*y+x] /= 2;
+        }
+    }
+}
+
+static void convolve(const uint8_t *x, uint8_t *y, int sz, const uint8_t *k, int ksz)
+{
+    assert((ksz&1)==1);
+
+    for (int i = 0; i < ksz/2 && i < sz; i++)
+        y[i] = x[i];
+
+    for (int i = 0; i < sz - ksz; i++) {
+        uint32_t acc = 0;
+
+        for (int j = 0; j < ksz; j++)
+            acc += k[j]*x[i+j];
+
+        y[ksz/2 + i] = acc >> 8;
+    }
+
+    for (int i = sz - ksz + ksz/2; i < sz; i++)
+        y[i] = x[i];
+}
+
+void image_u8_convolve_2D(image_u8_t *im, const uint8_t *k, int ksz)
+{
+    assert((ksz & 1) == 1); // ksz must be odd.
+
+    for (int y = 0; y < im->height; y++) {
+
+        uint8_t *x = malloc(sizeof(uint8_t)*im->stride);
+        memcpy(x, &im->buf[y*im->stride], im->stride);
+
+        convolve(x, &im->buf[y*im->stride], im->width, k, ksz);
+        free(x);
+    }
+
+    for (int x = 0; x < im->width; x++) {
+        uint8_t *xb = malloc(sizeof(uint8_t)*im->height);
+        uint8_t *yb = malloc(sizeof(uint8_t)*im->height);
+
+        for (int y = 0; y < im->height; y++)
+            xb[y] = im->buf[y*im->stride + x];
+
+        convolve(xb, yb, im->height, k, ksz);
+        free(xb);
+
+        for (int y = 0; y < im->height; y++)
+            im->buf[y*im->stride + x] = yb[y];
+        free(yb);
+    }
+}
+
+void image_u8_gaussian_blur(image_u8_t *im, double sigma, int ksz)
+{
+    if (sigma == 0)
+        return;
+
+    assert((ksz & 1) == 1); // ksz must be odd.
+
+    // build the kernel.
+    double *dk = malloc(sizeof(double)*ksz);
+
+    // for kernel of length 5:
+    // dk[0] = f(-2), dk[1] = f(-1), dk[2] = f(0), dk[3] = f(1), dk[4] = f(2)
+    for (int i = 0; i < ksz; i++) {
+        int x = -ksz/2 + i;
+        double v = exp(-.5*sq(x / sigma));
+        dk[i] = v;
+    }
+
+    // normalize
+    double acc = 0;
+    for (int i = 0; i < ksz; i++)
+        acc += dk[i];
+
+    for (int i = 0; i < ksz; i++)
+        dk[i] /= acc;
+
+    uint8_t *k = malloc(sizeof(uint8_t)*ksz);
+    for (int i = 0; i < ksz; i++)
+        k[i] = dk[i]*255;
+
+    if (0) {
+        for (int i = 0; i < ksz; i++)
+            printf("%d %15f %5d\n", i, dk[i], k[i]);
+    }
+    free(dk);
+
+    image_u8_convolve_2D(im, k, ksz);
+    free(k);
+}
+
+image_u8_t *image_u8_rotate(const image_u8_t *in, double rad, uint8_t pad)
+{
+    int iwidth = in->width, iheight = in->height;
+    rad = -rad; // interpret y as being "down"
+
+    float c = cos(rad), s = sin(rad);
+
+    float p[][2] = { { 0, 0}, { iwidth, 0 }, { iwidth, iheight }, { 0, iheight} };
+
+    float xmin = HUGE_VALF, xmax = -HUGE_VALF, ymin = HUGE_VALF, ymax = -HUGE_VALF;
+    float icx = iwidth / 2.0, icy = iheight / 2.0;
+
+    for (int i = 0; i < 4; i++) {
+        float px = p[i][0] - icx;
+        float py = p[i][1] - icy;
+
+        float nx = px*c - py*s;
+        float ny = px*s + py*c;
+
+        xmin = fmin(xmin, nx);
+        xmax = fmax(xmax, nx);
+        ymin = fmin(ymin, ny);
+        ymax = fmax(ymax, ny);
+    }
+
+    int owidth = ceil(xmax-xmin), oheight = ceil(ymax - ymin);
+    image_u8_t *out = image_u8_create(owidth, oheight);
+
+    // iterate over output pixels.
+    for (int oy = 0; oy < oheight; oy++) {
+        for (int ox = 0; ox < owidth; ox++) {
+            // work backwards from destination coordinates...
+            // sample pixel centers.
+            float sx = ox - owidth / 2.0 + .5;
+            float sy = oy - oheight / 2.0 + .5;
+
+            // project into input-image space
+            int ix = floor(sx*c + sy*s + icx);
+            int iy = floor(-sx*s + sy*c + icy);
+
+            if (ix >= 0 && iy >= 0 && ix < iwidth && iy < iheight)
+                out->buf[oy*out->stride+ox] = in->buf[iy*in->stride + ix];
+            else
+                out->buf[oy*out->stride+ox] = pad;
+        }
+    }
+
+    return out;
+}
+
+image_u8_t *image_u8_decimate(image_u8_t *im, float ffactor)
+{
+    int width = im->width, height = im->height;
+
+    if (ffactor == 1.5) {
+        int swidth = width / 3 * 2, sheight = height / 3 * 2;
+
+        image_u8_t *decim = image_u8_create(swidth, sheight);
+
+        int y = 0, sy = 0;
+        while (sy < sheight) {
+            int x = 0, sx = 0;
+            while (sx < swidth) {
+
+                // a b c
+                // d e f
+                // g h i
+                uint8_t a = im->buf[(y+0)*im->stride + (x+0)];
+                uint8_t b = im->buf[(y+0)*im->stride + (x+1)];
+                uint8_t c = im->buf[(y+0)*im->stride + (x+2)];
+
+                uint8_t d = im->buf[(y+1)*im->stride + (x+0)];
+                uint8_t e = im->buf[(y+1)*im->stride + (x+1)];
+                uint8_t f = im->buf[(y+1)*im->stride + (x+2)];
+
+                uint8_t g = im->buf[(y+2)*im->stride + (x+0)];
+                uint8_t h = im->buf[(y+2)*im->stride + (x+1)];
+                uint8_t i = im->buf[(y+2)*im->stride + (x+2)];
+
+                decim->buf[(sy+0)*decim->stride + (sx + 0)] =
+                    (4*a+2*b+2*d+e)/9;
+                decim->buf[(sy+0)*decim->stride + (sx + 1)] =
+                    (4*c+2*b+2*f+e)/9;
+
+                decim->buf[(sy+1)*decim->stride + (sx + 0)] =
+                    (4*g+2*d+2*h+e)/9;
+                decim->buf[(sy+1)*decim->stride + (sx + 1)] =
+                    (4*i+2*f+2*h+e)/9;
+
+                x += 3;
+                sx += 2;
+            }
+
+            y += 3;
+            sy += 2;
+        }
+
+        return decim;
+    }
+
+    int factor = (int) ffactor;
+
+    int swidth = 1 + (width - 1)/factor;
+    int sheight = 1 + (height - 1)/factor;
+    image_u8_t *decim = image_u8_create(swidth, sheight);
+    int sy = 0;
+    for (int y = 0; y < height; y += factor) {
+        int sx = 0;
+        for (int x = 0; x < width; x += factor) {
+            decim->buf[sy*decim->stride + sx] = im->buf[y*im->stride + x];
+            sx++;
+        }
+        sy++;
+    }
+    return decim;
+}
+
+void image_u8_fill_line_max(image_u8_t *im, const image_u8_lut_t *lut, const float *xy0, const float *xy1)
+{
+    // what is the maximum distance that will result in drawing into our LUT?
+    float max_dist2 = (lut->nvalues-1)/lut->scale;
+    float max_dist = sqrt(max_dist2);
+
+    // the orientation of the line
+    double theta = atan2(xy1[1]-xy0[1], xy1[0]-xy0[0]);
+    double v = sin(theta), u = cos(theta);
+
+    int ix0 = iclamp(fmin(xy0[0], xy1[0]) - max_dist, 0, im->width-1);
+    int ix1 = iclamp(fmax(xy0[0], xy1[0]) + max_dist, 0, im->width-1);
+
+    int iy0 = iclamp(fmin(xy0[1], xy1[1]) - max_dist, 0, im->height-1);
+    int iy1 = iclamp(fmax(xy0[1], xy1[1]) + max_dist, 0, im->height-1);
+
+    // the line segment xy0---xy1 can be parameterized in terms of line coordinates.
+    // We fix xy0 to be at line coordinate 0.
+    float xy1_line_coord = (xy1[0]-xy0[0])*u + (xy1[1]-xy0[1])*v;
+
+    float min_line_coord = fmin(0, xy1_line_coord);
+    float max_line_coord = fmax(0, xy1_line_coord);
+
+    for (int iy = iy0; iy <= iy1; iy++) {
+        float y = iy+.5;
+
+        for (int ix = ix0; ix <= ix1; ix++) {
+            float x = ix+.5;
+
+            // compute line coordinate of this pixel.
+            float line_coord = (x - xy0[0])*u + (y - xy0[1])*v;
+
+            // find point on line segment closest to our current pixel.
+            if (line_coord < min_line_coord)
+                line_coord = min_line_coord;
+            else if (line_coord > max_line_coord)
+                line_coord = max_line_coord;
+
+            float px = xy0[0] + line_coord*u;
+            float py = xy0[1] + line_coord*v;
+
+            double dist2 = (x-px)*(x-px) + (y-py)*(y-py);
+
+            // not in our LUT?
+            int idx = dist2 * lut->scale;
+            if (idx >= lut->nvalues)
+                continue;
+
+            uint8_t lut_value = lut->values[idx];
+            uint8_t old_value = im->buf[iy*im->stride + ix];
+            if (lut_value > old_value)
+                im->buf[iy*im->stride + ix] = lut_value;
+        }
+    }
+}
diff --git a/common/image_u8.h b/common/image_u8.h
new file mode 100644
index 0000000..a0e151f
--- /dev/null
+++ b/common/image_u8.h
@@ -0,0 +1,90 @@
+/* Copyright (C) 2013-2016, The Regents of The University of Michigan.
+All rights reserved.
+This software was developed in the APRIL Robotics Lab under the
+direction of Edwin Olson, ebolson@umich.edu. This software may be
+available under alternative licensing terms; contact the address above.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+1. Redistributions of source code must retain the above copyright notice, this
+   list of conditions and the following disclaimer.
+2. Redistributions in binary form must reproduce the above copyright notice,
+   this list of conditions and the following disclaimer in the documentation
+   and/or other materials provided with the distribution.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+The views and conclusions contained in the software and documentation are those
+of the authors and should not be interpreted as representing official policies,
+either expressed or implied, of the Regents of The University of Michigan.
+*/
+
+#pragma once
+
+#include <stdint.h>
+#include "image_types.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef struct image_u8_lut image_u8_lut_t;
+struct image_u8_lut
+{
+    // When drawing, we compute the squared distance between a given pixel and a filled region.
+    // int idx = squared_distance * scale;
+    // We then index into values[idx] to obtain the color. (If we must index beyond nvalues,
+    // no drawing is performed.)
+    float    scale;
+
+    int      nvalues;
+    uint8_t *values;
+};
+
+
+// Create or load an image. returns NULL on failure. Uses default
+// stride alignment.
+image_u8_t *image_u8_create_stride(unsigned int width, unsigned int height, unsigned int stride);
+image_u8_t *image_u8_create(unsigned int width, unsigned int height);
+image_u8_t *image_u8_create_alignment(unsigned int width, unsigned int height, unsigned int alignment);
+image_u8_t *image_u8_create_from_f32(image_f32_t *fim);
+
+image_u8_t *image_u8_create_from_pnm(const char *path);
+    image_u8_t *image_u8_create_from_pnm_alignment(const char *path, int alignment);
+
+image_u8_t *image_u8_copy(const image_u8_t *in);
+void image_u8_draw_line(image_u8_t *im, float x0, float y0, float x1, float y1, int v, int width);
+void image_u8_draw_circle(image_u8_t *im, float x0, float y0, float r, int v);
+void image_u8_draw_annulus(image_u8_t *im, float x0, float y0, float r0, float r1, int v);
+
+void image_u8_fill_line_max(image_u8_t *im, const image_u8_lut_t *lut, const float *xy0, const float *xy1);
+
+void image_u8_clear(image_u8_t *im);
+void image_u8_darken(image_u8_t *im);
+void image_u8_convolve_2D(image_u8_t *im, const uint8_t *k, int ksz);
+void image_u8_gaussian_blur(image_u8_t *im, double sigma, int k);
+
+// 1.5, 2, 3, 4, ... supported
+image_u8_t *image_u8_decimate(image_u8_t *im, float factor);
+
+void image_u8_destroy(image_u8_t *im);
+
+// Write a pnm. Returns 0 on success
+// Currently only supports GRAY and RGBA. Does not write out alpha for RGBA
+int image_u8_write_pnm(const image_u8_t *im, const char *path);
+
+// rotate the image by 'rad' radians. (Rotated in the "intuitive
+// sense", i.e., if Y were up. When input values are unavailable, the
+// value 'pad' is inserted instead. The geometric center of the output
+// image corresponds to the geometric center of the input image.
+image_u8_t *image_u8_rotate(const image_u8_t *in, double rad, uint8_t pad);
+
+#ifdef __cplusplus
+}
+#endif
diff --git a/common/image_u8x3.c b/common/image_u8x3.c
new file mode 100644
index 0000000..1463e5f
--- /dev/null
+++ b/common/image_u8x3.c
@@ -0,0 +1,274 @@
+/* Copyright (C) 2013-2016, The Regents of The University of Michigan.
+All rights reserved.
+This software was developed in the APRIL Robotics Lab under the
+direction of Edwin Olson, ebolson@umich.edu. This software may be
+available under alternative licensing terms; contact the address above.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+1. Redistributions of source code must retain the above copyright notice, this
+   list of conditions and the following disclaimer.
+2. Redistributions in binary form must reproduce the above copyright notice,
+   this list of conditions and the following disclaimer in the documentation
+   and/or other materials provided with the distribution.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+The views and conclusions contained in the software and documentation are those
+of the authors and should not be interpreted as representing official policies,
+either expressed or implied, of the Regents of The University of Michigan.
+*/
+
+#include <assert.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <math.h>
+
+#include "math_util.h"
+#include "pnm.h"
+
+#include "image_u8x3.h"
+
+// least common multiple of 64 (sandy bridge cache line) and 48 (stride needed
+// for 16byte-wide RGB processing). (It's possible that 48 would be enough).
+#define DEFAULT_ALIGNMENT_U8X3 192
+
+image_u8x3_t *image_u8x3_create(unsigned int width, unsigned int height)
+{
+    return image_u8x3_create_alignment(width, height, DEFAULT_ALIGNMENT_U8X3);
+}
+
+image_u8x3_t *image_u8x3_create_alignment(unsigned int width, unsigned int height, unsigned int alignment)
+{
+    int stride = 3*width;
+
+    if ((stride % alignment) != 0)
+        stride += alignment - (stride % alignment);
+
+    uint8_t *buf = calloc(height*stride, sizeof(uint8_t));
+
+    // const initializer
+    image_u8x3_t tmp = { .width = width, .height = height, .stride = stride, .buf = buf };
+
+    image_u8x3_t *im = calloc(1, sizeof(image_u8x3_t));
+    memcpy(im, &tmp, sizeof(image_u8x3_t));
+    return im;
+}
+
+image_u8x3_t *image_u8x3_copy(const image_u8x3_t *in)
+{
+    uint8_t *buf = malloc(in->height*in->stride*sizeof(uint8_t));
+    memcpy(buf, in->buf, in->height*in->stride*sizeof(uint8_t));
+
+    // const initializer
+    image_u8x3_t tmp = { .width = in->width, .height = in->height, .stride = in->stride, .buf = buf };
+
+    image_u8x3_t *copy = calloc(1, sizeof(image_u8x3_t));
+    memcpy(copy, &tmp, sizeof(image_u8x3_t));
+    return copy;
+}
+
+void image_u8x3_destroy(image_u8x3_t *im)
+{
+    if (!im)
+        return;
+
+    free(im->buf);
+    free(im);
+}
+
+////////////////////////////////////////////////////////////
+// PNM file i/o
+
+// Create an RGB image from PNM
+image_u8x3_t *image_u8x3_create_from_pnm(const char *path)
+{
+    pnm_t *pnm = pnm_create_from_file(path);
+    if (pnm == NULL)
+        return NULL;
+
+    image_u8x3_t *im = NULL;
+
+    switch (pnm->format) {
+        case PNM_FORMAT_GRAY: {
+            im = image_u8x3_create(pnm->width, pnm->height);
+
+            for (int y = 0; y < im->height; y++) {
+                for (int x = 0; x < im->width; x++) {
+                    uint8_t gray = pnm->buf[y*im->width + x];
+                    im->buf[y*im->stride + x*3 + 0] = gray;
+                    im->buf[y*im->stride + x*3 + 1] = gray;
+                    im->buf[y*im->stride + x*3 + 2] = gray;
+                }
+            }
+
+            break;
+        }
+
+        case PNM_FORMAT_RGB: {
+            im = image_u8x3_create(pnm->width, pnm->height);
+
+            for (int y = 0; y < im->height; y++) {
+                for (int x = 0; x < im->width; x++) {
+                    uint8_t r = pnm->buf[y*im->width*3 + 3*x];
+                    uint8_t g = pnm->buf[y*im->width*3 + 3*x+1];
+                    uint8_t b = pnm->buf[y*im->width*3 + 3*x+2];
+
+                    im->buf[y*im->stride + x*3 + 0] = r;
+                    im->buf[y*im->stride + x*3 + 1] = g;
+                    im->buf[y*im->stride + x*3 + 2] = b;
+                }
+            }
+
+            break;
+        }
+    }
+
+    pnm_destroy(pnm);
+    return im;
+}
+
+int image_u8x3_write_pnm(const image_u8x3_t *im, const char *path)
+{
+    FILE *f = fopen(path, "wb");
+    int res = 0;
+
+    if (f == NULL) {
+        res = -1;
+        goto finish;
+    }
+
+    // Only outputs to RGB
+    fprintf(f, "P6\n%d %d\n255\n", im->width, im->height);
+    int linesz = im->width * 3;
+    for (int y = 0; y < im->height; y++) {
+        if (linesz != fwrite(&im->buf[y*im->stride], 1, linesz, f)) {
+            res = -1;
+            goto finish;
+        }
+    }
+
+finish:
+    if (f != NULL)
+        fclose(f);
+
+    return res;
+}
+
+// only width 1 supported
+void image_u8x3_draw_line(image_u8x3_t *im, float x0, float y0, float x1, float y1, uint8_t rgb[3], int width)
+{
+    double dist = sqrtf((y1-y0)*(y1-y0) + (x1-x0)*(x1-x0));
+    double delta = 0.5 / dist;
+
+    // terrible line drawing code
+    for (float f = 0; f <= 1; f += delta) {
+        int x = ((int) (x1 + (x0 - x1) * f));
+        int y = ((int) (y1 + (y0 - y1) * f));
+
+        if (x < 0 || y < 0 || x >= im->width || y >= im->height)
+            continue;
+
+        int idx = y*im->stride + 3*x;
+        for (int i = 0; i < 3; i++)
+            im->buf[idx + i] = rgb[i];
+    }
+}
+
+static void convolve(const uint8_t *x, uint8_t *y, int sz, const uint8_t *k, int ksz)
+{
+    assert((ksz&1)==1);
+
+    for (int i = 0; i < ksz/2 && i < sz; i++)
+        y[i] = x[i];
+
+    for (int i = 0; i < sz - ksz; i++) {
+        uint32_t acc = 0;
+
+        for (int j = 0; j < ksz; j++)
+            acc += k[j]*x[i+j];
+
+        y[ksz/2 + i] = acc >> 8;
+    }
+
+    for (int i = sz - ksz + ksz/2; i < sz; i++)
+        y[i] = x[i];
+}
+
+void image_u8x3_gaussian_blur(image_u8x3_t *im, double sigma, int ksz)
+{
+    if (sigma == 0)
+        return;
+
+    assert((ksz & 1) == 1); // ksz must be odd.
+
+    // build the kernel.
+    double *dk = malloc(sizeof(double)*ksz);
+
+    // for kernel of length 5:
+    // dk[0] = f(-2), dk[1] = f(-1), dk[2] = f(0), dk[3] = f(1), dk[4] = f(2)
+    for (int i = 0; i < ksz; i++) {
+        int x = -ksz/2 + i;
+        double v = exp(-.5*sq(x / sigma));
+        dk[i] = v;
+    }
+
+    // normalize
+    double acc = 0;
+    for (int i = 0; i < ksz; i++)
+        acc += dk[i];
+
+    for (int i = 0; i < ksz; i++)
+        dk[i] /= acc;
+
+    uint8_t *k = malloc(sizeof(uint8_t)*ksz);
+    for (int i = 0; i < ksz; i++)
+        k[i] = dk[i]*255;
+
+    if (0) {
+        for (int i = 0; i < ksz; i++)
+            printf("%d %15f %5d\n", i, dk[i], k[i]);
+    }
+    free(dk);
+
+    for (int c = 0; c < 3; c++) {
+        for (int y = 0; y < im->height; y++) {
+
+            uint8_t *in = malloc(sizeof(uint8_t)*im->stride);
+            uint8_t *out = malloc(sizeof(uint8_t)*im->stride);
+
+            for (int x = 0; x < im->width; x++)
+                in[x] = im->buf[y*im->stride + 3 * x + c];
+
+            convolve(in, out, im->width, k, ksz);
+            free(in);
+
+            for (int x = 0; x < im->width; x++)
+                im->buf[y*im->stride + 3 * x + c] = out[x];
+            free(out);
+        }
+
+        for (int x = 0; x < im->width; x++) {
+            uint8_t *in = malloc(sizeof(uint8_t)*im->height);
+            uint8_t *out = malloc(sizeof(uint8_t)*im->height);
+
+            for (int y = 0; y < im->height; y++)
+                in[y] = im->buf[y*im->stride + 3*x + c];
+
+            convolve(in, out, im->height, k, ksz);
+            free(in);
+
+            for (int y = 0; y < im->height; y++)
+                im->buf[y*im->stride + 3*x + c] = out[y];
+            free(out);
+        }
+    }
+    free(k);
+}
diff --git a/common/image_u8x3.h b/common/image_u8x3.h
new file mode 100644
index 0000000..4d1713d
--- /dev/null
+++ b/common/image_u8x3.h
@@ -0,0 +1,64 @@
+/* Copyright (C) 2013-2016, The Regents of The University of Michigan.
+All rights reserved.
+This software was developed in the APRIL Robotics Lab under the
+direction of Edwin Olson, ebolson@umich.edu. This software may be
+available under alternative licensing terms; contact the address above.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+1. Redistributions of source code must retain the above copyright notice, this
+   list of conditions and the following disclaimer.
+2. Redistributions in binary form must reproduce the above copyright notice,
+   this list of conditions and the following disclaimer in the documentation
+   and/or other materials provided with the distribution.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+The views and conclusions contained in the software and documentation are those
+of the authors and should not be interpreted as representing official policies,
+either expressed or implied, of the Regents of The University of Michigan.
+*/
+
+#pragma once
+
+#include <stdint.h>
+#include "image_types.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+/////////////////////////////////////
+// IMPORTANT NOTE ON BYTE ORDER
+//
+// Format conversion routines will (unless otherwise specified) assume
+// R, G, B, ordering of bytes. This is consistent with GTK, PNM, etc.
+//
+/////////////////////////////////////
+
+// Create or load an image. returns NULL on failure
+image_u8x3_t *image_u8x3_create(unsigned int width, unsigned int height);
+image_u8x3_t *image_u8x3_create_alignment(unsigned int width, unsigned int height, unsigned int alignment);
+image_u8x3_t *image_u8x3_create_from_pnm(const char *path);
+
+image_u8x3_t *image_u8x3_copy(const image_u8x3_t *in);
+
+void image_u8x3_gaussian_blur(image_u8x3_t *im, double sigma, int ksz);
+
+void image_u8x3_destroy(image_u8x3_t *im);
+
+int image_u8x3_write_pnm(const image_u8x3_t *im, const char *path);
+
+// only width 1 supported
+void image_u8x3_draw_line(image_u8x3_t *im, float x0, float y0, float x1, float y1, uint8_t rgb[3], int width);
+
+#ifdef __cplusplus
+}
+#endif
diff --git a/common/image_u8x4.c b/common/image_u8x4.c
new file mode 100644
index 0000000..3520537
--- /dev/null
+++ b/common/image_u8x4.c
@@ -0,0 +1,233 @@
+/* Copyright (C) 2013-2016, The Regents of The University of Michigan.
+All rights reserved.
+This software was developed in the APRIL Robotics Lab under the
+direction of Edwin Olson, ebolson@umich.edu. This software may be
+available under alternative licensing terms; contact the address above.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+1. Redistributions of source code must retain the above copyright notice, this
+   list of conditions and the following disclaimer.
+2. Redistributions in binary form must reproduce the above copyright notice,
+   this list of conditions and the following disclaimer in the documentation
+   and/or other materials provided with the distribution.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+The views and conclusions contained in the software and documentation are those
+of the authors and should not be interpreted as representing official policies,
+either expressed or implied, of the Regents of The University of Michigan.
+*/
+
+#include <assert.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "pam.h"
+#include "pnm.h"
+#include "image_u8x4.h"
+
+// least common multiple of 64 (sandy bridge cache line) and 64 (stride needed
+// for 16byte-wide RGBA processing).
+#define DEFAULT_ALIGNMENT_U8X4 64
+
+image_u8x4_t *image_u8x4_create(unsigned int width, unsigned int height)
+{
+    return image_u8x4_create_alignment(width, height, DEFAULT_ALIGNMENT_U8X4);
+}
+
+image_u8x4_t *image_u8x4_create_alignment(unsigned int width, unsigned int height, unsigned int alignment)
+{
+    int stride = 4*width;
+
+    if ((stride % alignment) != 0)
+        stride += alignment - (stride % alignment);
+
+    uint8_t *buf = calloc(height*stride, sizeof(uint8_t));
+
+    // const initializer
+    image_u8x4_t tmp = { .width = width, .height = height, .stride = stride, .buf = buf };
+
+    image_u8x4_t *im = calloc(1, sizeof(image_u8x4_t));
+    memcpy(im, &tmp, sizeof(image_u8x4_t));
+    return im;
+}
+
+image_u8x4_t *image_u8x4_copy(const image_u8x4_t *in)
+{
+    uint8_t *buf = malloc(in->height*in->stride*sizeof(uint8_t));
+    memcpy(buf, in->buf, in->height*in->stride*sizeof(uint8_t));
+
+    // const initializer
+    image_u8x4_t tmp = { .width = in->width, .height = in->height, .stride = in->stride, .buf = buf };
+
+    image_u8x4_t *copy = calloc(1, sizeof(image_u8x4_t));
+    memcpy(copy, &tmp, sizeof(image_u8x4_t));
+    return copy;
+}
+
+void image_u8x4_destroy(image_u8x4_t *im)
+{
+    if (!im)
+        return;
+
+    free(im->buf);
+    free(im);
+}
+
+////////////////////////////////////////////////////////////
+image_u8x4_t *image_u8x4_create_from_pam(const char *inpath)
+{
+    pam_t *pam = pam_create_from_file(inpath);
+    if (!pam)
+        return NULL;
+
+    image_u8x4_t *im = image_u8x4_create(pam->width, pam->height);
+
+    for (int y = 0; y < pam->height; y++) {
+        if (pam->depth == 1) {
+            for (int x = 0; x < pam->width; x++) {
+                im->buf[y*im->stride + 4*x + 0] = pam->data[pam->width*y + x + 0];
+                im->buf[y*im->stride + 4*x + 1] = pam->data[pam->width*y + x + 0];
+                im->buf[y*im->stride + 4*x + 2] = pam->data[pam->width*y + x + 0];
+                im->buf[y*im->stride + 4*x + 3] = 255;
+            }
+        } else if (pam->depth == 3) {
+            for (int x = 0; x < pam->width; x++) {
+                im->buf[y*im->stride + 4*x + 0] = pam->data[3*pam->width*y + 3*x + 0];
+                im->buf[y*im->stride + 4*x + 1] = pam->data[3*pam->width*y + 3*x + 1];
+                im->buf[y*im->stride + 4*x + 2] = pam->data[3*pam->width*y + 3*x + 2];
+                im->buf[y*im->stride + 4*x + 3] = 255;
+            }
+        } else if (pam->depth == 4) {
+            memcpy(&im->buf[y*im->stride], &pam->data[4*pam->width*y], 4*pam->width);
+        } else {
+            assert(0); // not implemented
+        }
+    }
+
+    pam_destroy(pam);
+    return im;
+}
+////////////////////////////////////////////////////////////
+// PNM file i/o
+
+// Create an RGBA image from PNM
+image_u8x4_t *image_u8x4_create_from_pnm(const char *path)
+{
+    pnm_t *pnmp = pnm_create_from_file(path);
+    if (pnmp == NULL)
+        return NULL;
+
+    pnm_t pnm = *pnmp;
+    image_u8x4_t *imp = NULL;
+
+    switch (pnm.format) {
+        case PNM_FORMAT_GRAY: {
+            imp = image_u8x4_create(pnm.width, pnm.height);
+
+            // copy struct by value for common subexpression elimination
+            const image_u8x4_t im = *imp;
+
+            for (int y = 0; y < im.height; y++) {
+                for (int x = 0; x < im.width; x++) {
+                    uint8_t gray = pnm.buf[y*pnm.width + x];
+                    im.buf[y*im.stride + 4*x + 0] = gray;
+                    im.buf[y*im.stride + 4*x + 1] = gray;
+                    im.buf[y*im.stride + 4*x + 2] = gray;
+                    im.buf[y*im.stride + 4*x + 3] = 0xff;
+                }
+            }
+
+            break;
+        }
+
+        case PNM_FORMAT_RGB: {
+            imp = image_u8x4_create(pnm.width, pnm.height);
+
+            // copy struct by value for common subexpression elimination
+            const image_u8x4_t im = *imp;
+
+            // Gray conversion for RGB is gray = (r + g + g + b)/4
+            for (int y = 0; y < im.height; y++) {
+                for (int x = 0; x < im.width; x++) {
+
+                    uint8_t r = pnm.buf[y*pnm.width*3 + 3*x + 0];
+                    uint8_t g = pnm.buf[y*pnm.width*3 + 3*x + 1];
+                    uint8_t b = pnm.buf[y*pnm.width*3 + 3*x + 2];
+
+                    im.buf[y*im.stride + 4*x + 0] = r;
+                    im.buf[y*im.stride + 4*x + 1] = g;
+                    im.buf[y*im.stride + 4*x + 2] = b;
+                    im.buf[y*im.stride + 4*x + 3] = 0xff;
+                }
+            }
+
+            break;
+        }
+    }
+
+    pnm_destroy(pnmp);
+    return imp;
+}
+
+int image_u8x4_write_pnm(const image_u8x4_t *imp, const char *path)
+{
+    // copy struct by value to ensure common subexpression elimination occurs
+    const image_u8x4_t im = *imp;
+
+    FILE *f = fopen(path, "wb");
+    int res = 0;
+
+    if (f == NULL) {
+        res = -1;
+        goto finish;
+    }
+
+    // Only outputs to RGB
+    fprintf(f, "P6\n%d %d\n255\n", im.width, im.height);
+
+    for (int y = im.height-1; y >= 0; y--) {
+        for (int x = 0; x < im.width; x++) {
+
+            uint8_t r = im.buf[y*im.stride + 4*x + 0];
+            uint8_t g = im.buf[y*im.stride + 4*x + 1];
+            uint8_t b = im.buf[y*im.stride + 4*x + 2];
+
+            fwrite(&r, 1, 1, f);
+            fwrite(&g, 1, 1, f);
+            fwrite(&b, 1, 1, f);
+        }
+    }
+
+  finish:
+    if (f != NULL)
+        fclose(f);
+
+    return res;
+}
+
+void image_u8x4_write_pam(const image_u8x4_t *im, const char *path)
+{
+    FILE *f = fopen(path, "w");
+    fprintf(f, "P7\n");
+    fprintf(f, "WIDTH %d\n", im->width);
+    fprintf(f, "HEIGHT %d\n", im->height);
+    fprintf(f, "DEPTH 4\n");
+    fprintf(f, "MAXVAL 255\n");
+    fprintf(f, "TUPLTYPE RGB_ALPHA\n");
+    fprintf(f, "ENDHDR\n");
+
+    for (int y = 0; y < im->height; y++)
+        fwrite(&im->buf[y*im->stride], 1, 4*im->width, f);
+
+    fclose(f);
+
+}
diff --git a/common/image_u8x4.h b/common/image_u8x4.h
new file mode 100644
index 0000000..89820a2
--- /dev/null
+++ b/common/image_u8x4.h
@@ -0,0 +1,65 @@
+/* Copyright (C) 2013-2016, The Regents of The University of Michigan.
+All rights reserved.
+This software was developed in the APRIL Robotics Lab under the
+direction of Edwin Olson, ebolson@umich.edu. This software may be
+available under alternative licensing terms; contact the address above.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+1. Redistributions of source code must retain the above copyright notice, this
+   list of conditions and the following disclaimer.
+2. Redistributions in binary form must reproduce the above copyright notice,
+   this list of conditions and the following disclaimer in the documentation
+   and/or other materials provided with the distribution.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+The views and conclusions contained in the software and documentation are those
+of the authors and should not be interpreted as representing official policies,
+either expressed or implied, of the Regents of The University of Michigan.
+*/
+
+#pragma once
+
+#include <stdint.h>
+#include "image_types.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+/////////////////////////////////////
+// IMPORTANT NOTE ON BYTE ORDER
+//
+// Format conversion routines will (unless otherwise specified) assume
+// R, G, B, A ordering of bytes.
+//
+/////////////////////////////////////
+
+// Create or load an image. returns NULL on failure
+image_u8x4_t *image_u8x4_create(unsigned int width, unsigned int height);
+image_u8x4_t *image_u8x4_create_alignment(unsigned int width, unsigned int height, unsigned int alignment);
+image_u8x4_t *image_u8x4_create_from_pnm(const char *path);
+
+image_u8x4_t *image_u8x4_copy(const image_u8x4_t *in);
+
+void image_u8x4_destroy(image_u8x4_t *im);
+
+// Write a pnm. Return 0 on success.
+// Currently supports GRAY and RGB
+int image_u8x4_write_pnm(const image_u8x4_t *im, const char *path);
+
+image_u8x4_t *image_u8x4_create_from_pam(const char *path);
+
+    void image_u8x4_write_pam(const image_u8x4_t *im, const char *path);
+
+#ifdef __cplusplus
+}
+#endif
diff --git a/common/matd.c b/common/matd.c
new file mode 100644
index 0000000..54449d9
--- /dev/null
+++ b/common/matd.c
@@ -0,0 +1,2027 @@
+/* Copyright (C) 2013-2016, The Regents of The University of Michigan.
+All rights reserved.
+This software was developed in the APRIL Robotics Lab under the
+direction of Edwin Olson, ebolson@umich.edu. This software may be
+available under alternative licensing terms; contact the address above.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+1. Redistributions of source code must retain the above copyright notice, this
+   list of conditions and the following disclaimer.
+2. Redistributions in binary form must reproduce the above copyright notice,
+   this list of conditions and the following disclaimer in the documentation
+   and/or other materials provided with the distribution.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+The views and conclusions contained in the software and documentation are those
+of the authors and should not be interpreted as representing official policies,
+either expressed or implied, of the Regents of The University of Michigan.
+*/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdarg.h>
+#include <assert.h>
+#include <math.h>
+#include <float.h>
+
+#include "common/math_util.h"
+#include "common/svd22.h"
+#include "common/matd.h"
+#include "common/debug_print.h"
+
+// a matd_t with rows=0 cols=0 is a SCALAR.
+
+// to ease creating mati, matf, etc. in the future.
+#define TYPE double
+
+matd_t *matd_create(int rows, int cols)
+{
+    assert(rows >= 0);
+    assert(cols >= 0);
+
+    if (rows == 0 || cols == 0)
+        return matd_create_scalar(0);
+
+    matd_t *m = calloc(1, sizeof(matd_t) + (rows*cols*sizeof(double)));
+    m->nrows = rows;
+    m->ncols = cols;
+
+    return m;
+}
+
+matd_t *matd_create_scalar(TYPE v)
+{
+    matd_t *m = calloc(1, sizeof(matd_t) + sizeof(double));
+    m->nrows = 0;
+    m->ncols = 0;
+    m->data[0] = v;
+
+    return m;
+}
+
+matd_t *matd_create_data(int rows, int cols, const TYPE *data)
+{
+    if (rows == 0 || cols == 0)
+        return matd_create_scalar(data[0]);
+
+    matd_t *m = matd_create(rows, cols);
+    for (int i = 0; i < rows * cols; i++)
+        m->data[i] = data[i];
+
+    return m;
+}
+
+matd_t *matd_create_dataf(int rows, int cols, const float *data)
+{
+    if (rows == 0 || cols == 0)
+        return matd_create_scalar(data[0]);
+
+    matd_t *m = matd_create(rows, cols);
+    for (int i = 0; i < rows * cols; i++)
+        m->data[i] = (double)data[i];
+
+    return m;
+}
+
+matd_t *matd_identity(int dim)
+{
+    if (dim == 0)
+        return matd_create_scalar(1);
+
+    matd_t *m = matd_create(dim, dim);
+    for (int i = 0; i < dim; i++)
+        MATD_EL(m, i, i) = 1;
+
+    return m;
+}
+
+// row and col are zero-based
+TYPE matd_get(const matd_t *m, int row, int col)
+{
+    assert(m != NULL);
+    assert(!matd_is_scalar(m));
+    assert(row >= 0);
+    assert(row < m->nrows);
+    assert(col >= 0);
+    assert(col < m->ncols);
+
+    return MATD_EL(m, row, col);
+}
+
+// row and col are zero-based
+void matd_put(matd_t *m, int row, int col, TYPE value)
+{
+    assert(m != NULL);
+
+    if (matd_is_scalar(m)) {
+        matd_put_scalar(m, value);
+        return;
+    }
+
+    assert(row >= 0);
+    assert(row < m->nrows);
+    assert(col >= 0);
+    assert(col < m->ncols);
+
+    MATD_EL(m, row, col) = value;
+}
+
+TYPE matd_get_scalar(const matd_t *m)
+{
+    assert(m != NULL);
+    assert(matd_is_scalar(m));
+
+    return (m->data[0]);
+}
+
+void matd_put_scalar(matd_t *m, TYPE value)
+{
+    assert(m != NULL);
+    assert(matd_is_scalar(m));
+
+    m->data[0] = value;
+}
+
+matd_t *matd_copy(const matd_t *m)
+{
+    assert(m != NULL);
+
+    matd_t *x = matd_create(m->nrows, m->ncols);
+    if (matd_is_scalar(m))
+        x->data[0] = m->data[0];
+    else
+        memcpy(x->data, m->data, sizeof(TYPE)*m->ncols*m->nrows);
+
+    return x;
+}
+
+matd_t *matd_select(const matd_t * a, int r0, int r1, int c0, int c1)
+{
+    assert(a != NULL);
+
+    assert(r0 >= 0 && r0 < a->nrows);
+    assert(c0 >= 0 && c0 < a->ncols);
+
+    int nrows = r1 - r0 + 1;
+    int ncols = c1 - c0 + 1;
+
+    matd_t * r = matd_create(nrows, ncols);
+
+    for (int row = r0; row <= r1; row++)
+        for (int col = c0; col <= c1; col++)
+            MATD_EL(r,row-r0,col-c0) = MATD_EL(a,row,col);
+
+    return r;
+}
+
+void matd_print(const matd_t *m, const char *fmt)
+{
+    assert(m != NULL);
+    assert(fmt != NULL);
+
+    if (matd_is_scalar(m)) {
+        printf(fmt, MATD_EL(m, 0, 0));
+        printf("\n");
+    } else {
+        for (int i = 0; i < m->nrows; i++) {
+            for (int j = 0; j < m->ncols; j++) {
+                printf(fmt, MATD_EL(m, i, j));
+            }
+            printf("\n");
+        }
+    }
+}
+
+void matd_print_transpose(const matd_t *m, const char *fmt)
+{
+    assert(m != NULL);
+    assert(fmt != NULL);
+
+    if (matd_is_scalar(m)) {
+        printf(fmt, MATD_EL(m, 0, 0));
+        printf("\n");
+    } else {
+        for (int j = 0; j < m->ncols; j++) {
+            for (int i = 0; i < m->nrows; i++) {
+                printf(fmt, MATD_EL(m, i, j));
+            }
+            printf("\n");
+        }
+    }
+}
+
+void matd_destroy(matd_t *m)
+{
+    if (!m)
+        return;
+
+    assert(m != NULL);
+    free(m);
+}
+
+matd_t *matd_multiply(const matd_t *a, const matd_t *b)
+{
+    assert(a != NULL);
+    assert(b != NULL);
+
+    if (matd_is_scalar(a))
+        return matd_scale(b, a->data[0]);
+    if (matd_is_scalar(b))
+        return matd_scale(a, b->data[0]);
+
+    assert(a->ncols == b->nrows);
+    matd_t *m = matd_create(a->nrows, b->ncols);
+
+    for (int i = 0; i < m->nrows; i++) {
+        for (int j = 0; j < m->ncols; j++) {
+            TYPE acc = 0;
+            for (int k = 0; k < a->ncols; k++) {
+                acc += MATD_EL(a, i, k) * MATD_EL(b, k, j);
+            }
+            MATD_EL(m, i, j) = acc;
+        }
+    }
+
+    return m;
+}
+
+matd_t *matd_scale(const matd_t *a, double s)
+{
+    assert(a != NULL);
+
+    if (matd_is_scalar(a))
+        return matd_create_scalar(a->data[0] * s);
+
+    matd_t *m = matd_create(a->nrows, a->ncols);
+
+    for (int i = 0; i < m->nrows; i++) {
+        for (int j = 0; j < m->ncols; j++) {
+            MATD_EL(m, i, j) = s * MATD_EL(a, i, j);
+        }
+    }
+
+    return m;
+}
+
+void matd_scale_inplace(matd_t *a, double s)
+{
+    assert(a != NULL);
+
+    if (matd_is_scalar(a)) {
+        a->data[0] *= s;
+        return;
+    }
+
+    for (int i = 0; i < a->nrows; i++) {
+        for (int j = 0; j < a->ncols; j++) {
+            MATD_EL(a, i, j) *= s;
+        }
+    }
+}
+
+matd_t *matd_add(const matd_t *a, const matd_t *b)
+{
+    assert(a != NULL);
+    assert(b != NULL);
+    assert(a->nrows == b->nrows);
+    assert(a->ncols == b->ncols);
+
+    if (matd_is_scalar(a))
+        return matd_create_scalar(a->data[0] + b->data[0]);
+
+    matd_t *m = matd_create(a->nrows, a->ncols);
+
+    for (int i = 0; i < m->nrows; i++) {
+        for (int j = 0; j < m->ncols; j++) {
+            MATD_EL(m, i, j) = MATD_EL(a, i, j) + MATD_EL(b, i, j);
+        }
+    }
+
+    return m;
+}
+
+void matd_add_inplace(matd_t *a, const matd_t *b)
+{
+    assert(a != NULL);
+    assert(b != NULL);
+    assert(a->nrows == b->nrows);
+    assert(a->ncols == b->ncols);
+
+    if (matd_is_scalar(a)) {
+        a->data[0] += b->data[0];
+        return;
+    }
+
+    for (int i = 0; i < a->nrows; i++) {
+        for (int j = 0; j < a->ncols; j++) {
+            MATD_EL(a, i, j) += MATD_EL(b, i, j);
+        }
+    }
+}
+
+
+matd_t *matd_subtract(const matd_t *a, const matd_t *b)
+{
+    assert(a != NULL);
+    assert(b != NULL);
+    assert(a->nrows == b->nrows);
+    assert(a->ncols == b->ncols);
+
+    if (matd_is_scalar(a))
+        return matd_create_scalar(a->data[0] - b->data[0]);
+
+    matd_t *m = matd_create(a->nrows, a->ncols);
+
+    for (int i = 0; i < m->nrows; i++) {
+        for (int j = 0; j < m->ncols; j++) {
+            MATD_EL(m, i, j) = MATD_EL(a, i, j) - MATD_EL(b, i, j);
+        }
+    }
+
+    return m;
+}
+
+void matd_subtract_inplace(matd_t *a, const matd_t *b)
+{
+    assert(a != NULL);
+    assert(b != NULL);
+    assert(a->nrows == b->nrows);
+    assert(a->ncols == b->ncols);
+
+    if (matd_is_scalar(a)) {
+        a->data[0] -= b->data[0];
+        return;
+    }
+
+    for (int i = 0; i < a->nrows; i++) {
+        for (int j = 0; j < a->ncols; j++) {
+            MATD_EL(a, i, j) -= MATD_EL(b, i, j);
+        }
+    }
+}
+
+
+matd_t *matd_transpose(const matd_t *a)
+{
+    assert(a != NULL);
+
+    if (matd_is_scalar(a))
+        return matd_create_scalar(a->data[0]);
+
+    matd_t *m = matd_create(a->ncols, a->nrows);
+
+    for (int i = 0; i < a->nrows; i++) {
+        for (int j = 0; j < a->ncols; j++) {
+            MATD_EL(m, j, i) = MATD_EL(a, i, j);
+        }
+    }
+    return m;
+}
+
+static
+double matd_det_general(const matd_t *a)
+{
+    // Use LU decompositon to calculate the determinant
+    matd_plu_t *mlu = matd_plu(a);
+    matd_t *L = matd_plu_l(mlu);
+    matd_t *U = matd_plu_u(mlu);
+
+    // The determinants of the L and U matrices are the products of
+    // their respective diagonal elements
+    double detL = 1; double detU = 1;
+    for (int i = 0; i < a->nrows; i++) {
+        detL *= matd_get(L, i, i);
+        detU *= matd_get(U, i, i);
+    }
+
+    // The determinant of a can be calculated as
+    //     epsilon*det(L)*det(U),
+    // where epsilon is just the sign of the corresponding permutation
+    // (which is +1 for an even number of permutations and is −1
+    // for an uneven number of permutations).
+    double det = mlu->pivsign * detL * detU;
+
+    // Cleanup
+    matd_plu_destroy(mlu);
+    matd_destroy(L);
+    matd_destroy(U);
+
+    return det;
+}
+
+double matd_det(const matd_t *a)
+{
+    assert(a != NULL);
+    assert(a->nrows == a->ncols);
+
+    switch(a->nrows) {
+        case 0:
+            // scalar: invalid
+            assert(a->nrows > 0);
+            break;
+
+        case 1:
+            // 1x1 matrix
+            return a->data[0];
+
+        case 2:
+            // 2x2 matrix
+            return a->data[0] * a->data[3] - a->data[1] * a->data[2];
+
+        case 3:
+            // 3x3 matrix
+            return  a->data[0]*a->data[4]*a->data[8]
+                - a->data[0]*a->data[5]*a->data[7]
+                + a->data[1]*a->data[5]*a->data[6]
+                - a->data[1]*a->data[3]*a->data[8]
+                + a->data[2]*a->data[3]*a->data[7]
+                - a->data[2]*a->data[4]*a->data[6];
+
+        case 4: {
+            // 4x4 matrix
+            double m00 = MATD_EL(a,0,0), m01 = MATD_EL(a,0,1), m02 = MATD_EL(a,0,2), m03 = MATD_EL(a,0,3);
+            double m10 = MATD_EL(a,1,0), m11 = MATD_EL(a,1,1), m12 = MATD_EL(a,1,2), m13 = MATD_EL(a,1,3);
+            double m20 = MATD_EL(a,2,0), m21 = MATD_EL(a,2,1), m22 = MATD_EL(a,2,2), m23 = MATD_EL(a,2,3);
+            double m30 = MATD_EL(a,3,0), m31 = MATD_EL(a,3,1), m32 = MATD_EL(a,3,2), m33 = MATD_EL(a,3,3);
+
+            return m00 * m11 * m22 * m33 - m00 * m11 * m23 * m32 -
+                m00 * m21 * m12 * m33 + m00 * m21 * m13 * m32 + m00 * m31 * m12 * m23 -
+                m00 * m31 * m13 * m22 - m10 * m01 * m22 * m33 +
+                m10 * m01 * m23 * m32 + m10 * m21 * m02 * m33 -
+                m10 * m21 * m03 * m32 - m10 * m31 * m02 * m23 +
+                m10 * m31 * m03 * m22 + m20 * m01 * m12 * m33 -
+                m20 * m01 * m13 * m32 - m20 * m11 * m02 * m33 +
+                m20 * m11 * m03 * m32 + m20 * m31 * m02 * m13 -
+                m20 * m31 * m03 * m12 - m30 * m01 * m12 * m23 +
+                m30 * m01 * m13 * m22 + m30 * m11 * m02 * m23 -
+                m30 * m11 * m03 * m22 - m30 * m21 * m02 * m13 +
+                m30 * m21 * m03 * m12;
+        }
+
+        default:
+            return matd_det_general(a);
+    }
+
+    assert(0);
+    return 0;
+}
+
+// returns NULL if the matrix is (exactly) singular. Caller is
+// otherwise responsible for knowing how to cope with badly
+// conditioned matrices.
+matd_t *matd_inverse(const matd_t *x)
+{
+    matd_t *m = NULL;
+
+    assert(x != NULL);
+    assert(x->nrows == x->ncols);
+
+    if (matd_is_scalar(x)) {
+        if (x->data[0] == 0)
+            return NULL;
+
+        return matd_create_scalar(1.0 / x->data[0]);
+    }
+
+    switch(x->nrows) {
+        case 1: {
+            double det = x->data[0];
+            if (det == 0)
+                return NULL;
+
+            double invdet = 1.0 / det;
+
+            m = matd_create(x->nrows, x->nrows);
+            MATD_EL(m, 0, 0) = 1.0 * invdet;
+            return m;
+        }
+
+        case 2: {
+            double det = x->data[0] * x->data[3] - x->data[1] * x->data[2];
+            if (det == 0)
+                return NULL;
+
+            double invdet = 1.0 / det;
+
+            m = matd_create(x->nrows, x->nrows);
+            MATD_EL(m, 0, 0) = MATD_EL(x, 1, 1) * invdet;
+            MATD_EL(m, 0, 1) = - MATD_EL(x, 0, 1) * invdet;
+            MATD_EL(m, 1, 0) = - MATD_EL(x, 1, 0) * invdet;
+            MATD_EL(m, 1, 1) = MATD_EL(x, 0, 0) * invdet;
+            return m;
+        }
+
+        default: {
+            matd_plu_t *plu = matd_plu(x);
+
+            matd_t *inv = NULL;
+            if (!plu->singular) {
+                matd_t *ident = matd_identity(x->nrows);
+                inv = matd_plu_solve(plu, ident);
+                matd_destroy(ident);
+            }
+
+            matd_plu_destroy(plu);
+
+            return inv;
+        }
+    }
+
+    return NULL; // unreachable
+}
+
+
+
+// TODO Optimization: Some operations we could perform in-place,
+// saving some memory allocation work. E.g., ADD, SUBTRACT. Just need
+// to make sure that we don't do an in-place modification on a matrix
+// that was an input argument!
+
+// handle right-associative operators, greedily consuming them. These
+// include transpose and inverse. This is called by the main recursion
+// method.
+static inline matd_t *matd_op_gobble_right(const char *expr, int *pos, matd_t *acc, matd_t **garb, int *garbpos)
+{
+    while (expr[*pos] != 0) {
+
+        switch (expr[*pos]) {
+
+            case '\'': {
+                assert(acc != NULL); // either a syntax error or a math op failed, producing null
+                matd_t *res = matd_transpose(acc);
+                garb[*garbpos] = res;
+                (*garbpos)++;
+                acc = res;
+
+                (*pos)++;
+                break;
+            }
+
+                // handle inverse ^-1. No other exponents are allowed.
+            case '^': {
+                assert(acc != NULL);
+                assert(expr[*pos+1] == '-');
+                assert(expr[*pos+2] == '1');
+
+                matd_t *res = matd_inverse(acc);
+                garb[*garbpos] = res;
+                (*garbpos)++;
+                acc = res;
+
+                (*pos)+=3;
+                break;
+            }
+
+            default:
+                return acc;
+        }
+    }
+
+    return acc;
+}
+
+// @garb, garbpos  A list of every matrix allocated during evaluation... used to assist cleanup.
+// @oneterm: we should return at the end of this term (i.e., stop at a PLUS, MINUS, LPAREN).
+static matd_t *matd_op_recurse(const char *expr, int *pos, matd_t *acc, matd_t **args, int *argpos,
+                               matd_t **garb, int *garbpos, int oneterm)
+{
+    while (expr[*pos] != 0) {
+
+        switch (expr[*pos]) {
+
+            case '(': {
+                if (oneterm && acc != NULL)
+                    return acc;
+                (*pos)++;
+                matd_t *rhs = matd_op_recurse(expr, pos, NULL, args, argpos, garb, garbpos, 0);
+                rhs = matd_op_gobble_right(expr, pos, rhs, garb, garbpos);
+
+                if (acc == NULL) {
+                    acc = rhs;
+                } else {
+                    matd_t *res = matd_multiply(acc, rhs);
+                    garb[*garbpos] = res;
+                    (*garbpos)++;
+                    acc = res;
+                }
+
+                break;
+            }
+
+            case ')': {
+                if (oneterm)
+                    return acc;
+
+                (*pos)++;
+                return acc;
+            }
+
+            case '*': {
+                (*pos)++;
+
+                matd_t *rhs = matd_op_recurse(expr, pos, NULL, args, argpos, garb, garbpos, 1);
+                rhs = matd_op_gobble_right(expr, pos, rhs, garb, garbpos);
+
+                if (acc == NULL) {
+                    acc = rhs;
+                } else {
+                    matd_t *res = matd_multiply(acc, rhs);
+                    garb[*garbpos] = res;
+                    (*garbpos)++;
+                    acc = res;
+                }
+
+                break;
+            }
+
+            case 'F': {
+                matd_t *rhs = args[*argpos];
+                garb[*garbpos] = rhs;
+                (*garbpos)++;
+
+                (*pos)++;
+                (*argpos)++;
+
+                rhs = matd_op_gobble_right(expr, pos, rhs, garb, garbpos);
+
+                if (acc == NULL) {
+                    acc = rhs;
+                } else {
+                    matd_t *res = matd_multiply(acc, rhs);
+                    garb[*garbpos] = res;
+                    (*garbpos)++;
+                    acc = res;
+                }
+
+                break;
+            }
+
+            case 'M': {
+                matd_t *rhs = args[*argpos];
+
+                (*pos)++;
+                (*argpos)++;
+
+                rhs = matd_op_gobble_right(expr, pos, rhs, garb, garbpos);
+
+                if (acc == NULL) {
+                    acc = rhs;
+                } else {
+                    matd_t *res = matd_multiply(acc, rhs);
+                    garb[*garbpos] = res;
+                    (*garbpos)++;
+                    acc = res;
+                }
+
+                break;
+            }
+
+/*
+  case 'D': {
+  int rows = expr[*pos+1]-'0';
+  int cols = expr[*pos+2]-'0';
+
+  matd_t *rhs = matd_create(rows, cols);
+
+  break;
+  }
+*/
+                // a constant (SCALAR) defined inline. Treat just like M, creating a matd_t on the fly.
+            case '0':
+            case '1':
+            case '2':
+            case '3':
+            case '4':
+            case '5':
+            case '6':
+            case '7':
+            case '8':
+            case '9':
+            case '.': {
+                const char *start = &expr[*pos];
+                char *end;
+                double s = strtod(start, &end);
+                (*pos) += (end - start);
+                matd_t *rhs = matd_create_scalar(s);
+                garb[*garbpos] = rhs;
+                (*garbpos)++;
+
+                rhs = matd_op_gobble_right(expr, pos, rhs, garb, garbpos);
+
+                if (acc == NULL) {
+                    acc = rhs;
+                } else {
+                    matd_t *res = matd_multiply(acc, rhs);
+                    garb[*garbpos] = res;
+                    (*garbpos)++;
+                    acc = res;
+                }
+
+                break;
+            }
+
+            case '+': {
+                if (oneterm && acc != NULL)
+                    return acc;
+
+                // don't support unary plus
+                assert(acc != NULL);
+                (*pos)++;
+                matd_t *rhs = matd_op_recurse(expr, pos, NULL, args, argpos, garb, garbpos, 1);
+                rhs = matd_op_gobble_right(expr, pos, rhs, garb, garbpos);
+
+                matd_t *res = matd_add(acc, rhs);
+
+                garb[*garbpos] = res;
+                (*garbpos)++;
+                acc = res;
+                break;
+            }
+
+            case '-': {
+                if (oneterm && acc != NULL)
+                    return acc;
+
+                if (acc == NULL) {
+                    // unary minus
+                    (*pos)++;
+                    matd_t *rhs = matd_op_recurse(expr, pos, NULL, args, argpos, garb, garbpos, 1);
+                    rhs = matd_op_gobble_right(expr, pos, rhs, garb, garbpos);
+
+                    matd_t *res = matd_scale(rhs, -1);
+                    garb[*garbpos] = res;
+                    (*garbpos)++;
+                    acc = res;
+                } else {
+                    // subtract
+                    (*pos)++;
+                    matd_t *rhs = matd_op_recurse(expr, pos, NULL, args, argpos, garb, garbpos, 1);
+                    rhs = matd_op_gobble_right(expr, pos, rhs, garb, garbpos);
+
+                    matd_t *res = matd_subtract(acc, rhs);
+                    garb[*garbpos] = res;
+                    (*garbpos)++;
+                    acc = res;
+                }
+                break;
+            }
+
+            case ' ': {
+                // nothing to do. spaces are meaningless.
+                (*pos)++;
+                break;
+            }
+
+            default: {
+                debug_print("Unknown character: '%c'\n", expr[*pos]);
+                assert(expr[*pos] != expr[*pos]);
+            }
+        }
+    }
+    return acc;
+}
+
+// always returns a new matrix.
+matd_t *matd_op(const char *expr, ...)
+{
+    int nargs = 0;
+    int exprlen = 0;
+
+    assert(expr != NULL);
+
+    for (const char *p = expr; *p != 0; p++) {
+        if (*p == 'M' || *p == 'F')
+            nargs++;
+        exprlen++;
+    }
+
+    assert(nargs > 0);
+
+    if (!exprlen) // expr = ""
+        return NULL;
+
+    va_list ap;
+    va_start(ap, expr);
+
+    matd_t **args = malloc(sizeof(matd_t*)*nargs);
+    for (int i = 0; i < nargs; i++) {
+        args[i] = va_arg(ap, matd_t*);
+        // XXX: sanity check argument; emit warning/error if args[i]
+        // doesn't look like a matd_t*.
+    }
+
+    va_end(ap);
+
+    int pos = 0;
+    int argpos = 0;
+    int garbpos = 0;
+
+    // can't create more than 2 new result per character
+    // one result, and possibly one argument to free
+    matd_t **garb = malloc(sizeof(matd_t*)*2*exprlen);
+
+    matd_t *res = matd_op_recurse(expr, &pos, NULL, args, &argpos, garb, &garbpos, 0);
+    free(args);
+
+    // 'res' may need to be freed as part of garbage collection (i.e. expr = "F")
+    matd_t *res_copy = (res ? matd_copy(res) : NULL);
+
+    for (int i = 0; i < garbpos; i++) {
+        matd_destroy(garb[i]);
+    }
+    free(garb);
+
+    return res_copy;
+}
+
+double matd_vec_mag(const matd_t *a)
+{
+    assert(a != NULL);
+    assert(matd_is_vector(a));
+
+    double mag = 0.0;
+    int len = a->nrows*a->ncols;
+    for (int i = 0; i < len; i++)
+        mag += sq(a->data[i]);
+    return sqrt(mag);
+}
+
+double matd_vec_dist(const matd_t *a, const matd_t *b)
+{
+    assert(a != NULL);
+    assert(b != NULL);
+    assert(matd_is_vector(a) && matd_is_vector(b));
+    assert(a->nrows*a->ncols == b->nrows*b->ncols);
+
+    int lena = a->nrows*a->ncols;
+    return matd_vec_dist_n(a, b, lena);
+}
+
+double matd_vec_dist_n(const matd_t *a, const matd_t *b, int n)
+{
+    assert(a != NULL);
+    assert(b != NULL);
+    assert(matd_is_vector(a) && matd_is_vector(b));
+
+    int lena = a->nrows*a->ncols;
+    int lenb = b->nrows*b->ncols;
+
+    assert(n <= lena && n <= lenb);
+
+    double mag = 0.0;
+    for (int i = 0; i < n; i++)
+        mag += sq(a->data[i] - b->data[i]);
+    return sqrt(mag);
+}
+
+// find the index of the off-diagonal element with the largest mag
+static inline int max_idx(const matd_t *A, int row, int maxcol)
+{
+    int maxi = 0;
+    double maxv = -1;
+
+    for (int i = 0; i < maxcol; i++) {
+        if (i == row)
+            continue;
+        double v = fabs(MATD_EL(A, row, i));
+        if (v > maxv) {
+            maxi = i;
+            maxv = v;
+        }
+    }
+
+    return maxi;
+}
+
+double matd_vec_dot_product(const matd_t *a, const matd_t *b)
+{
+    assert(a != NULL);
+    assert(b != NULL);
+    assert(matd_is_vector(a) && matd_is_vector(b));
+    int adim = a->ncols*a->nrows;
+    int bdim = b->ncols*b->nrows;
+    assert(adim == bdim);
+
+    double acc = 0;
+    for (int i = 0; i < adim; i++) {
+        acc += a->data[i] * b->data[i];
+    }
+    return acc;
+}
+
+
+matd_t *matd_vec_normalize(const matd_t *a)
+{
+    assert(a != NULL);
+    assert(matd_is_vector(a));
+
+    double mag = matd_vec_mag(a);
+    assert(mag > 0);
+
+    matd_t *b = matd_create(a->nrows, a->ncols);
+
+    int len = a->nrows*a->ncols;
+    for(int i = 0; i < len; i++)
+        b->data[i] = a->data[i] / mag;
+
+    return b;
+}
+
+matd_t *matd_crossproduct(const matd_t *a, const matd_t *b)
+{ // only defined for vecs (col or row) of length 3
+    assert(a != NULL);
+    assert(b != NULL);
+    assert(matd_is_vector_len(a, 3) && matd_is_vector_len(b, 3));
+
+    matd_t * r = matd_create(a->nrows, a->ncols);
+
+    r->data[0] = a->data[1] * b->data[2] - a->data[2] * b->data[1];
+    r->data[1] = a->data[2] * b->data[0] - a->data[0] * b->data[2];
+    r->data[2] = a->data[0] * b->data[1] - a->data[1] * b->data[0];
+
+    return r;
+}
+
+TYPE matd_err_inf(const matd_t *a, const matd_t *b)
+{
+    assert(a->nrows == b->nrows);
+    assert(a->ncols == b->ncols);
+
+    TYPE maxf = 0;
+
+    for (int i = 0; i < a->nrows; i++) {
+        for (int j = 0; j < a->ncols; j++) {
+            TYPE av = MATD_EL(a, i, j);
+            TYPE bv = MATD_EL(b, i, j);
+
+            TYPE err = fabs(av - bv);
+            maxf = fmax(maxf, err);
+        }
+    }
+
+    return maxf;
+}
+
+// Computes an SVD for square or tall matrices. This code doesn't work
+// for wide matrices, because the bidiagonalization results in one
+// non-zero element too far to the right for us to rotate away.
+//
+// Caller is responsible for destroying U, S, and V.
+static matd_svd_t matd_svd_tall(matd_t *A, int flags)
+{
+    matd_t *B = matd_copy(A);
+
+    // Apply householder reflections on each side to reduce A to
+    // bidiagonal form. Specifically:
+    //
+    // A = LS*B*RS'
+    //
+    // Where B is bidiagonal, and LS/RS are unitary.
+    //
+    // Why are we doing this? Some sort of transformation is necessary
+    // to reduce the matrix's nz elements to a square region. QR could
+    // work too. We need nzs confined to a square region so that the
+    // subsequent iterative process, which is based on rotations, can
+    // work. (To zero out a term at (i,j), our rotations will also
+    // affect (j,i).
+    //
+    // We prefer bidiagonalization over QR because it gets us "closer"
+    // to the SVD, which should mean fewer iterations.
+
+    // LS: cumulative left-handed transformations
+    matd_t *LS = matd_identity(A->nrows);
+
+    // RS: cumulative right-handed transformations.
+    matd_t *RS = matd_identity(A->ncols);
+
+    for (int hhidx = 0; hhidx < A->nrows; hhidx++)  {
+
+        if (hhidx < A->ncols) {
+            // We construct the normal of the reflection plane: let u
+            // be the vector to reflect, x =[ M 0 0 0 ] the target
+            // location for u (u') after reflection (with M = ||u||).
+            //
+            // The normal vector is then n = (u - x), but since we
+            // could equally have the target location be x = [-M 0 0 0
+            // ], we could use n = (u + x).
+            //
+            // We then normalize n. To ensure a reasonable magnitude,
+            // we select the sign of M so as to maximize the magnitude
+            // of the first element of (x +/- M). (Otherwise, we could
+            // end up with a divide-by-zero if u[0] and M cancel.)
+            //
+            // The householder reflection matrix is then H=(I - nn'), and
+            // u' = Hu.
+            //
+            //
+            int vlen = A->nrows - hhidx;
+
+            double *v = malloc(sizeof(double)*vlen);
+
+            double mag2 = 0;
+            for (int i = 0; i < vlen; i++) {
+                v[i] = MATD_EL(B, hhidx+i, hhidx);
+                mag2 += v[i]*v[i];
+            }
+
+            double oldv0 = v[0];
+            if (oldv0 < 0)
+                v[0] -= sqrt(mag2);
+            else
+                v[0] += sqrt(mag2);
+
+            mag2 += -oldv0*oldv0 + v[0]*v[0];
+
+            // normalize v
+            double mag = sqrt(mag2);
+
+            // this case arises with matrices of all zeros, for example.
+            if (mag == 0) {
+                free(v);
+                continue;
+            }
+
+            for (int i = 0; i < vlen; i++)
+                v[i] /= mag;
+
+            // Q = I - 2vv'
+            //matd_t *Q = matd_identity(A->nrows);
+            //for (int i = 0; i < vlen; i++)
+            //  for (int j = 0; j < vlen; j++)
+            //    MATD_EL(Q, i+hhidx, j+hhidx) -= 2*v[i]*v[j];
+
+
+            // LS = matd_op("F*M", LS, Q);
+            // Implementation: take each row of LS, compute dot product with n,
+            // subtract n (scaled by dot product) from it.
+            for (int i = 0; i < LS->nrows; i++) {
+                double dot = 0;
+                for (int j = 0; j < vlen; j++)
+                    dot += MATD_EL(LS, i, hhidx+j) * v[j];
+                for (int j = 0; j < vlen; j++)
+                    MATD_EL(LS, i, hhidx+j) -= 2*dot*v[j];
+            }
+
+            //  B = matd_op("M*F", Q, B); // should be Q', but Q is symmetric.
+            for (int i = 0; i < B->ncols; i++) {
+                double dot = 0;
+                for (int j = 0; j < vlen; j++)
+                    dot += MATD_EL(B, hhidx+j, i) * v[j];
+                for (int j = 0; j < vlen; j++)
+                    MATD_EL(B, hhidx+j, i) -= 2*dot*v[j];
+            }
+
+            free(v);
+        }
+
+        if (hhidx+2 < A->ncols) {
+            int vlen = A->ncols - hhidx - 1;
+
+            double *v = malloc(sizeof(double)*vlen);
+
+            double mag2 = 0;
+            for (int i = 0; i < vlen; i++) {
+                v[i] = MATD_EL(B, hhidx, hhidx+i+1);
+                mag2 += v[i]*v[i];
+            }
+
+            double oldv0 = v[0];
+            if (oldv0 < 0)
+                v[0] -= sqrt(mag2);
+            else
+                v[0] += sqrt(mag2);
+
+            mag2 += -oldv0*oldv0 + v[0]*v[0];
+
+            // compute magnitude of ([1 0 0..]+v)
+            double mag = sqrt(mag2);
+
+            // this case can occur when the vectors are already perpendicular
+            if (mag == 0) {
+                free(v);
+                continue;
+            }
+
+            for (int i = 0; i < vlen; i++)
+                v[i] /= mag;
+
+            // TODO: optimize these multiplications
+            // matd_t *Q = matd_identity(A->ncols);
+            //  for (int i = 0; i < vlen; i++)
+            //    for (int j = 0; j < vlen; j++)
+            //       MATD_EL(Q, i+1+hhidx, j+1+hhidx) -= 2*v[i]*v[j];
+
+            //  RS = matd_op("F*M", RS, Q);
+            for (int i = 0; i < RS->nrows; i++) {
+                double dot = 0;
+                for (int j = 0; j < vlen; j++)
+                    dot += MATD_EL(RS, i, hhidx+1+j) * v[j];
+                for (int j = 0; j < vlen; j++)
+                    MATD_EL(RS, i, hhidx+1+j) -= 2*dot*v[j];
+            }
+
+            //   B = matd_op("F*M", B, Q); // should be Q', but Q is symmetric.
+            for (int i = 0; i < B->nrows; i++) {
+                double dot = 0;
+                for (int j = 0; j < vlen; j++)
+                    dot += MATD_EL(B, i, hhidx+1+j) * v[j];
+                for (int j = 0; j < vlen; j++)
+                    MATD_EL(B, i, hhidx+1+j) -= 2*dot*v[j];
+            }
+
+            free(v);
+        }
+    }
+
+    // empirically, we find a roughly linear worst-case number of iterations
+    // as a function of rows*cols. maxiters ~= 1.5*nrows*ncols
+    // we're a bit conservative below.
+    int maxiters = 200 + 2 * A->nrows * A->ncols;
+    assert(maxiters > 0); // reassure clang
+    int iter;
+
+    double maxv; // maximum non-zero value being reduced this iteration
+
+    double tol = 1E-10;
+
+    // which method will we use to find the largest off-diagonal
+    // element of B?
+    const int find_max_method = 1; //(B->ncols < 6) ? 2 : 1;
+
+    // for each of the first B->ncols rows, which index has the
+    // maximum absolute value? (used by method 1)
+    int *maxrowidx = malloc(sizeof(int)*B->ncols);
+    int lastmaxi, lastmaxj;
+
+    if (find_max_method == 1) {
+        for (int i = 2; i < B->ncols; i++)
+            maxrowidx[i] = max_idx(B, i, B->ncols);
+
+        // note that we started the array at 2. That's because by setting
+        // these values below, we'll recompute first two entries on the
+        // first iteration!
+        lastmaxi = 0, lastmaxj = 1;
+    }
+
+    for (iter = 0; iter < maxiters; iter++) {
+
+        // No diagonalization required for 0x0 and 1x1 matrices.
+        if (B->ncols < 2)
+            break;
+
+        // find the largest off-diagonal element of B, and put its
+        // coordinates in maxi, maxj.
+        int maxi, maxj;
+
+        if (find_max_method == 1) {
+            // method 1 is the "smarter" method which does at least
+            // 4*ncols work. More work might be needed (up to
+            // ncols*ncols), depending on data. Thus, this might be a
+            // bit slower than the default method for very small
+            // matrices.
+            maxi = -1;
+            maxv = -1;
+
+            // every iteration, we must deal with the fact that rows
+            // and columns lastmaxi and lastmaxj have been
+            // modified. Update maxrowidx accordingly.
+
+            // now, EVERY row also had columns lastmaxi and lastmaxj modified.
+            for (int rowi = 0; rowi < B->ncols; rowi++) {
+
+                // the magnitude of the largest off-diagonal element
+                // in this row.
+                double thismaxv;
+
+                // row 'lastmaxi' and 'lastmaxj' have been completely
+                // changed. compute from scratch.
+                if (rowi == lastmaxi || rowi == lastmaxj) {
+                    maxrowidx[rowi] = max_idx(B, rowi, B->ncols);
+                    thismaxv = fabs(MATD_EL(B, rowi, maxrowidx[rowi]));
+                    goto endrowi;
+                }
+
+                // our maximum entry was just modified. We don't know
+                // if it went up or down, and so we don't know if it
+                // is still the maximum. We have to update from
+                // scratch.
+                if (maxrowidx[rowi] == lastmaxi || maxrowidx[rowi] == lastmaxj) {
+                    maxrowidx[rowi] = max_idx(B, rowi, B->ncols);
+                    thismaxv = fabs(MATD_EL(B, rowi, maxrowidx[rowi]));
+                    goto endrowi;
+                }
+
+                // This row is unchanged, except for columns
+                // 'lastmaxi' and 'lastmaxj', and those columns were
+                // not previously the largest entry...  just check to
+                // see if they are now the maximum entry in their
+                // row. (Remembering to consider off-diagonal entries
+                // only!)
+                thismaxv = fabs(MATD_EL(B, rowi, maxrowidx[rowi]));
+
+                // check column lastmaxi. Is it now the maximum?
+                if (lastmaxi != rowi) {
+                    double v = fabs(MATD_EL(B, rowi, lastmaxi));
+                    if (v > thismaxv) {
+                        thismaxv = v;
+                        maxrowidx[rowi] = lastmaxi;
+                    }
+                }
+
+                // check column lastmaxj
+                if (lastmaxj != rowi) {
+                    double v = fabs(MATD_EL(B, rowi, lastmaxj));
+                    if (v > thismaxv) {
+                        thismaxv = v;
+                        maxrowidx[rowi] = lastmaxj;
+                    }
+                }
+
+                // does this row have the largest value we've seen so far?
+              endrowi:
+                if (thismaxv > maxv) {
+                    maxv = thismaxv;
+                    maxi = rowi;
+                }
+            }
+
+            assert(maxi >= 0);
+            maxj = maxrowidx[maxi];
+
+            // save these for the next iteration.
+            lastmaxi = maxi;
+            lastmaxj = maxj;
+
+            if (maxv < tol)
+                break;
+
+        } else if (find_max_method == 2) {
+            // brute-force (reference) version.
+            maxv = -1;
+
+            // only search top "square" portion
+            for (int i = 0; i < B->ncols; i++) {
+                for (int j = 0; j < B->ncols; j++) {
+                    if (i == j)
+                        continue;
+
+                    double v = fabs(MATD_EL(B, i, j));
+
+                    if (v > maxv) {
+                        maxi = i;
+                        maxj = j;
+                        maxv = v;
+                    }
+                }
+            }
+
+            // termination condition.
+            if (maxv < tol)
+                break;
+        } else {
+            assert(0);
+        }
+
+//        printf(">>> %5d %3d, %3d %15g\n", maxi, maxj, iter, maxv);
+
+        // Now, solve the 2x2 SVD problem for the matrix
+        // [ A0 A1 ]
+        // [ A2 A3 ]
+        double A0 = MATD_EL(B, maxi, maxi);
+        double A1 = MATD_EL(B, maxi, maxj);
+        double A2 = MATD_EL(B, maxj, maxi);
+        double A3 = MATD_EL(B, maxj, maxj);
+
+        if (1) {
+            double AQ[4];
+            AQ[0] = A0;
+            AQ[1] = A1;
+            AQ[2] = A2;
+            AQ[3] = A3;
+
+            double U[4], S[2], V[4];
+            svd22(AQ, U, S, V);
+
+/*  Reference (slow) implementation...
+
+            // LS = LS * ROT(theta) = LS * QL
+            matd_t *QL = matd_identity(A->nrows);
+            MATD_EL(QL, maxi, maxi) = U[0];
+            MATD_EL(QL, maxi, maxj) = U[1];
+            MATD_EL(QL, maxj, maxi) = U[2];
+            MATD_EL(QL, maxj, maxj) = U[3];
+
+            matd_t *QR = matd_identity(A->ncols);
+            MATD_EL(QR, maxi, maxi) = V[0];
+            MATD_EL(QR, maxi, maxj) = V[1];
+            MATD_EL(QR, maxj, maxi) = V[2];
+            MATD_EL(QR, maxj, maxj) = V[3];
+
+            LS = matd_op("F*M", LS, QL);
+            RS = matd_op("F*M", RS, QR); // remember we'll transpose RS.
+            B = matd_op("M'*F*M", QL, B, QR);
+
+            matd_destroy(QL);
+            matd_destroy(QR);
+*/
+
+            //  LS = matd_op("F*M", LS, QL);
+            for (int i = 0; i < LS->nrows; i++) {
+                double vi = MATD_EL(LS, i, maxi);
+                double vj = MATD_EL(LS, i, maxj);
+
+                MATD_EL(LS, i, maxi) = U[0]*vi + U[2]*vj;
+                MATD_EL(LS, i, maxj) = U[1]*vi + U[3]*vj;
+            }
+
+            //  RS = matd_op("F*M", RS, QR); // remember we'll transpose RS.
+            for (int i = 0; i < RS->nrows; i++) {
+                double vi = MATD_EL(RS, i, maxi);
+                double vj = MATD_EL(RS, i, maxj);
+
+                MATD_EL(RS, i, maxi) = V[0]*vi + V[2]*vj;
+                MATD_EL(RS, i, maxj) = V[1]*vi + V[3]*vj;
+            }
+
+            // B = matd_op("M'*F*M", QL, B, QR);
+            // The QL matrix mixes rows of B.
+            for (int i = 0; i < B->ncols; i++) {
+                double vi = MATD_EL(B, maxi, i);
+                double vj = MATD_EL(B, maxj, i);
+
+                MATD_EL(B, maxi, i) = U[0]*vi + U[2]*vj;
+                MATD_EL(B, maxj, i) = U[1]*vi + U[3]*vj;
+            }
+
+            // The QR matrix mixes columns of B.
+            for (int i = 0; i < B->nrows; i++) {
+                double vi = MATD_EL(B, i, maxi);
+                double vj = MATD_EL(B, i, maxj);
+
+                MATD_EL(B, i, maxi) = V[0]*vi + V[2]*vj;
+                MATD_EL(B, i, maxj) = V[1]*vi + V[3]*vj;
+            }
+        }
+    }
+
+    free(maxrowidx);
+
+    if (!(flags & MATD_SVD_NO_WARNINGS) && iter == maxiters) {
+        debug_print("WARNING: maximum iters (maximum = %d, matrix %d x %d, max=%.15f)\n",
+               iter, A->nrows, A->ncols, maxv);
+
+//        matd_print(A, "%15f");
+    }
+
+    // them all positive by flipping the corresponding columns of
+    // U/LS.
+    int *idxs = malloc(sizeof(int)*A->ncols);
+    double *vals = malloc(sizeof(double)*A->ncols);
+    for (int i = 0; i < A->ncols; i++) {
+        idxs[i] = i;
+        vals[i] = MATD_EL(B, i, i);
+    }
+
+    // A bubble sort. Seriously.
+    int changed;
+    do {
+        changed = 0;
+
+        for (int i = 0; i + 1 < A->ncols; i++) {
+            if (fabs(vals[i+1]) > fabs(vals[i])) {
+                int tmpi = idxs[i];
+                idxs[i] = idxs[i+1];
+                idxs[i+1] = tmpi;
+
+                double tmpv = vals[i];
+                vals[i] = vals[i+1];
+                vals[i+1] = tmpv;
+
+                changed = 1;
+            }
+        }
+    } while (changed);
+
+    matd_t *LP = matd_identity(A->nrows);
+    matd_t *RP = matd_identity(A->ncols);
+
+    for (int i = 0; i < A->ncols; i++) {
+        MATD_EL(LP, idxs[i], idxs[i]) = 0; // undo the identity above
+        MATD_EL(RP, idxs[i], idxs[i]) = 0;
+
+        MATD_EL(LP, idxs[i], i) = vals[i] < 0 ? -1 : 1;
+        MATD_EL(RP, idxs[i], i) = 1; //vals[i] < 0 ? -1 : 1;
+    }
+    free(idxs);
+    free(vals);
+
+    // we've factored:
+    // LP*(something)*RP'
+
+    // solve for (something)
+    B = matd_op("M'*F*M", LP, B, RP);
+
+    // update LS and RS, remembering that RS will be transposed.
+    LS = matd_op("F*M", LS, LP);
+    RS = matd_op("F*M", RS, RP);
+
+    matd_destroy(LP);
+    matd_destroy(RP);
+
+    matd_svd_t res;
+    memset(&res, 0, sizeof(res));
+
+    // make B exactly diagonal
+
+    for (int i = 0; i < B->nrows; i++) {
+        for (int j = 0; j < B->ncols; j++) {
+            if (i != j)
+                MATD_EL(B, i, j) = 0;
+        }
+    }
+
+    res.U = LS;
+    res.S = B;
+    res.V = RS;
+
+    return res;
+}
+
+matd_svd_t matd_svd(matd_t *A)
+{
+    return matd_svd_flags(A, 0);
+}
+
+matd_svd_t matd_svd_flags(matd_t *A, int flags)
+{
+    matd_svd_t res;
+
+    if (A->ncols <= A->nrows) {
+        res = matd_svd_tall(A, flags);
+    } else {
+        matd_t *At = matd_transpose(A);
+
+        // A =U  S  V'
+        // A'=V  S' U'
+
+        matd_svd_t tmp = matd_svd_tall(At, flags);
+
+        memset(&res, 0, sizeof(res));
+        res.U = tmp.V; //matd_transpose(tmp.V);
+        res.S = matd_transpose(tmp.S);
+        res.V = tmp.U; //matd_transpose(tmp.U);
+
+        matd_destroy(tmp.S);
+        matd_destroy(At);
+    }
+
+/*
+  matd_t *check = matd_op("M*M*M'-M", res.U, res.S, res.V, A);
+  double maxerr = 0;
+
+  for (int i = 0; i < check->nrows; i++)
+  for (int j = 0; j < check->ncols; j++)
+  maxerr = fmax(maxerr, fabs(MATD_EL(check, i, j)));
+
+  matd_destroy(check);
+
+  if (maxerr > 1e-7) {
+  printf("bad maxerr: %15f\n", maxerr);
+  }
+
+  if (maxerr > 1e-5) {
+  printf("bad maxerr: %15f\n", maxerr);
+  matd_print(A, "%15f");
+  assert(0);
+  }
+
+*/
+    return res;
+}
+
+
+matd_plu_t *matd_plu(const matd_t *a)
+{
+    unsigned int *piv = calloc(a->nrows, sizeof(unsigned int));
+    int pivsign = 1;
+    matd_t *lu = matd_copy(a);
+
+    // only for square matrices.
+    assert(a->nrows == a->ncols);
+
+    matd_plu_t *mlu = calloc(1, sizeof(matd_plu_t));
+
+    for (int i = 0; i < a->nrows; i++)
+        piv[i] = i;
+
+    for (int j = 0; j < a->ncols; j++) {
+        for (int i = 0; i < a->nrows; i++) {
+            int kmax = i < j ? i : j; // min(i,j)
+
+            // compute dot product of row i with column j (up through element kmax)
+            double acc = 0;
+            for (int k = 0; k < kmax; k++)
+                acc += MATD_EL(lu, i, k) * MATD_EL(lu, k, j);
+
+            MATD_EL(lu, i, j) -= acc;
+        }
+
+        // find pivot and exchange if necessary.
+        int p = j;
+        if (1) {
+            for (int i = j+1; i < lu->nrows; i++) {
+                if (fabs(MATD_EL(lu,i,j)) > fabs(MATD_EL(lu, p, j))) {
+                    p = i;
+                }
+            }
+        }
+
+        // swap rows p and j?
+        if (p != j) {
+            TYPE *tmp = malloc(sizeof(TYPE)*lu->ncols);
+            memcpy(tmp, &MATD_EL(lu, p, 0), sizeof(TYPE) * lu->ncols);
+            memcpy(&MATD_EL(lu, p, 0), &MATD_EL(lu, j, 0), sizeof(TYPE) * lu->ncols);
+            memcpy(&MATD_EL(lu, j, 0), tmp, sizeof(TYPE) * lu->ncols);
+            int k = piv[p];
+            piv[p] = piv[j];
+            piv[j] = k;
+            pivsign = -pivsign;
+            free(tmp);
+        }
+
+        double LUjj = MATD_EL(lu, j, j);
+
+        // If our pivot is very small (which means the matrix is
+        // singular or nearly singular), replace with a new pivot of the
+        // right sign.
+        if (fabs(LUjj) < MATD_EPS) {
+/*
+            if (LUjj < 0)
+                LUjj = -MATD_EPS;
+            else
+                LUjj = MATD_EPS;
+
+            MATD_EL(lu, j, j) = LUjj;
+*/
+            mlu->singular = 1;
+        }
+
+        if (j < lu->ncols && j < lu->nrows && LUjj != 0) {
+            LUjj = 1.0 / LUjj;
+            for (int i = j+1; i < lu->nrows; i++)
+                MATD_EL(lu, i, j) *= LUjj;
+        }
+    }
+
+    mlu->lu = lu;
+    mlu->piv = piv;
+    mlu->pivsign = pivsign;
+
+    return mlu;
+}
+
+void matd_plu_destroy(matd_plu_t *mlu)
+{
+    matd_destroy(mlu->lu);
+    free(mlu->piv);
+    memset(mlu, 0, sizeof(matd_plu_t));
+    free(mlu);
+}
+
+double matd_plu_det(const matd_plu_t *mlu)
+{
+    matd_t *lu = mlu->lu;
+    double det = mlu->pivsign;
+
+    if (lu->nrows == lu->ncols) {
+        for (int i = 0; i < lu->ncols; i++)
+            det *= MATD_EL(lu, i, i);
+    }
+
+    return det;
+}
+
+matd_t *matd_plu_p(const matd_plu_t *mlu)
+{
+    matd_t *lu = mlu->lu;
+    matd_t *P = matd_create(lu->nrows, lu->nrows);
+
+    for (int i = 0; i < lu->nrows; i++) {
+        MATD_EL(P, mlu->piv[i], i) = 1;
+    }
+
+    return P;
+}
+
+matd_t *matd_plu_l(const matd_plu_t *mlu)
+{
+    matd_t *lu = mlu->lu;
+
+    matd_t *L = matd_create(lu->nrows, lu->ncols);
+    for (int i = 0; i < lu->nrows; i++) {
+        MATD_EL(L, i, i) = 1;
+
+        for (int j = 0; j < i; j++) {
+            MATD_EL(L, i, j) = MATD_EL(lu, i, j);
+        }
+    }
+
+    return L;
+}
+
+matd_t *matd_plu_u(const matd_plu_t *mlu)
+{
+    matd_t *lu = mlu->lu;
+
+    matd_t *U = matd_create(lu->ncols, lu->ncols);
+    for (int i = 0; i < lu->ncols; i++) {
+        for (int j = 0; j < lu->ncols; j++) {
+            if (i <= j)
+                MATD_EL(U, i, j) = MATD_EL(lu, i, j);
+        }
+    }
+
+    return U;
+}
+
+// PLU = A
+// Ax = B
+// PLUx = B
+// LUx = P'B
+matd_t *matd_plu_solve(const matd_plu_t *mlu, const matd_t *b)
+{
+    matd_t *x = matd_copy(b);
+
+    // permute right hand side
+    for (int i = 0; i < mlu->lu->nrows; i++)
+        memcpy(&MATD_EL(x, i, 0), &MATD_EL(b, mlu->piv[i], 0), sizeof(TYPE) * b->ncols);
+
+    // solve Ly = b
+    for (int k = 0; k < mlu->lu->nrows; k++) {
+        for (int i = k+1; i < mlu->lu->nrows; i++) {
+            double LUik = -MATD_EL(mlu->lu, i, k);
+            for (int t = 0; t < b->ncols; t++)
+                MATD_EL(x, i, t) += MATD_EL(x, k, t) * LUik;
+        }
+    }
+
+    // solve Ux = y
+    for (int k = mlu->lu->ncols-1; k >= 0; k--) {
+        double LUkk = 1.0 / MATD_EL(mlu->lu, k, k);
+        for (int t = 0; t < b->ncols; t++)
+            MATD_EL(x, k, t) *= LUkk;
+
+        for (int i = 0; i < k; i++) {
+            double LUik = -MATD_EL(mlu->lu, i, k);
+            for (int t = 0; t < b->ncols; t++)
+                MATD_EL(x, i, t) += MATD_EL(x, k, t) *LUik;
+        }
+    }
+
+    return x;
+}
+
+matd_t *matd_solve(matd_t *A, matd_t *b)
+{
+    matd_plu_t *mlu = matd_plu(A);
+    matd_t *x = matd_plu_solve(mlu, b);
+
+    matd_plu_destroy(mlu);
+    return x;
+}
+
+#if 0
+
+static int randi()
+{
+    int v = random()&31;
+    v -= 15;
+    return v;
+}
+
+static double randf()
+{
+    double v = 1.0 *random() / RAND_MAX;
+    return 2*v - 1;
+}
+
+int main(int argc, char *argv[])
+{
+    if (1) {
+        int maxdim = 16;
+        matd_t *A = matd_create(maxdim, maxdim);
+
+        for (int iter = 0; 1; iter++) {
+            srand(iter);
+
+            if (iter % 1000 == 0)
+                printf("%d\n", iter);
+
+            int m = 1 + (random()%(maxdim-1));
+            int n = 1 + (random()%(maxdim-1));
+
+            for (int i = 0; i < m*n; i++)
+                A->data[i] = randi();
+
+            A->nrows = m;
+            A->ncols = n;
+
+//            printf("%d %d ", m, n);
+            matd_svd_t svd = matd_svd(A);
+            matd_destroy(svd.U);
+            matd_destroy(svd.S);
+            matd_destroy(svd.V);
+
+        }
+
+/*        matd_t *A = matd_create_data(2, 5, (double[]) { 1, 5, 2, 6,
+          3, 3, 0, 7,
+          1, 1, 0, -2,
+          4, 0, 9, 9, 2, 6, 1, 3, 2, 5, 5, 4, -1, 2, 5, 9, 8, 2 });
+
+          matd_svd(A);
+*/
+        return 0;
+    }
+
+
+    struct svd22 s;
+
+    srand(0);
+
+    matd_t *A = matd_create(2, 2);
+    MATD_EL(A,0,0) = 4;
+    MATD_EL(A,0,1) = 7;
+    MATD_EL(A,1,0) = 2;
+    MATD_EL(A,1,1) = 6;
+
+    matd_t *U = matd_create(2, 2);
+    matd_t *V = matd_create(2, 2);
+    matd_t *S = matd_create(2, 2);
+
+    for (int iter = 0; 1; iter++) {
+        if (iter % 100000 == 0)
+            printf("%d\n", iter);
+
+        MATD_EL(A,0,0) = randf();
+        MATD_EL(A,0,1) = randf();
+        MATD_EL(A,1,0) = randf();
+        MATD_EL(A,1,1) = randf();
+
+        matd_svd22_impl(A->data, &s);
+
+        memcpy(U->data, s.U, 4*sizeof(double));
+        memcpy(V->data, s.V, 4*sizeof(double));
+        MATD_EL(S,0,0) = s.S[0];
+        MATD_EL(S,1,1) = s.S[1];
+
+        assert(s.S[0] >= s.S[1]);
+        assert(s.S[0] >= 0);
+        assert(s.S[1] >= 0);
+        if (s.S[0] == 0) {
+//            printf("*"); fflush(NULL);
+//            printf("%15f %15f %15f %15f\n", MATD_EL(A,0,0), MATD_EL(A,0,1), MATD_EL(A,1,0), MATD_EL(A,1,1));
+        }
+        if (s.S[1] == 0) {
+//            printf("#"); fflush(NULL);
+        }
+
+        matd_t *USV = matd_op("M*M*M'", U, S, V);
+
+        double maxerr = 0;
+        for (int i = 0; i < 4; i++)
+            maxerr = fmax(maxerr, fabs(USV->data[i] - A->data[i]));
+
+        if (0) {
+            printf("------------------------------------\n");
+            printf("A:\n");
+            matd_print(A, "%15f");
+            printf("\nUSV':\n");
+            matd_print(USV, "%15f");
+            printf("maxerr: %.15f\n", maxerr);
+            printf("\n\n");
+        }
+
+        matd_destroy(USV);
+
+        assert(maxerr < 0.00001);
+    }
+}
+
+#endif
+
+// XXX NGV Cholesky
+/*static double *matd_cholesky_raw(double *A, int n)
+  {
+  double *L = (double*)calloc(n * n, sizeof(double));
+
+  for (int i = 0; i < n; i++) {
+  for (int j = 0; j < (i+1); j++) {
+  double s = 0;
+  for (int k = 0; k < j; k++)
+  s += L[i * n + k] * L[j * n + k];
+  L[i * n + j] = (i == j) ?
+  sqrt(A[i * n + i] - s) :
+  (1.0 / L[j * n + j] * (A[i * n + j] - s));
+  }
+  }
+
+  return L;
+  }
+
+  matd_t *matd_cholesky(const matd_t *A)
+  {
+  assert(A->nrows == A->ncols);
+  double *L_data = matd_cholesky_raw(A->data, A->nrows);
+  matd_t *L = matd_create_data(A->nrows, A->ncols, L_data);
+  free(L_data);
+  return L;
+  }*/
+
+// NOTE: The below implementation of Cholesky is different from the one
+// used in NGV.
+matd_chol_t *matd_chol(matd_t *A)
+{
+    assert(A->nrows == A->ncols);
+    int N = A->nrows;
+
+    // make upper right
+    matd_t *U = matd_copy(A);
+
+    // don't actually need to clear lower-left... we won't touch it.
+/*    for (int i = 0; i < U->nrows; i++) {
+      for (int j = 0; j < i; j++) {
+//            assert(MATD_EL(U, i, j) == MATD_EL(U, j, i));
+MATD_EL(U, i, j) = 0;
+}
+}
+*/
+    int is_spd = 1; // (A->nrows == A->ncols);
+
+    for (int i = 0; i < N; i++) {
+        double d = MATD_EL(U, i, i);
+        is_spd &= (d > 0);
+
+        if (d < MATD_EPS)
+            d = MATD_EPS;
+        d = 1.0 / sqrt(d);
+
+        for (int j = i; j < N; j++)
+            MATD_EL(U, i, j) *= d;
+
+        for (int j = i+1; j < N; j++) {
+            double s = MATD_EL(U, i, j);
+
+            if (s == 0)
+                continue;
+
+            for (int k = j; k < N; k++) {
+                MATD_EL(U, j, k) -= MATD_EL(U, i, k)*s;
+            }
+        }
+    }
+
+    matd_chol_t *chol = calloc(1, sizeof(matd_chol_t));
+    chol->is_spd = is_spd;
+    chol->u = U;
+    return chol;
+}
+
+void matd_chol_destroy(matd_chol_t *chol)
+{
+    matd_destroy(chol->u);
+    free(chol);
+}
+
+// Solve: (U')x = b, U is upper triangular
+void matd_ltransposetriangle_solve(matd_t *u, const TYPE *b, TYPE *x)
+{
+    int n = u->ncols;
+    memcpy(x, b, n*sizeof(TYPE));
+    for (int i = 0; i < n; i++) {
+        x[i] /= MATD_EL(u, i, i);
+
+        for (int j = i+1; j < u->ncols; j++) {
+            x[j] -= x[i] * MATD_EL(u, i, j);
+        }
+    }
+}
+
+// Solve: Lx = b, L is lower triangular
+void matd_ltriangle_solve(matd_t *L, const TYPE *b, TYPE *x)
+{
+    int n = L->ncols;
+
+    for (int i = 0; i < n; i++) {
+        double acc = b[i];
+
+        for (int j = 0; j < i; j++) {
+            acc -= MATD_EL(L, i, j)*x[j];
+        }
+
+        x[i] = acc / MATD_EL(L, i, i);
+    }
+}
+
+// solve Ux = b, U is upper triangular
+void matd_utriangle_solve(matd_t *u, const TYPE *b, TYPE *x)
+{
+    for (int i = u->ncols-1; i >= 0; i--) {
+        double bi = b[i];
+
+        double diag = MATD_EL(u, i, i);
+
+        for (int j = i+1; j < u->ncols; j++)
+            bi -= MATD_EL(u, i, j)*x[j];
+
+        x[i] = bi / diag;
+    }
+}
+
+matd_t *matd_chol_solve(const matd_chol_t *chol, const matd_t *b)
+{
+    matd_t *u = chol->u;
+
+    matd_t *x = matd_copy(b);
+
+    // LUx = b
+
+    // solve Ly = b ==> (U')y = b
+
+    for (int i = 0; i < u->nrows; i++) {
+        for (int j = 0; j < i; j++) {
+            // b[i] -= L[i,j]*x[j]... replicated across columns of b
+            //   ==> i.e., ==>
+            // b[i,k] -= L[i,j]*x[j,k]
+            for (int k = 0; k < b->ncols; k++) {
+                MATD_EL(x, i, k) -= MATD_EL(u, j, i)*MATD_EL(x, j, k);
+            }
+        }
+        // x[i] = b[i] / L[i,i]
+        for (int k = 0; k < b->ncols; k++) {
+            MATD_EL(x, i, k) /= MATD_EL(u, i, i);
+        }
+    }
+
+    // solve Ux = y
+    for (int k = u->ncols-1; k >= 0; k--) {
+        double LUkk = 1.0 / MATD_EL(u, k, k);
+        for (int t = 0; t < b->ncols; t++)
+            MATD_EL(x, k, t) *= LUkk;
+
+        for (int i = 0; i < k; i++) {
+            double LUik = -MATD_EL(u, i, k);
+            for (int t = 0; t < b->ncols; t++)
+                MATD_EL(x, i, t) += MATD_EL(x, k, t) *LUik;
+        }
+    }
+
+    return x;
+}
+
+/*void matd_chol_solve(matd_chol_t *chol, const TYPE *b, TYPE *x)
+  {
+  matd_t *u = chol->u;
+
+  TYPE y[u->ncols];
+  matd_ltransposetriangle_solve(u, b, y);
+  matd_utriangle_solve(u, y, x);
+  }
+*/
+// only sensible on PSD matrices. had expected it to be faster than
+// inverse via LU... for now, doesn't seem to be.
+matd_t *matd_chol_inverse(matd_t *a)
+{
+    assert(a->nrows == a->ncols);
+
+    matd_chol_t *chol = matd_chol(a);
+
+    matd_t *eye = matd_identity(a->nrows);
+    matd_t *inv = matd_chol_solve(chol, eye);
+    matd_destroy(eye);
+    matd_chol_destroy(chol);
+
+    return inv;
+}
+
+double matd_max(matd_t *m)
+{
+    double d = -DBL_MAX;
+    for(int x=0; x<m->nrows; x++) {
+        for(int y=0; y<m->ncols; y++) {
+            if(MATD_EL(m, x, y) > d)
+                d = MATD_EL(m, x, y);
+        }
+    }
+
+    return d;
+}
diff --git a/common/matd.h b/common/matd.h
new file mode 100644
index 0000000..3a79c56
--- /dev/null
+++ b/common/matd.h
@@ -0,0 +1,446 @@
+/* Copyright (C) 2013-2016, The Regents of The University of Michigan.
+All rights reserved.
+This software was developed in the APRIL Robotics Lab under the
+direction of Edwin Olson, ebolson@umich.edu. This software may be
+available under alternative licensing terms; contact the address above.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+1. Redistributions of source code must retain the above copyright notice, this
+   list of conditions and the following disclaimer.
+2. Redistributions in binary form must reproduce the above copyright notice,
+   this list of conditions and the following disclaimer in the documentation
+   and/or other materials provided with the distribution.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+The views and conclusions contained in the software and documentation are those
+of the authors and should not be interpreted as representing official policies,
+either expressed or implied, of the Regents of The University of Michigan.
+*/
+
+#pragma once
+
+#include <assert.h>
+#include <stddef.h>
+#include <string.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * Defines a matrix structure for holding double-precision values with
+ * data in row-major order (i.e. index = row*ncols + col).
+ *
+ * nrows and ncols are 1-based counts with the exception that a scalar (non-matrix)
+ *   is represented with nrows=0 and/or ncols=0.
+ */
+typedef struct
+{
+    unsigned int nrows, ncols;
+    double data[];
+//    double *data;
+} matd_t;
+
+#define MATD_ALLOC(name, nrows, ncols) double name ## _storage [nrows*ncols]; matd_t name = { .nrows = nrows, .ncols = ncols, .data = &name ## _storage };
+
+/**
+ * Defines a small value which can be used in place of zero for approximating
+ * calculations which are singular at zero values (i.e. inverting a matrix with
+ * a zero or near-zero determinant).
+ */
+#define MATD_EPS 1e-8
+
+/**
+ * A macro to reference a specific matd_t data element given it's zero-based
+ * row and column indexes. Suitable for both retrieval and assignment.
+ */
+#define MATD_EL(m, row, col) (m)->data[((row)*(m)->ncols + (col))]
+
+/**
+ * Creates a double matrix with the given number of rows and columns (or a scalar
+ * in the case where rows=0 and/or cols=0). All data elements will be initialized
+ * to zero. It is the caller's responsibility to call matd_destroy() on the
+ * returned matrix.
+ */
+matd_t *matd_create(int rows, int cols);
+
+/**
+ * Creates a double matrix with the given number of rows and columns (or a scalar
+ * in the case where rows=0 and/or cols=0). All data elements will be initialized
+ * using the supplied array of data, which must contain at least rows*cols elements,
+ * arranged in row-major order (i.e. index = row*ncols + col). It is the caller's
+ * responsibility to call matd_destroy() on the returned matrix.
+ */
+matd_t *matd_create_data(int rows, int cols, const double *data);
+
+/**
+ * Creates a double matrix with the given number of rows and columns (or a scalar
+ * in the case where rows=0 and/or cols=0). All data elements will be initialized
+ * using the supplied array of float data, which must contain at least rows*cols elements,
+ * arranged in row-major order (i.e. index = row*ncols + col). It is the caller's
+ * responsibility to call matd_destroy() on the returned matrix.
+ */
+matd_t *matd_create_dataf(int rows, int cols, const float *data);
+
+/**
+ * Creates a square identity matrix with the given number of rows (and
+ * therefore columns), or a scalar with value 1 in the case where dim=0.
+ * It is the caller's responsibility to call matd_destroy() on the
+ * returned matrix.
+ */
+matd_t *matd_identity(int dim);
+
+/**
+ * Creates a scalar with the supplied value 'v'. It is the caller's responsibility
+ * to call matd_destroy() on the returned matrix.
+ *
+ * NOTE: Scalars are different than 1x1 matrices (implementation note:
+ * they are encoded as 0x0 matrices). For example: for matrices A*B, A
+ * and B must both have specific dimensions. However, if A is a
+ * scalar, there are no restrictions on the size of B.
+ */
+matd_t *matd_create_scalar(double v);
+
+/**
+ * Retrieves the cell value for matrix 'm' at the given zero-based row and column index.
+ * Performs more thorough validation checking than MATD_EL().
+ */
+double matd_get(const matd_t *m, int row, int col);
+
+/**
+ * Assigns the given value to the matrix cell at the given zero-based row and
+ * column index. Performs more thorough validation checking than MATD_EL().
+ */
+void matd_put(matd_t *m, int row, int col, double value);
+
+/**
+ * Retrieves the scalar value of the given element ('m' must be a scalar).
+ * Performs more thorough validation checking than MATD_EL().
+ */
+double matd_get_scalar(const matd_t *m);
+
+/**
+ * Assigns the given value to the supplied scalar element ('m' must be a scalar).
+ * Performs more thorough validation checking than MATD_EL().
+ */
+void matd_put_scalar(matd_t *m, double value);
+
+/**
+ * Creates an exact copy of the supplied matrix 'm'. It is the caller's
+ * responsibility to call matd_destroy() on the returned matrix.
+ */
+matd_t *matd_copy(const matd_t *m);
+
+/**
+ * Creates a copy of a subset of the supplied matrix 'a'. The subset will include
+ * rows 'r0' through 'r1', inclusive ('r1' >= 'r0'), and columns 'c0' through 'c1',
+ * inclusive ('c1' >= 'c0'). All parameters are zero-based (i.e. matd_select(a, 0, 0, 0, 0)
+ * will return only the first cell). Cannot be used on scalars or to extend
+ * beyond the number of rows/columns of 'a'. It is the caller's  responsibility to
+ * call matd_destroy() on the returned matrix.
+ */
+matd_t *matd_select(const matd_t *a, int r0, int r1, int c0, int c1);
+
+/**
+ * Prints the supplied matrix 'm' to standard output by applying the supplied
+ * printf format specifier 'fmt' for each individual element. Each row will
+ * be printed on a separate newline.
+ */
+void matd_print(const matd_t *m, const char *fmt);
+
+/**
+ * Prints the transpose of the supplied matrix 'm' to standard output by applying
+ * the supplied printf format specifier 'fmt' for each individual element. Each
+ * row will be printed on a separate newline.
+ */
+void matd_print_transpose(const matd_t *m, const char *fmt);
+
+/**
+ * Adds the two supplied matrices together, cell-by-cell, and returns the results
+ * as a new matrix of the same dimensions. The supplied matrices must have
+ * identical dimensions.  It is the caller's responsibility to call matd_destroy()
+ * on the returned matrix.
+ */
+matd_t *matd_add(const matd_t *a, const matd_t *b);
+
+/**
+ * Adds the values of 'b' to matrix 'a', cell-by-cell, and overwrites the
+ * contents of 'a' with the results. The supplied matrices must have
+ * identical dimensions.
+ */
+void matd_add_inplace(matd_t *a, const matd_t *b);
+
+/**
+ * Subtracts matrix 'b' from matrix 'a', cell-by-cell, and returns the results
+ * as a new matrix of the same dimensions. The supplied matrices must have
+ * identical dimensions.  It is the caller's responsibility to call matd_destroy()
+ * on the returned matrix.
+ */
+matd_t *matd_subtract(const matd_t *a, const matd_t *b);
+
+/**
+ * Subtracts the values of 'b' from matrix 'a', cell-by-cell, and overwrites the
+ * contents of 'a' with the results. The supplied matrices must have
+ * identical dimensions.
+ */
+void matd_subtract_inplace(matd_t *a, const matd_t *b);
+
+/**
+ * Scales all cell values of matrix 'a' by the given scale factor 's' and
+ * returns the result as a new matrix of the same dimensions. It is the caller's
+ * responsibility to call matd_destroy() on the returned matrix.
+ */
+matd_t *matd_scale(const matd_t *a, double s);
+
+/**
+ * Scales all cell values of matrix 'a' by the given scale factor 's' and
+ * overwrites the contents of 'a' with the results.
+ */
+void matd_scale_inplace(matd_t *a, double s);
+
+/**
+ * Multiplies the two supplied matrices together (matrix product), and returns the
+ * results as a new matrix. The supplied matrices must have dimensions such that
+ * columns(a) = rows(b). The returned matrix will have a row count of rows(a)
+ * and a column count of columns(b). It is the caller's responsibility to call
+ * matd_destroy() on the returned matrix.
+ */
+matd_t *matd_multiply(const matd_t *a, const matd_t *b);
+
+/**
+ * Creates a matrix which is the transpose of the supplied matrix 'a'. It is the
+ * caller's responsibility to call matd_destroy() on the returned matrix.
+ */
+matd_t *matd_transpose(const matd_t *a);
+
+/**
+ * Calculates the determinant of the supplied matrix 'a'.
+ */
+double matd_det(const matd_t *a);
+
+/**
+ * Attempts to compute an inverse of the supplied matrix 'a' and return it as
+ * a new matrix. This is strictly only possible if the determinant of 'a' is
+ * non-zero (matd_det(a) != 0).
+ *
+ * If the determinant is zero, NULL is returned. It is otherwise the
+ * caller's responsibility to cope with the results caused by poorly
+ * conditioned matrices. (E.g.., if such a situation is likely to arise, compute
+ * the pseudo-inverse from the SVD.)
+ **/
+matd_t *matd_inverse(const matd_t *a);
+
+static inline void matd_set_data(matd_t *m, const double *data)
+{
+    memcpy(m->data, data, m->nrows * m->ncols * sizeof(double));
+}
+
+/**
+ * Determines whether the supplied matrix 'a' is a scalar (positive return) or
+ * not (zero return, indicating a matrix of dimensions at least 1x1).
+ */
+static inline int matd_is_scalar(const matd_t *a)
+{
+    assert(a != NULL);
+    return a->ncols <= 1 && a->nrows <= 1;
+}
+
+/**
+ * Determines whether the supplied matrix 'a' is a row or column vector
+ * (positive return) or not (zero return, indicating either 'a' is a scalar or a
+ * matrix with at least one dimension > 1).
+ */
+static inline int matd_is_vector(const matd_t *a)
+{
+    assert(a != NULL);
+    return a->ncols == 1 || a->nrows == 1;
+}
+
+/**
+ * Determines whether the supplied matrix 'a' is a row or column vector
+ * with a dimension of 'len' (positive return) or not (zero return).
+ */
+static inline int matd_is_vector_len(const matd_t *a, int len)
+{
+    assert(a != NULL);
+    return (a->ncols == 1 && a->nrows == (unsigned int)len) || (a->ncols == (unsigned int)len && a->nrows == 1);
+}
+
+/**
+ * Calculates the magnitude of the supplied matrix 'a'.
+ */
+double matd_vec_mag(const matd_t *a);
+
+/**
+ * Calculates the magnitude of the distance between the points represented by
+ * matrices 'a' and 'b'. Both 'a' and 'b' must be vectors and have the same
+ * dimension (although one may be a row vector and one may be a column vector).
+ */
+double matd_vec_dist(const matd_t *a, const matd_t *b);
+
+
+/**
+ * Same as matd_vec_dist, but only uses the first 'n' terms to compute distance
+ */
+double matd_vec_dist_n(const matd_t *a, const matd_t *b, int n);
+
+/**
+ * Calculates the dot product of two vectors. Both 'a' and 'b' must be vectors
+ * and have the same dimension (although one may be a row vector and one may be
+ * a column vector).
+ */
+double matd_vec_dot_product(const matd_t *a, const matd_t *b);
+
+/**
+ * Calculates the normalization of the supplied vector 'a' (i.e. a unit vector
+ * of the same dimension and orientation as 'a' with a magnitude of 1) and returns
+ * it as a new vector. 'a' must be a vector of any dimension and must have a
+ * non-zero magnitude. It is the caller's responsibility to call matd_destroy()
+ * on the returned matrix.
+ */
+matd_t *matd_vec_normalize(const matd_t *a);
+
+/**
+ * Calculates the cross product of supplied matrices 'a' and 'b' (i.e. a x b)
+ * and returns it as a new matrix. Both 'a' and 'b' must be vectors of dimension
+ * 3, but can be either row or column vectors. It is the caller's responsibility
+ * to call matd_destroy() on the returned matrix.
+ */
+matd_t *matd_crossproduct(const matd_t *a, const matd_t *b);
+
+double matd_err_inf(const matd_t *a, const matd_t *b);
+
+/**
+ * Creates a new matrix by applying a series of matrix operations, as expressed
+ * in 'expr', to the supplied list of matrices. Each matrix to be operated upon
+ * must be represented in the expression by a separate matrix placeholder, 'M',
+ * and there must be one matrix supplied as an argument for each matrix
+ * placeholder in the expression. All rules and caveats of the corresponding
+ * matrix operations apply to the operated-on matrices. It is the caller's
+ * responsibility to call matd_destroy() on the returned matrix.
+ *
+ * Available operators (in order of increasing precedence):
+ *   M+M   add two matrices together
+ *   M-M   subtract one matrix from another
+ *   M*M   multiply two matrices together (matrix product)
+ *   MM    multiply two matrices together (matrix product)
+ *   -M    negate a matrix
+ *   M^-1  take the inverse of a matrix
+ *   M'    take the transpose of a matrix
+ *
+ * Expressions can be combined together and grouped by enclosing them in
+ * parenthesis, i.e.:
+ *   -M(M+M+M)-(M*M)^-1
+ *
+ * Scalar values can be generated on-the-fly, i.e.:
+ *   M*2.2  scales M by 2.2
+ *   -2+M   adds -2 to all elements of M
+ *
+ * All whitespace in the expression is ignored.
+ */
+matd_t *matd_op(const char *expr, ...);
+
+/**
+ * Frees the memory associated with matrix 'm', being the result of an earlier
+ * call to a matd_*() function, after which 'm' will no longer be usable.
+ */
+void matd_destroy(matd_t *m);
+
+typedef struct
+{
+    matd_t *U;
+    matd_t *S;
+    matd_t *V;
+} matd_svd_t;
+
+/** Compute a complete SVD of a matrix. The SVD exists for all
+ * matrices. For a matrix MxN, we will have:
+ *
+ * A = U*S*V'
+ *
+ * where A is MxN, U is MxM (and is an orthonormal basis), S is MxN
+ * (and is diagonal up to machine precision), and V is NxN (and is an
+ * orthonormal basis).
+ *
+ * The caller is responsible for destroying U, S, and V.
+ **/
+matd_svd_t matd_svd(matd_t *A);
+
+#define MATD_SVD_NO_WARNINGS 1
+    matd_svd_t matd_svd_flags(matd_t *A, int flags);
+
+////////////////////////////////
+// PLU Decomposition
+
+// All square matrices (even singular ones) have a partially-pivoted
+// LU decomposition such that A = PLU, where P is a permutation
+// matrix, L is a lower triangular matrix, and U is an upper
+// triangular matrix.
+//
+typedef struct
+{
+    // was the input matrix singular? When a zero pivot is found, this
+    // flag is set to indicate that this has happened.
+    int singular;
+
+    unsigned int *piv; // permutation indices
+    int pivsign; // either +1 or -1
+
+    // The matd_plu_t object returned "owns" the enclosed LU matrix. It
+    // is not expected that the returned object is itself useful to
+    // users: it contains the L and U information all smushed
+    // together.
+    matd_t *lu; // combined L and U matrices, permuted so they can be triangular.
+} matd_plu_t;
+
+matd_plu_t *matd_plu(const matd_t *a);
+void matd_plu_destroy(matd_plu_t *mlu);
+double matd_plu_det(const matd_plu_t *lu);
+matd_t *matd_plu_p(const matd_plu_t *lu);
+matd_t *matd_plu_l(const matd_plu_t *lu);
+matd_t *matd_plu_u(const matd_plu_t *lu);
+matd_t *matd_plu_solve(const matd_plu_t *mlu, const matd_t *b);
+
+// uses LU decomposition internally.
+matd_t *matd_solve(matd_t *A, matd_t *b);
+
+////////////////////////////////
+// Cholesky Factorization
+
+/**
+ * Creates a double matrix with the Cholesky lower triangular matrix
+ * of A. A must be symmetric, positive definite. It is the caller's
+ * responsibility to call matd_destroy() on the returned matrix.
+ */
+//matd_t *matd_cholesky(const matd_t *A);
+
+typedef struct
+{
+    int is_spd;
+    matd_t *u;
+} matd_chol_t;
+
+matd_chol_t *matd_chol(matd_t *A);
+matd_t *matd_chol_solve(const matd_chol_t *chol, const matd_t *b);
+void matd_chol_destroy(matd_chol_t *chol);
+// only sensible on PSD matrices
+matd_t *matd_chol_inverse(matd_t *a);
+
+void matd_ltransposetriangle_solve(matd_t *u, const double *b, double *x);
+void matd_ltriangle_solve(matd_t *u, const double *b, double *x);
+void matd_utriangle_solve(matd_t *u, const double *b, double *x);
+
+
+double matd_max(matd_t *m);
+
+#ifdef __cplusplus
+}
+#endif
diff --git a/common/math_util.h b/common/math_util.h
new file mode 100644
index 0000000..9271a01
--- /dev/null
+++ b/common/math_util.h
@@ -0,0 +1,216 @@
+/* Copyright (C) 2013-2016, The Regents of The University of Michigan.
+All rights reserved.
+This software was developed in the APRIL Robotics Lab under the
+direction of Edwin Olson, ebolson@umich.edu. This software may be
+available under alternative licensing terms; contact the address above.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+1. Redistributions of source code must retain the above copyright notice, this
+   list of conditions and the following disclaimer.
+2. Redistributions in binary form must reproduce the above copyright notice,
+   this list of conditions and the following disclaimer in the documentation
+   and/or other materials provided with the distribution.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+The views and conclusions contained in the software and documentation are those
+of the authors and should not be interpreted as representing official policies,
+either expressed or implied, of the Regents of The University of Michigan.
+*/
+
+#pragma once
+
+#include <math.h>
+#include <float.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <assert.h>
+#include <string.h> // memcpy
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifndef M_TWOPI
+# define M_TWOPI       6.2831853071795862319959  /* 2*pi */
+#endif
+
+#ifndef M_PI
+# define M_PI 3.141592653589793238462643383279502884196
+#endif
+
+#define to_radians(x) ( (x) * (M_PI / 180.0 ))
+#define to_degrees(x) ( (x) * (180.0 / M_PI ))
+
+#define max(A, B) (A < B ? B : A)
+#define min(A, B) (A < B ? A : B)
+
+  /* DEPRECATE, threshold meaningless without context.
+static inline int dequals(double a, double b)
+{
+    double thresh = 1e-9;
+    return (fabs(a-b) < thresh);
+}
+  */
+
+static inline int dequals_mag(double a, double b, double thresh)
+{
+    return (fabs(a-b) < thresh);
+}
+
+static inline int isq(int v)
+{
+    return v*v;
+}
+
+static inline float fsq(float v)
+{
+    return v*v;
+}
+
+static inline double sq(double v)
+{
+    return v*v;
+}
+
+static inline double sgn(double v)
+{
+    return (v>=0) ? 1 : -1;
+}
+
+// random number between [0, 1)
+static inline float randf()
+{
+    return (float)(rand() / (RAND_MAX + 1.0));
+}
+
+
+static inline float signed_randf()
+{
+    return randf()*2 - 1;
+}
+
+// return a random integer between [0, bound)
+static inline int irand(int bound)
+{
+    int v = (int) (randf()*bound);
+    if (v == bound)
+        return (bound-1);
+    //assert(v >= 0);
+    //assert(v < bound);
+    return v;
+}
+
+/** Map vin to [0, 2*PI) **/
+static inline double mod2pi_positive(double vin)
+{
+    return vin - M_TWOPI * floor(vin / M_TWOPI);
+}
+
+/** Map vin to [-PI, PI) **/
+static inline double mod2pi(double vin)
+{
+    return mod2pi_positive(vin + M_PI) - M_PI;
+}
+
+/** Return vin such that it is within PI degrees of ref **/
+static inline double mod2pi_ref(double ref, double vin)
+{
+    return ref + mod2pi(vin - ref);
+}
+
+/** Map vin to [0, 360) **/
+static inline double mod360_positive(double vin)
+{
+    return vin - 360 * floor(vin / 360);
+}
+
+/** Map vin to [-180, 180) **/
+static inline double mod360(double vin)
+{
+    return mod360_positive(vin + 180) - 180;
+}
+
+static inline int mod_positive(int vin, int mod) {
+    return (vin % mod + mod) % mod;
+}
+
+static inline int theta_to_int(double theta, int max)
+{
+    theta = mod2pi_ref(M_PI, theta);
+    int v = (int) (theta / M_TWOPI * max);
+
+    if (v == max)
+        v = 0;
+
+    assert (v >= 0 && v < max);
+
+    return v;
+}
+
+static inline int imin(int a, int b)
+{
+    return (a < b) ? a : b;
+}
+
+static inline int imax(int a, int b)
+{
+    return (a > b) ? a : b;
+}
+
+static inline int64_t imin64(int64_t a, int64_t b)
+{
+    return (a < b) ? a : b;
+}
+
+static inline int64_t imax64(int64_t a, int64_t b)
+{
+    return (a > b) ? a : b;
+}
+
+static inline int iclamp(int v, int minv, int maxv)
+{
+    return imax(minv, imin(v, maxv));
+}
+
+static inline double dclamp(double a, double min, double max)
+{
+    if (a < min)
+        return min;
+    if (a > max)
+        return max;
+    return a;
+}
+
+static inline int fltcmp (float f1, float f2)
+{
+    float epsilon = f1-f2;
+    if (epsilon < 0.0)
+        return -1;
+    else if (epsilon > 0.0)
+        return  1;
+    else
+        return  0;
+}
+
+static inline int dblcmp (double d1, double d2)
+{
+    double epsilon = d1-d2;
+    if (epsilon < 0.0)
+        return -1;
+    else if (epsilon > 0.0)
+        return  1;
+    else
+        return  0;
+}
+
+#ifdef __cplusplus
+}
+#endif
diff --git a/common/pam.c b/common/pam.c
new file mode 100644
index 0000000..2076d39
--- /dev/null
+++ b/common/pam.c
@@ -0,0 +1,256 @@
+/* Copyright (C) 2013-2016, The Regents of The University of Michigan.
+All rights reserved.
+This software was developed in the APRIL Robotics Lab under the
+direction of Edwin Olson, ebolson@umich.edu. This software may be
+available under alternative licensing terms; contact the address above.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+1. Redistributions of source code must retain the above copyright notice, this
+   list of conditions and the following disclaimer.
+2. Redistributions in binary form must reproduce the above copyright notice,
+   this list of conditions and the following disclaimer in the documentation
+   and/or other materials provided with the distribution.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+The views and conclusions contained in the software and documentation are those
+of the authors and should not be interpreted as representing official policies,
+either expressed or implied, of the Regents of The University of Michigan.
+*/
+
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <assert.h>
+
+#include "pam.h"
+
+pam_t *pam_create_from_file(const char *inpath)
+{
+    FILE *infile = fopen(inpath, "r");
+    if (infile == NULL) {
+        printf("pam.c: couldn't open input file: %s\n", inpath);
+        return NULL;
+    }
+
+    pam_t *pam = calloc(1, sizeof(pam_t));
+    pam->width = -1;
+    pam->height = -1;
+    pam->depth = -1;
+    pam->maxval = -1;
+    pam->type = -1;
+
+    int linenumber = 0;
+
+    while (1) {
+        char line[1024];
+        if (!fgets(line, sizeof(line), infile)) {
+            printf("pam.c: unexpected EOF\n");
+            goto fail;
+        }
+        linenumber++;
+
+        char *tok0 = line;
+        char *tok1 = NULL;
+
+        if (line[0] == '#') // comment
+            continue;
+
+        size_t linelen = strlen(line);
+        for (int idx = 0; idx < linelen; idx++) {
+            if (line[idx] == ' ') {
+                line[idx] = 0;
+                if (tok1) {
+                    printf("pam.c: More than two tokens, %s:%d\n", inpath, linenumber);
+                }
+
+                tok1 = &line[idx+1];
+            }
+            if (line[idx] == '\n')
+                line[idx] = 0;
+        }
+
+        if (!strcmp(tok0, "P7"))
+            continue;
+
+        if (!strcmp(tok0, "ENDHDR"))
+            break;
+
+        if (!strcmp(tok0, "WIDTH") && tok1) {
+            pam->width = atoi(tok1);
+            continue;
+        }
+
+        if (!strcmp(tok0, "HEIGHT") && tok1) {
+            pam->height = atoi(tok1);
+            continue;
+        }
+
+        if (!strcmp(tok0, "DEPTH") && tok1) {
+            pam->depth = atoi(tok1);
+            continue;
+        }
+
+        if (!strcmp(tok0, "MAXVAL") && tok1) {
+            pam->maxval = atoi(tok1);
+            continue;
+        }
+
+        if (!strcmp(tok0, "TUPLTYPE") && tok1) {
+            if (!strcmp(tok1, "GRAYSCALE_ALPHA")) {
+                pam->type = PAM_GRAYSCALE_ALPHA;
+                continue;
+            }
+
+            if (!strcmp(tok1, "RGB_ALPHA")) {
+                pam->type = PAM_RGB_ALPHA;
+                continue;
+            }
+
+            if (!strcmp(tok1, "RGB")) {
+                pam->type = PAM_RGB;
+                continue;
+            }
+
+            if (!strcmp(tok1, "GRAYSCALE")) {
+                pam->type = PAM_GRAYSCALE;
+                continue;
+            }
+
+            printf("pam.c: unrecognized tupl type %s\n", tok1);
+            continue;
+        }
+
+        printf("pam.c: unrecognized attribute %s\n", tok0);
+    }
+
+    if (pam->width < 0 || pam->height < 0 || pam->depth < 0 ||
+        pam->maxval < 0 || pam->type < 0) {
+        printf("pam.c: missing required metadata field\n");
+        goto fail;
+    }
+
+    assert(pam->maxval == 255);
+
+    pam->datalen = pam->width * pam->height * pam->depth;
+    pam->data = malloc(pam->datalen);
+    if (pam->datalen != fread(pam->data, 1, pam->datalen, infile)) {
+        printf("pam.c: couldn't read body\n");
+        goto fail;
+    }
+
+    fclose(infile);
+    return pam;
+
+  fail:
+    free(pam);
+    fclose(infile);
+    return NULL;
+}
+
+int pam_write_file(pam_t *pam, const char *outpath)
+{
+    FILE *f = fopen(outpath, "w+");
+    if (!f)
+        return -1;
+
+    const char *tupl = NULL;
+    switch (pam->type) {
+        case PAM_GRAYSCALE_ALPHA:
+            tupl = "GRAYSCALE_ALPHA";
+            break;
+        case PAM_RGB_ALPHA:
+            tupl = "RGB_ALPHA";
+            break;
+        case PAM_RGB:
+            tupl = "RGB";
+            break;
+        case PAM_GRAYSCALE:
+            tupl = "GRAYSCALE";
+            break;
+        default:
+            assert(0);
+    }
+
+    fprintf(f, "P7\nWIDTH %d\nHEIGHT %d\nDEPTH %d\nMAXVAL %d\nTUPLTYPE %s\nENDHDR\n",
+            pam->width, pam->height, pam->depth, pam->maxval, tupl);
+    int len = pam->width * pam->height * pam->depth;
+    if (len != fwrite(pam->data, 1, len, f)) {
+        fclose(f);
+        return -2;
+    }
+
+    fclose(f);
+
+    return 0;
+}
+
+void pam_destroy(pam_t *pam)
+{
+    if (!pam)
+        return;
+
+    free(pam->data);
+    free(pam);
+}
+
+pam_t *pam_copy(pam_t *pam)
+{
+    pam_t *copy = calloc(1, sizeof(pam_t));
+    copy->width = pam->width;
+    copy->height = pam->height;
+    copy->depth = pam->depth;
+    copy->maxval = pam->maxval;
+    copy->type = pam->type;
+
+    copy->datalen = pam->datalen;
+    copy->data = malloc(pam->datalen);
+    memcpy(copy->data, pam->data, pam->datalen);
+
+    return copy;
+}
+
+pam_t *pam_convert(pam_t *in, int type)
+{
+    if (type == in->type)
+        return pam_copy(in);
+
+    assert(type == PAM_RGB_ALPHA); // we don't support a lot yet
+    assert(in->maxval == 255);
+
+    int w = in->width;
+    int h = in->height;
+
+    pam_t *out = calloc(1, sizeof(pam_t));
+    out->type = type;
+    out->width = w;
+    out->height = h;
+    out->maxval = in->maxval;
+    out->depth = 4;
+    out->datalen = 4 * w * h;
+    out->data = malloc(out->datalen);
+
+    if (in->type == PAM_RGB) {
+        assert(in->depth == 3);
+        for (int y = 0; y < h; y++) {
+            for (int x = 0; x < w; x++) {
+                out->data[y*4*w + 4*x + 0] = in->data[y*3*w + 3*x + 0];
+                out->data[y*4*w + 4*x + 1] = in->data[y*3*w + 3*x + 1];
+                out->data[y*4*w + 4*x + 2] = in->data[y*3*w + 3*x + 2];
+                out->data[y*4*w + 4*x + 3] = 255;
+            }
+        }
+    } else {
+        printf("pam.c unsupported type %d\n", in->type);
+        assert(0);
+    }
+
+    return out;
+}
diff --git a/common/pam.h b/common/pam.h
new file mode 100644
index 0000000..67fff67
--- /dev/null
+++ b/common/pam.h
@@ -0,0 +1,54 @@
+/* Copyright (C) 2013-2016, The Regents of The University of Michigan.
+All rights reserved.
+This software was developed in the APRIL Robotics Lab under the
+direction of Edwin Olson, ebolson@umich.edu. This software may be
+available under alternative licensing terms; contact the address above.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+1. Redistributions of source code must retain the above copyright notice, this
+   list of conditions and the following disclaimer.
+2. Redistributions in binary form must reproduce the above copyright notice,
+   this list of conditions and the following disclaimer in the documentation
+   and/or other materials provided with the distribution.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+The views and conclusions contained in the software and documentation are those
+of the authors and should not be interpreted as representing official policies,
+either expressed or implied, of the Regents of The University of Michigan.
+*/
+
+#pragma once
+
+#include <stdint.h>
+
+enum { PAM_GRAYSCALE_ALPHA = 5000, PAM_RGB_ALPHA, PAM_RGB, PAM_GRAYSCALE };
+
+typedef struct pam pam_t;
+struct pam
+{
+    int type; // one of PAM_*
+
+    int width, height; // note, stride always width.
+    int depth; // bytes per pixel
+    int maxval; // maximum value per channel, e.g. 255 for 8bpp
+
+    int datalen; // in bytes
+    uint8_t *data;
+};
+
+pam_t *pam_create_from_file(const char *inpath);
+int pam_write_file(pam_t *pam, const char *outpath);
+void pam_destroy(pam_t *pam);
+
+pam_t *pam_copy(pam_t *pam);
+
+// NB doesn't handle many conversions yet.
+pam_t *pam_convert(pam_t *in, int type);
diff --git a/common/pjpeg-idct.c b/common/pjpeg-idct.c
new file mode 100644
index 0000000..ddbdde5
--- /dev/null
+++ b/common/pjpeg-idct.c
@@ -0,0 +1,388 @@
+/* Copyright (C) 2013-2016, The Regents of The University of Michigan.
+All rights reserved.
+This software was developed in the APRIL Robotics Lab under the
+direction of Edwin Olson, ebolson@umich.edu. This software may be
+available under alternative licensing terms; contact the address above.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+1. Redistributions of source code must retain the above copyright notice, this
+   list of conditions and the following disclaimer.
+2. Redistributions in binary form must reproduce the above copyright notice,
+   this list of conditions and the following disclaimer in the documentation
+   and/or other materials provided with the distribution.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+The views and conclusions contained in the software and documentation are those
+of the authors and should not be interpreted as representing official policies,
+either expressed or implied, of the Regents of The University of Michigan.
+*/
+
+#include <math.h>
+#include <stdint.h>
+
+#ifndef M_PI
+# define M_PI 3.141592653589793238462643383279502884196
+#endif
+
+// 8 bits of fixed-point output
+//
+// This implementation has a worst-case complexity of 22 multiplies
+// and 64 adds. This makes it significantly worse (about 2x) than the
+// best-known fast inverse cosine transform methods. HOWEVER, zero
+// coefficients can be skipped over, and since that's common (often
+// more than half the coefficients are zero).
+//
+// The output is scaled by a factor of 256 (due to our fixed-point
+// integer arithmetic)..
+static inline void idct_1D_u32(int32_t *in, int instride, int32_t *out, int outstride)
+{
+    for (int x = 0; x < 8; x++)
+        out[x*outstride] = 0;
+
+    int32_t c;
+
+    c = in[0*instride];
+    if (c) {
+        // 181  181  181  181  181  181  181  181
+        int32_t c181 = c * 181;
+        out[0*outstride] += c181;
+        out[1*outstride] += c181;
+        out[2*outstride] += c181;
+        out[3*outstride] += c181;
+        out[4*outstride] += c181;
+        out[5*outstride] += c181;
+        out[6*outstride] += c181;
+        out[7*outstride] += c181;
+    }
+
+    c = in[1*instride];
+    if (c) {
+        // 251  212  142   49  -49 -142 -212 -251
+        int32_t c251 = c * 251;
+        int32_t c212 = c * 212;
+        int32_t c142 = c * 142;
+        int32_t c49 = c * 49;
+        out[0*outstride] += c251;
+        out[1*outstride] += c212;
+        out[2*outstride] += c142;
+        out[3*outstride] += c49;
+        out[4*outstride] -= c49;
+        out[5*outstride] -= c142;
+        out[6*outstride] -= c212;
+        out[7*outstride] -= c251;
+    }
+
+    c = in[2*instride];
+    if (c) {
+        // 236   97  -97 -236 -236  -97   97  236
+        int32_t c236 = c*236;
+        int32_t c97 = c*97;
+        out[0*outstride] += c236;
+        out[1*outstride] += c97;
+        out[2*outstride] -= c97;
+        out[3*outstride] -= c236;
+        out[4*outstride] -= c236;
+        out[5*outstride] -= c97;
+        out[6*outstride] += c97;
+        out[7*outstride] += c236;
+    }
+
+    c = in[3*instride];
+    if (c) {
+        // 212  -49 -251 -142  142  251   49 -212
+        int32_t c212 = c*212;
+        int32_t c49 = c*49;
+        int32_t c251 = c*251;
+        int32_t c142 = c*142;
+        out[0*outstride] += c212;
+        out[1*outstride] -= c49;
+        out[2*outstride] -= c251;
+        out[3*outstride] -= c142;
+        out[4*outstride] += c142;
+        out[5*outstride] += c251;
+        out[6*outstride] += c49;
+        out[7*outstride] -= c212;
+    }
+
+    c = in[4*instride];
+    if (c) {
+        // 181 -181 -181  181  181 -181 -181  181
+        int32_t c181 = c*181;
+        out[0*outstride] += c181;
+        out[1*outstride] -= c181;
+        out[2*outstride] -= c181;
+        out[3*outstride] += c181;
+        out[4*outstride] += c181;
+        out[5*outstride] -= c181;
+        out[6*outstride] -= c181;
+        out[7*outstride] += c181;
+    }
+
+    c = in[5*instride];
+    if (c) {
+        // 142 -251   49  212 -212  -49  251 -142
+        int32_t c142 = c*142;
+        int32_t c251 = c*251;
+        int32_t c49 = c*49;
+        int32_t c212 = c*212;
+        out[0*outstride] += c142;
+        out[1*outstride] -= c251;
+        out[2*outstride] += c49;
+        out[3*outstride] += c212;
+        out[4*outstride] -= c212;
+        out[5*outstride] -= c49;
+        out[6*outstride] += c251;
+        out[7*outstride] -= c142;
+    }
+
+    c = in[6*instride];
+    if (c) {
+        //  97 -236  236  -97  -97  236 -236   97
+        int32_t c97 = c*97;
+        int32_t c236 = c*236;
+        out[0*outstride] += c97;
+        out[1*outstride] -= c236;
+        out[2*outstride] += c236;
+        out[3*outstride] -= c97;
+        out[4*outstride] -= c97;
+        out[5*outstride] += c236;
+        out[6*outstride] -= c236;
+        out[7*outstride] += c97;
+    }
+
+    c = in[7*instride];
+    if (c) {
+        //  49 -142  212 -251  251 -212  142  -49
+        int32_t c49 = c*49;
+        int32_t c142 = c*142;
+        int32_t c212 = c*212;
+        int32_t c251 = c*251;
+        out[0*outstride] += c49;
+        out[1*outstride] -= c142;
+        out[2*outstride] += c212;
+        out[3*outstride] -= c251;
+        out[4*outstride] += c251;
+        out[5*outstride] -= c212;
+        out[6*outstride] += c142;
+        out[7*outstride] -= c49;
+    }
+}
+
+void pjpeg_idct_2D_u32(int32_t in[64], uint8_t *out, uint32_t outstride)
+{
+    int32_t tmp[64];
+
+    // idct on rows
+    for (int y = 0; y < 8; y++)
+        idct_1D_u32(&in[8*y], 1, &tmp[8*y], 1);
+
+    int32_t tmp2[64];
+
+    // idct on columns
+    for (int x = 0; x < 8; x++)
+        idct_1D_u32(&tmp[x], 8, &tmp2[x], 8);
+
+    // scale, adjust bias, and clamp
+    for (int y = 0; y < 8; y++) {
+        for (int x = 0; x < 8; x++) {
+            int i = 8*y + x;
+
+            // Shift of 18: the divide by 4 as part of the idct, and a shift by 16
+            // to undo the fixed-point arithmetic. (We accumulated 8 bits of
+            // fractional precision during each of the row and column IDCTs)
+            //
+            // Originally:
+            //            int32_t v = (tmp2[i] >> 18) + 128;
+            //
+            // Move the add before the shift and we can do rounding at
+            // the same time.
+            const int32_t offset = (128 << 18) + (1 << 17);
+            int32_t v = (tmp2[i] + offset) >> 18;
+
+            if (v < 0)
+                v = 0;
+            if (v > 255)
+                v = 255;
+
+            out[y*outstride + x] = v;
+        }
+    }
+}
+
+///////////////////////////////////////////////////////
+// Below: a "as straight-forward as I can make" implementation.
+static inline void idct_1D_double(double *in, int instride, double *out, int outstride)
+{
+    for (int x = 0; x < 8; x++)
+        out[x*outstride] = 0;
+
+    // iterate over IDCT coefficients
+    double Cu = 1/sqrt(2);
+
+    for (int u = 0; u < 8; u++, Cu = 1) {
+
+        double coeff = in[u*instride];
+        if (coeff == 0)
+            continue;
+
+        for (int x = 0; x < 8; x++)
+            out[x*outstride] += Cu*cos((2*x+1)*u*M_PI/16) * coeff;
+    }
+}
+
+void pjpeg_idct_2D_double(int32_t in[64], uint8_t *out, uint32_t outstride)
+{
+    double din[64], dout[64];
+    for (int i = 0; i < 64; i++)
+        din[i] = in[i];
+
+    double tmp[64];
+
+    // idct on rows
+    for (int y = 0; y < 8; y++)
+        idct_1D_double(&din[8*y], 1, &tmp[8*y], 1);
+
+    // idct on columns
+    for (int x = 0; x < 8; x++)
+        idct_1D_double(&tmp[x], 8, &dout[x], 8);
+
+    // scale, adjust bias, and clamp
+    for (int y = 0; y < 8; y++) {
+        for (int x = 0; x < 8; x++) {
+            int i = 8*y + x;
+
+            dout[i] = (dout[i] / 4) + 128;
+            if (dout[i] < 0)
+                dout[i] = 0;
+            if (dout[i] > 255)
+                dout[i] = 255;
+
+            // XXX round by adding +.5?
+            out[y*outstride + x] = dout[i];
+        }
+    }
+}
+
+//////////////////////////////////////////////
+static inline unsigned char njClip(const int x) {
+    return (x < 0) ? 0 : ((x > 0xFF) ? 0xFF : (unsigned char) x);
+}
+
+#define W1 2841
+#define W2 2676
+#define W3 2408
+#define W5 1609
+#define W6 1108
+#define W7 565
+
+static inline void njRowIDCT(int* blk) {
+    int x0, x1, x2, x3, x4, x5, x6, x7, x8;
+    if (!((x1 = blk[4] << 11)
+        | (x2 = blk[6])
+        | (x3 = blk[2])
+        | (x4 = blk[1])
+        | (x5 = blk[7])
+        | (x6 = blk[5])
+        | (x7 = blk[3])))
+    {
+        blk[0] = blk[1] = blk[2] = blk[3] = blk[4] = blk[5] = blk[6] = blk[7] = blk[0] << 3;
+        return;
+    }
+    x0 = (blk[0] << 11) + 128;
+    x8 = W7 * (x4 + x5);
+    x4 = x8 + (W1 - W7) * x4;
+    x5 = x8 - (W1 + W7) * x5;
+    x8 = W3 * (x6 + x7);
+    x6 = x8 - (W3 - W5) * x6;
+    x7 = x8 - (W3 + W5) * x7;
+    x8 = x0 + x1;
+    x0 -= x1;
+    x1 = W6 * (x3 + x2);
+    x2 = x1 - (W2 + W6) * x2;
+    x3 = x1 + (W2 - W6) * x3;
+    x1 = x4 + x6;
+    x4 -= x6;
+    x6 = x5 + x7;
+    x5 -= x7;
+    x7 = x8 + x3;
+    x8 -= x3;
+    x3 = x0 + x2;
+    x0 -= x2;
+    x2 = (181 * (x4 + x5) + 128) >> 8;
+    x4 = (181 * (x4 - x5) + 128) >> 8;
+    blk[0] = (x7 + x1) >> 8;
+    blk[1] = (x3 + x2) >> 8;
+    blk[2] = (x0 + x4) >> 8;
+    blk[3] = (x8 + x6) >> 8;
+    blk[4] = (x8 - x6) >> 8;
+    blk[5] = (x0 - x4) >> 8;
+    blk[6] = (x3 - x2) >> 8;
+    blk[7] = (x7 - x1) >> 8;
+}
+
+static inline void njColIDCT(const int* blk, unsigned char *out, int stride) {
+    int x0, x1, x2, x3, x4, x5, x6, x7, x8;
+    if (!((x1 = blk[8*4] << 8)
+        | (x2 = blk[8*6])
+        | (x3 = blk[8*2])
+        | (x4 = blk[8*1])
+        | (x5 = blk[8*7])
+        | (x6 = blk[8*5])
+        | (x7 = blk[8*3])))
+    {
+        x1 = njClip(((blk[0] + 32) >> 6) + 128);
+        for (x0 = 8;  x0;  --x0) {
+            *out = (unsigned char) x1;
+            out += stride;
+        }
+        return;
+    }
+    x0 = (blk[0] << 8) + 8192;
+    x8 = W7 * (x4 + x5) + 4;
+    x4 = (x8 + (W1 - W7) * x4) >> 3;
+    x5 = (x8 - (W1 + W7) * x5) >> 3;
+    x8 = W3 * (x6 + x7) + 4;
+    x6 = (x8 - (W3 - W5) * x6) >> 3;
+    x7 = (x8 - (W3 + W5) * x7) >> 3;
+    x8 = x0 + x1;
+    x0 -= x1;
+    x1 = W6 * (x3 + x2) + 4;
+    x2 = (x1 - (W2 + W6) * x2) >> 3;
+    x3 = (x1 + (W2 - W6) * x3) >> 3;
+    x1 = x4 + x6;
+    x4 -= x6;
+    x6 = x5 + x7;
+    x5 -= x7;
+    x7 = x8 + x3;
+    x8 -= x3;
+    x3 = x0 + x2;
+    x0 -= x2;
+    x2 = (181 * (x4 + x5) + 128) >> 8;
+    x4 = (181 * (x4 - x5) + 128) >> 8;
+    *out = njClip(((x7 + x1) >> 14) + 128);  out += stride;
+    *out = njClip(((x3 + x2) >> 14) + 128);  out += stride;
+    *out = njClip(((x0 + x4) >> 14) + 128);  out += stride;
+    *out = njClip(((x8 + x6) >> 14) + 128);  out += stride;
+    *out = njClip(((x8 - x6) >> 14) + 128);  out += stride;
+    *out = njClip(((x0 - x4) >> 14) + 128);  out += stride;
+    *out = njClip(((x3 - x2) >> 14) + 128);  out += stride;
+    *out = njClip(((x7 - x1) >> 14) + 128);
+}
+
+void pjpeg_idct_2D_nanojpeg(int32_t in[64], uint8_t *out, uint32_t outstride)
+{
+    int coef;
+
+    for (coef = 0;  coef < 64;  coef += 8)
+        njRowIDCT(&in[coef]);
+    for (coef = 0;  coef < 8;  ++coef)
+        njColIDCT(&in[coef], &out[coef], outstride);
+}
diff --git a/common/pjpeg.c b/common/pjpeg.c
new file mode 100644
index 0000000..acc61f0
--- /dev/null
+++ b/common/pjpeg.c
@@ -0,0 +1,893 @@
+/* Copyright (C) 2013-2016, The Regents of The University of Michigan.
+All rights reserved.
+This software was developed in the APRIL Robotics Lab under the
+direction of Edwin Olson, ebolson@umich.edu. This software may be
+available under alternative licensing terms; contact the address above.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+1. Redistributions of source code must retain the above copyright notice, this
+   list of conditions and the following disclaimer.
+2. Redistributions in binary form must reproduce the above copyright notice,
+   this list of conditions and the following disclaimer in the documentation
+   and/or other materials provided with the distribution.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+The views and conclusions contained in the software and documentation are those
+of the authors and should not be interpreted as representing official policies,
+either expressed or implied, of the Regents of The University of Michigan.
+*/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <assert.h>
+#include <stdint.h>
+#include <string.h>
+
+#include "pjpeg.h"
+
+#include "image_u8.h"
+#include "image_u8x3.h"
+#include "debug_print.h"
+
+// https://www.w3.org/Graphics/JPEG/itu-t81.pdf
+
+void pjpeg_idct_2D_double(int32_t in[64], uint8_t *out, uint32_t outstride);
+void pjpeg_idct_2D_u32(int32_t in[64], uint8_t *out, uint32_t outstride);
+void pjpeg_idct_2D_nanojpeg(int32_t in[64], uint8_t *out, uint32_t outstride);
+
+struct pjpeg_huffman_code
+{
+    uint8_t nbits;  // how many bits should we actually consume?
+    uint8_t code;   // what is the symbol that was encoded? (not actually a DCT coefficient; see encoding)
+};
+
+struct pjpeg_decode_state
+{
+    int error;
+
+    uint32_t width, height;
+    uint8_t *in;
+    uint32_t inlen;
+
+    uint32_t flags;
+
+    // to decode, we load the next 16 bits of input (generally more
+    // than we need). We then look up in our code book how many bits
+    // we have actually consumed. For example, if there was a code
+    // whose bit sequence was "0", the first 32768 entries would all
+    // be copies of {.bits=1, .value=XX}; no matter what the following
+    // 15 bits are, we would get the correct decode.
+    //
+    // Can be up to 8 tables; computed as (ACDC * 2 + htidx)
+    struct pjpeg_huffman_code huff_codes[4][65536];
+    int huff_codes_present[4];
+
+    uint8_t  qtab[4][64];
+
+    int ncomponents;
+    pjpeg_component_t *components;
+
+    int reset_interval;
+    int reset_count;
+    int reset_next; // What reset marker do we expect next? (add 0xd0)
+
+    int debug;
+};
+
+// from K.3.3.1 (page 158)
+static uint8_t mjpeg_dht[] = { // header
+    0xFF,0xC4,0x01,0xA2,
+
+    /////////////////////////////////////////////////////////////
+    // luminance dc coefficients.
+    // DC table 0
+    0x00,
+    // code lengths
+    0x00,0x01,0x05,0x01,0x01,0x01,0x01,0x01,0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+    // values
+    0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0A,0x0B,
+
+    /////////////////////////////////////////////////////////////
+    // chrominance DC coefficents
+    // DC table 1
+    0x01,
+    // code lengths
+    0x00,0x03,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x00,0x00,0x00,0x00,0x00,
+    // values
+    0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0A,0x0B,
+
+    /////////////////////////////////////////////////////////////
+    // luminance AC coefficients
+    // AC table 0
+    0x10,
+    // code lengths
+    0x00,0x02,0x01,0x03,0x03,0x02,0x04,0x03,0x05,0x05,0x04,0x04,0x00,0x00,0x01,0x7D,
+    // codes
+    0x01,0x02,0x03,0x00,0x04,0x11,0x05,0x12,0x21,0x31,0x41,0x06,0x13,0x51,0x61,
+    0x07,0x22,0x71,0x14,0x32,0x81,0x91,0xA1,0x08,0x23,0x42,0xB1,0xC1,0x15,0x52,0xD1,0xF0,0x24,
+    0x33,0x62,0x72,0x82,0x09,0x0A,0x16,0x17,0x18,0x19,0x1A,0x25,0x26,0x27,0x28,0x29,0x2A,0x34,
+    0x35,0x36,0x37,0x38,0x39,0x3A,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x53,0x54,0x55,0x56,
+    0x57,0x58,0x59,0x5A,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6A,0x73,0x74,0x75,0x76,0x77,0x78,
+    0x79,0x7A,0x83,0x84,0x85,0x86,0x87,0x88,0x89,0x8A,0x92,0x93,0x94,0x95,0x96,0x97,0x98,0x99,
+    0x9A,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,0xA8,0xA9,0xAA,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,0xB8,0xB9,
+    0xBA,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,0xC8,0xC9,0xCA,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,0xD8,0xD9,
+    0xDA,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,0xE8,0xE9,0xEA,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,
+    0xF8,0xF9,0xFA,
+
+    /////////////////////////////////////////////////////////////
+    // chrominance DC coefficients
+    // DC table 1
+    0x11,
+    // code lengths
+    0x00,0x02,0x01,0x02,0x04,0x04,0x03,0x04,0x07,0x05,0x04,0x04,0x00,0x01,0x02,0x77,
+    // values
+    0x00,0x01,0x02,0x03,0x11,0x04,0x05,0x21,0x31,0x06,0x12,0x41,0x51,0x07,0x61,0x71,
+    0x13,0x22,0x32,0x81,0x08,0x14,0x42,0x91,0xA1,0xB1,0xC1,0x09,0x23,0x33,0x52,0xF0,0x15,0x62,
+    0x72,0xD1,0x0A,0x16,0x24,0x34,0xE1,0x25,0xF1,0x17,0x18,0x19,0x1A,0x26,0x27,0x28,0x29,0x2A,
+    0x35,0x36,0x37,0x38,0x39,0x3A,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x53,0x54,0x55,0x56,
+    0x57,0x58,0x59,0x5A,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6A,0x73,0x74,0x75,0x76,0x77,0x78,
+    0x79,0x7A,0x82,0x83,0x84,0x85,0x86,0x87,0x88,0x89,0x8A,0x92,0x93,0x94,0x95,0x96,0x97,0x98,
+    0x99,0x9A,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,0xA8,0xA9,0xAA,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,0xB8,
+    0xB9,0xBA,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,0xC8,0xC9,0xCA,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,0xD8,
+    0xD9,0xDA,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,0xE8,0xE9,0xEA,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,0xF8,
+    0xF9,0xFA
+};
+
+static inline uint8_t max_u8(uint8_t a, uint8_t b)
+{
+    return a > b ? a : b;
+}
+
+// order of coefficients in each DC block
+static const char ZZ[64] = { 0,   1,  8, 16,  9,  2,  3, 10,
+                             17, 24, 32, 25, 18, 11,  4,  5,
+                             12, 19, 26, 33, 40, 48, 41, 34,
+                             27, 20, 13,  6,  7, 14, 21, 28,
+                             35, 42, 49, 56, 57, 50, 43, 36,
+                             29, 22, 15, 23, 30, 37, 44, 51,
+                             58, 59, 52, 45, 38, 31, 39, 46,
+                             53, 60, 61, 54, 47, 55, 62, 63 };
+
+
+
+struct bit_decoder
+{
+    uint8_t *in;
+    uint32_t inpos;
+    uint32_t inlen;
+
+    uint32_t bits; // the low order bits contain the next nbits_avail bits.
+
+    int nbits_avail; // how many bits in 'bits' (left aligned) are valid?
+
+    int error;
+};
+
+// ensure that at least 'nbits' of data is available in the bit decoder.
+static inline void bd_ensure(struct bit_decoder *bd, int nbits)
+{
+    while (bd->nbits_avail < nbits) {
+
+        if (bd->inpos >= bd->inlen) {
+            printf("hallucinating 1s!\n");
+            // we hit end of stream hallucinate an infinite stream of 1s
+            bd->bits = (bd->bits << 8) | 0xff;
+            bd->nbits_avail += 8;
+            continue;
+        }
+
+        uint8_t nextbyte = bd->in[bd->inpos];
+        bd->inpos++;
+
+        if (nextbyte == 0xff && bd->inpos < bd->inlen && bd->in[bd->inpos] == 0x00) {
+            // a stuffed byte
+            nextbyte = 0xff;
+            bd->inpos++;
+        }
+
+        // it's an ordinary byte
+        bd->bits = (bd->bits << 8) | nextbyte;
+        bd->nbits_avail += 8;
+    }
+}
+
+static inline uint32_t bd_peek_bits(struct bit_decoder *bd, int nbits)
+{
+    bd_ensure(bd, nbits);
+
+    return (bd->bits >> (bd->nbits_avail - nbits)) & ((1 << nbits) - 1);
+}
+
+static inline uint32_t bd_consume_bits(struct bit_decoder *bd, int nbits)
+{
+    assert(nbits < 32);
+
+    bd_ensure(bd, nbits);
+
+    uint32_t v = (bd->bits >> (bd->nbits_avail - nbits)) & ((1 << nbits) - 1);
+
+    bd->nbits_avail -= nbits;
+
+    return v;
+}
+
+// discard without regard for byte stuffing!
+static inline void bd_discard_bytes(struct bit_decoder *bd, int nbytes)
+{
+    assert(bd->nbits_avail == 0);
+    bd->inpos += nbytes;
+}
+
+static inline int bd_has_more(struct bit_decoder *bd)
+{
+    return bd->nbits_avail > 0 || bd->inpos < bd->inlen;
+}
+
+// throw away up to 7 bits of data so that the next data returned
+// began on a byte boundary.
+static inline void bd_discard_to_byte_boundary(struct bit_decoder *bd)
+{
+    bd->nbits_avail -= (bd->nbits_avail & 7);
+}
+
+static inline uint32_t bd_get_offset(struct bit_decoder *bd)
+{
+    return bd->inpos - bd->nbits_avail / 8;
+}
+
+static int pjpeg_decode_buffer(struct pjpeg_decode_state *pjd)
+{
+    // XXX TODO Include sanity check that this is actually a JPG
+
+    struct bit_decoder bd;
+    memset(&bd, 0, sizeof(struct bit_decoder));
+    bd.in = pjd->in;
+    bd.inpos = 0;
+    bd.inlen = pjd->inlen;
+
+    int marker_sync_skipped = 0;
+    int marker_sync_skipped_from_offset = 0;
+
+    while (bd_has_more(&bd)) {
+
+        uint32_t marker_offset = bd_get_offset(&bd);
+
+        // Look for the 0xff that signifies the beginning of a marker
+        bd_discard_to_byte_boundary(&bd);
+
+        while (bd_consume_bits(&bd, 8) != 0xff) {
+            if (marker_sync_skipped == 0)
+                marker_sync_skipped_from_offset = marker_offset;
+            marker_sync_skipped++;
+            continue;
+        }
+
+        if (marker_sync_skipped) {
+            printf("%08x: skipped %04x bytes\n", marker_sync_skipped_from_offset, marker_sync_skipped);
+            marker_sync_skipped = 0;
+        }
+
+        uint8_t marker = bd_consume_bits(&bd, 8);
+
+//        printf("marker %08x : %02x\n", marker_offset, marker);
+
+        switch (marker) {
+
+            case 0xd8:  // start of image. Great, continue.
+                continue;
+
+                // below are the markers that A) we don't care about
+                // that B) encode length as two bytes.
+                //
+                // Note: Other unknown fields should not be added since
+                // we should be able to skip over them by looking for
+                // the next marker byte.
+            case 0xe0: // JFIF header.
+            case 0xe1: // EXIF header (Yuck: Payload may contain 0xff 0xff!)
+            case 0xe2: // ICC Profile. (Yuck: payload may contain 0xff 0xff!)
+            case 0xe6: // some other common header
+            case 0xfe: // Comment
+            {
+                uint16_t length = bd_consume_bits(&bd, 16);
+                bd_discard_bytes(&bd, length - 2);
+                continue;
+            }
+
+            case 0xdb: { // DQT Define Quantization Table
+                uint16_t length = bd_consume_bits(&bd, 16);
+
+                if (((length-2) % 65) != 0)
+                    return PJPEG_ERR_DQT;
+
+                // can contain multiple DQTs
+                for (int offset = 0; offset < length - 2; offset += 65) {
+
+                    // pq: quant table element precision. 0=8bit, 1=16bit.
+                    // tq: quant table destination id.
+                    uint8_t  pqtq = bd_consume_bits(&bd, 8);
+
+                    if ((pqtq & 0xf0) != 0 || (pqtq & 0x0f) >= 4)
+                        return PJPEG_ERR_DQT;
+
+                    uint8_t id = pqtq & 3;
+
+                    for (int i = 0; i < 64; i++)
+                        pjd->qtab[id][i] = bd_consume_bits(&bd, 8);
+                }
+
+                break;
+            }
+
+            case 0xc0: { // SOF, non-differential, huffman, baseline
+                uint16_t length = bd_consume_bits(&bd, 16);
+                (void) length;
+
+                uint8_t p = bd_consume_bits(&bd, 8); // precision
+                if (p != 8)
+                    return PJPEG_ERR_SOF;
+
+                pjd->height = bd_consume_bits(&bd, 16);
+                pjd->width = bd_consume_bits(&bd, 16);
+
+//                printf("%d x %d\n", pjd->height, pjd->width);
+
+                int nf = bd_consume_bits(&bd, 8); // # image components
+
+                if (nf < 1 || nf > 3)
+                    return PJPEG_ERR_SOF;
+
+                pjd->ncomponents = nf;
+                pjd->components = calloc(nf, sizeof(struct pjpeg_component));
+
+                for (int i = 0; i < nf; i++) {
+                    // comp. identifier
+                    pjd->components[i].id = bd_consume_bits(&bd, 8);
+
+                    // horiz/vert sampling
+                    pjd->components[i].hv = bd_consume_bits(&bd, 8);
+                    pjd->components[i].scaley = pjd->components[i].hv & 0x0f;
+                    pjd->components[i].scalex = pjd->components[i].hv >> 4;
+
+                    // which quant table?
+                    pjd->components[i].tq = bd_consume_bits(&bd, 8);
+                }
+                break;
+            }
+
+            case 0xc1: // SOF, non-differential, huffman,    extended DCT
+            case 0xc2: // SOF, non-differential, huffman,    progressive DCT
+            case 0xc3: // SOF, non-differential, huffman,    lossless
+            case 0xc5: // SOF, differential,     huffman,    baseline DCT
+            case 0xc6: // SOF, differential,     huffman,    progressive
+            case 0xc7: // SOF, differential,     huffman,    lossless
+            case 0xc8: // reserved
+            case 0xc9: // SOF, non-differential, arithmetic, extended
+            case 0xca: // SOF, non-differential, arithmetic, progressive
+            case 0xcb: // SOF, non-differential, arithmetic, lossless
+            case 0xcd: // SOF, differential,     arithmetic, sequential
+            case 0xce: // SOF, differential,     arithmetic, progressive
+            case 0xcf: // SOF, differential,     arithmetic, lossless
+            {
+                printf("pjepg.c: unsupported JPEG type %02x\n", marker);
+                return PJEPG_ERR_UNSUPPORTED;
+            }
+
+            case 0xc4: { // DHT Define Huffman Tables
+                // [ED: the encoding of these tables is really quite
+                // clever!]
+                uint16_t length = bd_consume_bits(&bd, 16);
+                length = length - 2;
+
+                while (length > 0) {
+                    uint8_t TcTh = bd_consume_bits(&bd, 8);
+                    length--;
+                    uint8_t Tc = (TcTh >> 4);
+                    int Th = TcTh & 0x0f; // which index are we using?
+
+                    if (Tc >= 2 || Th >= 2)
+                        // Tc must be either AC=1 or DC=0.
+                        // Th must be less than 2
+                        return PJPEG_ERR_DHT;
+
+                    int htidx = Tc*2 + Th;
+
+                    uint8_t L[17]; // how many symbols of each bit length?
+                    L[0] = 0;      // no 0 bit codes :)
+                    for (int nbits = 1; nbits <= 16; nbits++) {
+                        L[nbits] = bd_consume_bits(&bd, 8);
+                        length -= L[nbits];
+                    }
+                    length -= 16;
+
+                    uint32_t code_pos = 0;
+
+                    for (int nbits = 1; nbits <= 16; nbits++) {
+                        int nvalues = L[nbits];
+
+                        // how many entries will we fill?
+                        // (a 1 bit code will fill 32768, a 2 bit code 16384, ...)
+                        uint32_t ncodes = (1 << (16 - nbits));
+
+                        // consume the values...
+                        for (int vi = 0; vi < nvalues; vi++) {
+                            uint8_t code = bd_consume_bits(&bd, 8);
+
+                            if (code_pos + ncodes > 0xffff)
+                                return PJPEG_ERR_DHT;
+
+                            for (int ci = 0; ci < ncodes; ci++) {
+                                pjd->huff_codes[htidx][code_pos].nbits = nbits;
+                                pjd->huff_codes[htidx][code_pos].code = code;
+                                code_pos++;
+                            }
+                        }
+                    }
+                    pjd->huff_codes_present[htidx] = 1;
+                }
+                break;
+            }
+
+                // a sequentially-encoded JPG has one SOS segment. A
+                // progressive JPG will have multiple SOS segments.
+            case 0xda: { // Start Of Scan (SOS)
+
+                // Note that this marker frame (and its encoded
+                // length) does NOT include the bitstream that
+                // follows.
+
+                uint16_t length = bd_consume_bits(&bd, 16);
+                (void) length;
+
+                // number of components in this scan
+                uint8_t ns = bd_consume_bits(&bd, 8);
+
+                // for each component, what is the index into our pjd->components[] array?
+                uint8_t *comp_idx = calloc(ns, sizeof(uint8_t));
+
+                for (int i = 0; i < ns; i++) {
+                    // component name
+                    uint8_t cs = bd_consume_bits(&bd, 8);
+
+                    int found = 0;
+                    for (int j = 0; j < pjd->ncomponents; j++) {
+
+                        if (cs == pjd->components[j].id) {
+                            // which huff tables will we use for
+                            // DC (high 4 bits) and AC (low 4 bits)
+                            pjd->components[j].tda = bd_consume_bits(&bd, 8);
+                            comp_idx[i] = j;
+                            found = 1;
+                            break;
+                        }
+                    }
+
+                    if (!found)
+                        return PJPEG_ERR_SOS;
+                }
+
+                // start of spectral selection. baseline == 0
+                uint8_t ss = bd_consume_bits(&bd, 8);
+
+                // end of spectral selection. baseline == 0x3f
+                uint8_t se = bd_consume_bits(&bd, 8);
+
+                // successive approximation bits. baseline == 0
+                uint8_t Ahl = bd_consume_bits(&bd, 8);
+
+                if (ss != 0 || se != 0x3f || Ahl != 0x00)
+                    return PJPEG_ERR_SOS;
+
+                // compute the dimensions of each MCU in pixels
+                int maxmcux = 0, maxmcuy = 0;
+                for (int i = 0; i < ns; i++) {
+                    struct pjpeg_component *comp = &pjd->components[comp_idx[i]];
+
+                    maxmcux = max_u8(maxmcux, comp->scalex * 8);
+                    maxmcuy = max_u8(maxmcuy, comp->scaley * 8);
+                }
+
+                // how many MCU blocks are required to encode the whole image?
+                int mcus_x = (pjd->width + maxmcux - 1) / maxmcux;
+                int mcus_y = (pjd->height + maxmcuy - 1) / maxmcuy;
+
+                if (0)
+                    printf("Image has %d x %d MCU blocks, each %d x %d pixels\n",
+                           mcus_x, mcus_y, maxmcux, maxmcuy);
+
+                // allocate output storage
+                for (int i = 0; i < ns; i++) {
+                    struct pjpeg_component *comp = &pjd->components[comp_idx[i]];
+                    comp->width = mcus_x * comp->scalex * 8;
+                    comp->height = mcus_y * comp->scaley * 8;
+                    comp->stride = comp->width;
+
+                    int alignment = 32;
+                    if ((comp->stride % alignment) != 0)
+                        comp->stride += alignment - (comp->stride % alignment);
+
+                    comp->data = calloc(comp->height * comp->stride, 1);
+                }
+
+
+                // each component has its own DC prediction
+                int32_t *dcpred = calloc(ns, sizeof(int32_t));
+
+                pjd->reset_count = 0;
+
+                for (int mcu_y = 0; mcu_y < mcus_y; mcu_y++) {
+                    for (int mcu_x = 0; mcu_x < mcus_x; mcu_x++) {
+
+                        // the next two bytes in the input stream
+                        // should be 0xff 0xdN, where N is the next
+                        // reset counter.
+                        //
+                        // Our bit decoder may have already shifted
+                        // these into the buffer.  Consequently, we
+                        // want to use our bit decoding functions to
+                        // check for the marker. But we must first
+                        // discard any fractional bits left.
+                        if (pjd->reset_interval > 0 && pjd->reset_count == pjd->reset_interval) {
+
+                            // RST markers are byte-aligned, so force
+                            // the bit-decoder to the next byte
+                            // boundary.
+                            bd_discard_to_byte_boundary(&bd);
+
+                            while (1) {
+                                int32_t value = bd_consume_bits(&bd, 8);
+                                if (bd.inpos > bd.inlen)
+                                    return PJPEG_ERR_EOF;
+                                if (value == 0xff)
+                                    break;
+                                printf("RST SYNC\n");
+                            }
+
+                            int32_t marker_32 = bd_consume_bits(&bd, 8);
+
+//                            printf("%04x: RESET? %02x\n", *bd.inpos,  marker_32);
+                            if (marker_32 != (0xd0 + pjd->reset_next))
+                                return PJPEG_ERR_RESET;
+
+                            pjd->reset_count = 0;
+                            pjd->reset_next = (pjd->reset_next + 1) & 0x7;
+
+                            memset(dcpred, 0, sizeof(*dcpred));
+                        }
+
+                        for (int nsidx = 0; nsidx < ns; nsidx++) {
+
+                            struct pjpeg_component *comp = &pjd->components[comp_idx[nsidx]];
+
+                            int32_t block[64];
+
+                            int qtabidx = comp->tq; // which quant table?
+
+                            for (int sby = 0; sby < comp->scaley; sby++) {
+                                for (int sbx = 0; sbx < comp->scalex; sbx++) {
+                                    // decode block for component nsidx
+                                    memset(block, 0, sizeof(block));
+
+                                    int dc_huff_table_idx = comp->tda >> 4;
+                                    int ac_huff_table_idx = 2 + (comp->tda & 0x0f);
+
+                                    if (!pjd->huff_codes_present[dc_huff_table_idx] ||
+                                        !pjd->huff_codes_present[ac_huff_table_idx])
+                                        return PJPEG_ERR_MISSING_DHT; // probably an MJPEG.
+
+
+                                    if (1) {
+                                        // do DC coefficient
+                                        uint32_t next16 = bd_peek_bits(&bd, 16);
+                                        struct pjpeg_huffman_code *huff_code = &pjd->huff_codes[dc_huff_table_idx][next16];
+                                        bd_consume_bits(&bd, huff_code->nbits);
+
+                                        int ssss = huff_code->code & 0x0f; // ssss == number of additional bits to read
+                                        int32_t value = bd_consume_bits(&bd, ssss);
+
+                                        // if high bit is clear, it's negative
+                                        if ((value & (1 << (ssss-1))) == 0)
+                                            value += ((-1) << ssss) + 1;
+
+                                        dcpred[nsidx] += value;
+                                        block[0] = dcpred[nsidx] * pjd->qtab[qtabidx][0];
+                                    }
+
+                                    if (1) {
+                                        // do AC coefficients
+                                        for (int coeff = 1; coeff < 64; coeff++) {
+
+                                            uint32_t next16 = bd_peek_bits(&bd, 16);
+
+                                            struct pjpeg_huffman_code *huff_code = &pjd->huff_codes[ac_huff_table_idx][next16];
+                                            bd_consume_bits(&bd, huff_code->nbits);
+
+                                            if (huff_code->code == 0) {
+                                                break; // EOB
+                                            }
+
+                                            int rrrr = huff_code->code >> 4; // run length of zeros
+                                            int ssss = huff_code->code & 0x0f;
+
+                                            int32_t value = bd_consume_bits(&bd, ssss);
+
+                                            // if high bit is clear, it's negative
+                                            if ((value & (1 << (ssss-1))) == 0)
+                                                value += ((-1) << ssss) + 1;
+
+                                            coeff += rrrr;
+
+                                            block[(int) ZZ[coeff]] = value * pjd->qtab[qtabidx][coeff];
+                                        }
+                                    }
+
+                                    // do IDCT
+
+                                    // output block's upper-left
+                                    // coordinate (in pixels) is
+                                    // (comp_x, comp_y).
+                                    uint32_t comp_x = (mcu_x * comp->scalex + sbx) * 8;
+                                    uint32_t comp_y = (mcu_y * comp->scaley + sby) * 8;
+                                    uint32_t dataidx = comp_y * comp->stride + comp_x;
+
+//                                    pjpeg_idct_2D_u32(block, &comp->data[dataidx], comp->stride);
+                                    pjpeg_idct_2D_nanojpeg(block, &comp->data[dataidx], comp->stride);
+                                }
+                            }
+                        }
+
+                        pjd->reset_count++;
+//                        printf("%04x: reset count %d / %d\n", pjd->inpos, pjd->reset_count, pjd->reset_interval);
+
+                    }
+                }
+
+                free(dcpred);
+                free(comp_idx);
+
+                break;
+            }
+
+            case 0xd9: { // EOI End of Image
+                goto got_end_of_image;
+            }
+
+            case 0xdd: { // Define Restart Interval
+                uint16_t length = bd_consume_bits(&bd, 16);
+                if (length != 4)
+                    return PJPEG_ERR_DRI;
+
+                // reset interval measured in the number of MCUs
+                pjd->reset_interval = bd_consume_bits(&bd, 16);
+
+                break;
+            }
+
+            default: {
+                printf("pjepg: Unknown marker %02x at offset %04x\n", marker, marker_offset);
+
+                // try to skip it.
+                uint16_t length = bd_consume_bits(&bd, 16);
+                bd_discard_bytes(&bd, length - 2);
+                continue;
+            }
+        } // switch (marker)
+    } // while inpos < inlen
+
+  got_end_of_image:
+
+    return PJPEG_OKAY;
+}
+
+void pjpeg_destroy(pjpeg_t *pj)
+{
+    if (!pj)
+        return;
+
+    for (int i = 0; i < pj->ncomponents; i++)
+        free(pj->components[i].data);
+    free(pj->components);
+
+    free(pj);
+}
+
+
+// just grab the first component.
+image_u8_t *pjpeg_to_u8_baseline(pjpeg_t *pj)
+{
+    assert(pj->ncomponents > 0);
+
+    pjpeg_component_t *comp = &pj->components[0];
+
+    assert(comp->width >= pj->width && comp->height >= pj->height);
+
+    image_u8_t *im = image_u8_create(pj->width, pj->height);
+    for (int y = 0; y < im->height; y++)
+        memcpy(&im->buf[y*im->stride], &comp->data[y*comp->stride], pj->width);
+
+    return im;
+}
+
+static inline uint8_t clampd(double v)
+{
+    if (v < 0)
+        return 0;
+    if (v > 255)
+        return 255;
+
+    return (uint8_t) v;
+}
+
+static inline uint8_t clamp_u8(int32_t v)
+{
+    if (v < 0)
+        return 0;
+    if (v > 255)
+        return 255;
+    return v;
+}
+
+// color conversion formulas taken from JFIF spec v 1.02
+image_u8x3_t *pjpeg_to_u8x3_baseline(pjpeg_t *pj)
+{
+    assert(pj->ncomponents == 3);
+
+    pjpeg_component_t *Y = &pj->components[0];
+    pjpeg_component_t *Cb = &pj->components[1];
+    pjpeg_component_t *Cr = &pj->components[2];
+
+    int Cb_factor_y = Y->height / Cb->height;
+    int Cb_factor_x = Y->width / Cb->width;
+
+    int Cr_factor_y = Y->height / Cr->height;
+    int Cr_factor_x = Y->width / Cr->width;
+
+    image_u8x3_t *im = image_u8x3_create(pj->width, pj->height);
+
+    if (Cr_factor_y == 1 && Cr_factor_x == 1 && Cb_factor_y == 1 && Cb_factor_x == 1) {
+
+        for (int y = 0; y < pj->height; y++) {
+            for (int x = 0; x < pj->width; x++) {
+                int32_t y_val  = Y->data[y*Y->stride + x] * 65536;
+                int32_t cb_val = Cb->data[y*Cb->stride + x] - 128;
+                int32_t cr_val = Cr->data[y*Cr->stride + x] - 128;
+
+                int32_t r_val = y_val +  91881 * cr_val;
+                int32_t g_val = y_val + -22554 * cb_val - 46802 * cr_val;
+                int32_t b_val = y_val + 116130 * cb_val;
+
+                im->buf[y*im->stride + 3*x + 0 ] = clamp_u8(r_val >> 16);
+                im->buf[y*im->stride + 3*x + 1 ] = clamp_u8(g_val >> 16);
+                im->buf[y*im->stride + 3*x + 2 ] = clamp_u8(b_val >> 16);
+            }
+        }
+    } else if (Cb_factor_y == Cr_factor_y && Cb_factor_x == Cr_factor_x) {
+        for (int by = 0; by < pj->height / Cb_factor_y; by++) {
+            for (int bx = 0; bx < pj->width / Cb_factor_x; bx++) {
+
+                int32_t cb_val = Cb->data[by*Cb->stride + bx] - 128;
+                int32_t cr_val = Cr->data[by*Cr->stride + bx] - 128;
+
+                int32_t r0 =  91881 * cr_val;
+                int32_t g0 = -22554 * cb_val - 46802 * cr_val;
+                int32_t b0 = 116130 * cb_val;
+
+                for (int dy = 0; dy < Cb_factor_y; dy++) {
+                    int y = by*Cb_factor_y + dy;
+
+                    for (int dx = 0; dx < Cb_factor_x; dx++) {
+                        int x = bx*Cb_factor_x + dx;
+
+                        int32_t y_val = Y->data[y*Y->stride + x] * 65536;
+
+                        int32_t r_val = r0 + y_val;
+                        int32_t g_val = g0 + y_val;
+                        int32_t b_val = b0 + y_val;
+
+                        im->buf[y*im->stride + 3*x + 0 ] = clamp_u8(r_val >> 16);
+                        im->buf[y*im->stride + 3*x + 1 ] = clamp_u8(g_val >> 16);
+                        im->buf[y*im->stride + 3*x + 2 ] = clamp_u8(b_val >> 16);
+                    }
+                }
+            }
+        }
+    } else {
+
+        for (int y = 0; y < pj->height; y++) {
+            for (int x = 0; x < pj->width; x++) {
+                int32_t y_val  = Y->data[y*Y->stride + x];
+                int32_t cb_val = Cb->data[(y / Cb_factor_y)*Cb->stride + (x / Cb_factor_x)] - 128;
+                int32_t cr_val = Cr->data[(y / Cr_factor_y)*Cr->stride + (x / Cr_factor_x)] - 128;
+
+                uint8_t r_val = clampd(y_val + 1.402 * cr_val);
+                uint8_t g_val = clampd(y_val - 0.34414 * cb_val - 0.71414 * cr_val);
+                uint8_t b_val = clampd(y_val + 1.772 * cb_val);
+
+                im->buf[y*im->stride + 3*x + 0 ] = r_val;
+                im->buf[y*im->stride + 3*x + 1 ] = g_val;
+                im->buf[y*im->stride + 3*x + 2 ] = b_val;
+            }
+        }
+    }
+
+    return im;
+}
+
+///////////////////////////////////////////////////////////////////
+// returns NULL if file loading fails.
+pjpeg_t *pjpeg_create_from_file(const char *path, uint32_t flags, int *error)
+{
+    FILE *f = fopen(path, "rb");
+    if (f == NULL)
+        return NULL;
+
+    fseek(f, 0, SEEK_END);
+    long buflen = ftell(f);
+
+    uint8_t *buf = malloc(buflen);
+    fseek(f, 0, SEEK_SET);
+    int res = fread(buf, 1, buflen, f);
+
+    if ( ferror(f) ){
+        debug_print ("Read failed");
+        clearerr(f);
+    }
+
+    fclose(f);
+    if (res != buflen) {
+        free(buf);
+        if (error)
+            *error = PJPEG_ERR_FILE;
+        return NULL;
+    }
+
+    pjpeg_t *pj = pjpeg_create_from_buffer(buf, buflen, flags, error);
+
+    free(buf);
+    return pj;
+}
+
+pjpeg_t *pjpeg_create_from_buffer(uint8_t *buf, int buflen, uint32_t flags, int *error)
+{
+    struct pjpeg_decode_state pjd;
+    memset(&pjd, 0, sizeof(pjd));
+
+    if (flags & PJPEG_MJPEG) {
+        pjd.in = mjpeg_dht;
+        pjd.inlen = sizeof(mjpeg_dht);
+        int result = pjpeg_decode_buffer(&pjd);
+        assert(result == 0);
+    }
+
+    pjd.in = buf;
+    pjd.inlen = buflen;
+    pjd.flags = flags;
+
+    int result = pjpeg_decode_buffer(&pjd);
+    if (error)
+        *error = result;
+
+    if (result) {
+        for (int i = 0; i < pjd.ncomponents; i++)
+            free(pjd.components[i].data);
+        free(pjd.components);
+
+        return NULL;
+    }
+
+    pjpeg_t *pj = calloc(1, sizeof(pjpeg_t));
+
+    pj->width = pjd.width;
+    pj->height = pjd.height;
+    pj->ncomponents = pjd.ncomponents;
+    pj->components = pjd.components;
+
+    return pj;
+}
diff --git a/common/pjpeg.h b/common/pjpeg.h
new file mode 100644
index 0000000..c9ba9c8
--- /dev/null
+++ b/common/pjpeg.h
@@ -0,0 +1,103 @@
+/* Copyright (C) 2013-2016, The Regents of The University of Michigan.
+All rights reserved.
+This software was developed in the APRIL Robotics Lab under the
+direction of Edwin Olson, ebolson@umich.edu. This software may be
+available under alternative licensing terms; contact the address above.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+1. Redistributions of source code must retain the above copyright notice, this
+   list of conditions and the following disclaimer.
+2. Redistributions in binary form must reproduce the above copyright notice,
+   this list of conditions and the following disclaimer in the documentation
+   and/or other materials provided with the distribution.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+The views and conclusions contained in the software and documentation are those
+of the authors and should not be interpreted as representing official policies,
+either expressed or implied, of the Regents of The University of Michigan.
+*/
+
+#pragma once
+
+#include "image_u8.h"
+#include "image_u8x3.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef struct pjpeg_component pjpeg_component_t;
+struct pjpeg_component
+{
+    // resolution of this component (which is smaller than the
+    // dimensions of the image if the channel has been sub-sampled.)
+    uint32_t width, height;
+
+    // number of bytes per row. May be larger than width for alignment
+    // reasons.
+    uint32_t stride;
+
+    // data[y*stride + x]
+    uint8_t *data;
+
+    ////////////////////////////////////////////////////////////////
+    // These items probably not of great interest to most
+    // applications.
+    uint8_t id; // the identifier associated with this component
+    uint8_t hv; // horiz scale (high 4 bits) / vert scale (low 4 bits)
+    uint8_t scalex, scaley; // derived from hv above
+    uint8_t tq; // quantization table index
+
+    // this filled in at the last moment by SOS
+    uint8_t tda; // which huff tables will we use for DC (high 4 bits) and AC (low 4 bits)
+};
+
+typedef struct pjpeg pjpeg_t;
+struct pjpeg
+{
+    // status of the decode is put here. Non-zero means error.
+    int error;
+
+    uint32_t width, height; // pixel dimensions
+
+    int ncomponents;
+    pjpeg_component_t *components;
+};
+
+enum PJPEG_FLAGS {
+    PJPEG_STRICT = 1,  // Don't try to recover from errors.
+    PJPEG_MJPEG = 2,   // Support JPGs with missing DHT segments.
+};
+
+enum PJPEG_ERROR {
+    PJPEG_OKAY = 0,
+    PJPEG_ERR_FILE, // something wrong reading file
+    PJPEG_ERR_DQT, // something wrong with DQT marker
+    PJPEG_ERR_SOF, // something wrong with SOF marker
+    PJPEG_ERR_DHT, // something wrong with DHT marker
+    PJPEG_ERR_SOS, // something wrong with SOS marker
+    PJPEG_ERR_MISSING_DHT, // missing a necessary huffman table
+    PJPEG_ERR_DRI, // something wrong with DRI marker
+    PJPEG_ERR_RESET, // didn't get a reset marker where we expected. Corruption?
+    PJPEG_ERR_EOF, // ran out of bytes while decoding
+    PJEPG_ERR_UNSUPPORTED, // an unsupported format
+};
+
+pjpeg_t *pjpeg_create_from_file(const char *path, uint32_t flags, int *error);
+pjpeg_t *pjpeg_create_from_buffer(uint8_t *buf, int buflen, uint32_t flags, int *error);
+void pjpeg_destroy(pjpeg_t *pj);
+
+image_u8_t *pjpeg_to_u8_baseline(pjpeg_t *pj);
+image_u8x3_t *pjpeg_to_u8x3_baseline(pjpeg_t *pj);
+
+#ifdef __cplusplus
+}
+#endif
diff --git a/common/pnm.c b/common/pnm.c
new file mode 100644
index 0000000..fe77dde
--- /dev/null
+++ b/common/pnm.c
@@ -0,0 +1,154 @@
+/* Copyright (C) 2013-2016, The Regents of The University of Michigan.
+All rights reserved.
+This software was developed in the APRIL Robotics Lab under the
+direction of Edwin Olson, ebolson@umich.edu. This software may be
+available under alternative licensing terms; contact the address above.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+1. Redistributions of source code must retain the above copyright notice, this
+   list of conditions and the following disclaimer.
+2. Redistributions in binary form must reproduce the above copyright notice,
+   this list of conditions and the following disclaimer in the documentation
+   and/or other materials provided with the distribution.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+The views and conclusions contained in the software and documentation are those
+of the authors and should not be interpreted as representing official policies,
+either expressed or implied, of the Regents of The University of Michigan.
+*/
+
+#include <assert.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "pnm.h"
+
+pnm_t *pnm_create_from_file(const char *path)
+{
+    FILE *f = fopen(path, "rb");
+    if (f == NULL)
+        return NULL;
+
+    pnm_t *pnm = calloc(1, sizeof(pnm_t));
+    pnm->format = -1;
+
+    char tmp[1024];
+    int nparams = 0; // will be 3 when we're all done.
+    int params[3];
+
+    while (nparams < 3 && !(pnm->format == PNM_FORMAT_BINARY && nparams == 2)) {
+        if (fgets(tmp, sizeof(tmp), f) == NULL)
+            goto error;
+
+        // skip comments
+        if (tmp[0]=='#')
+            continue;
+
+        char *p = tmp;
+
+        if (pnm->format == -1 && tmp[0]=='P') {
+            pnm->format = tmp[1]-'0';
+            assert(pnm->format == PNM_FORMAT_GRAY || pnm->format == PNM_FORMAT_RGB || pnm->format == PNM_FORMAT_BINARY);
+            p = &tmp[2];
+        }
+
+        // pull integers out of this line until there are no more.
+        while (nparams < 3 && *p!=0) {
+            while (*p==' ')
+                p++;
+
+            // encounter rubbish? (End of line?)
+            if (*p < '0' || *p > '9')
+                break;
+
+            int acc = 0;
+            while (*p >= '0' && *p <= '9') {
+                acc = acc*10 + *p - '0';
+                p++;
+            }
+
+            params[nparams++] = acc;
+            p++;
+        }
+    }
+
+    pnm->width = params[0];
+    pnm->height = params[1];
+    pnm->max = params[2];
+
+    switch (pnm->format) {
+        case PNM_FORMAT_BINARY: {
+            // files in the wild sometimes simply don't set max
+            pnm->max = 1;
+
+            pnm->buflen = pnm->height * ((pnm->width + 7)  / 8);
+            pnm->buf = malloc(pnm->buflen);
+            size_t len = fread(pnm->buf, 1, pnm->buflen, f);
+            if (len != pnm->buflen)
+                goto error;
+
+            fclose(f);
+            return pnm;
+        }
+
+        case PNM_FORMAT_GRAY: {
+            if (pnm->max == 255)
+                pnm->buflen = pnm->width * pnm->height;
+            else if (pnm->max == 65535)
+                pnm->buflen = 2 * pnm->width * pnm->height;
+            else
+                assert(0);
+
+            pnm->buf = malloc(pnm->buflen);
+            size_t len = fread(pnm->buf, 1, pnm->buflen, f);
+            if (len != pnm->buflen)
+                goto error;
+
+            fclose(f);
+            return pnm;
+        }
+
+        case PNM_FORMAT_RGB: {
+            if (pnm->max == 255)
+                pnm->buflen = pnm->width * pnm->height * 3;
+            else if (pnm->max == 65535)
+                pnm->buflen = 2 * pnm->width * pnm->height * 3;
+            else
+                assert(0);
+
+            pnm->buf = malloc(pnm->buflen);
+            size_t len = fread(pnm->buf, 1, pnm->buflen, f);
+            if (len != pnm->buflen)
+                goto error;
+            fclose(f);
+            return pnm;
+        }
+    }
+
+error:
+    fclose(f);
+
+    if (pnm != NULL) {
+        free(pnm->buf);
+        free(pnm);
+    }
+
+    return NULL;
+}
+
+void pnm_destroy(pnm_t *pnm)
+{
+    if (pnm == NULL)
+        return;
+
+    free(pnm->buf);
+    free(pnm);
+}
diff --git a/common/pnm.h b/common/pnm.h
new file mode 100644
index 0000000..4d8f46e
--- /dev/null
+++ b/common/pnm.h
@@ -0,0 +1,58 @@
+/* Copyright (C) 2013-2016, The Regents of The University of Michigan.
+All rights reserved.
+This software was developed in the APRIL Robotics Lab under the
+direction of Edwin Olson, ebolson@umich.edu. This software may be
+available under alternative licensing terms; contact the address above.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+1. Redistributions of source code must retain the above copyright notice, this
+   list of conditions and the following disclaimer.
+2. Redistributions in binary form must reproduce the above copyright notice,
+   this list of conditions and the following disclaimer in the documentation
+   and/or other materials provided with the distribution.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+The views and conclusions contained in the software and documentation are those
+of the authors and should not be interpreted as representing official policies,
+either expressed or implied, of the Regents of The University of Michigan.
+*/
+
+#pragma once
+
+#include <stdint.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define PNM_FORMAT_BINARY 4
+#define PNM_FORMAT_GRAY 5
+#define PNM_FORMAT_RGB  6
+
+// supports ppm, pnm, pgm
+
+typedef struct pnm pnm_t;
+struct pnm
+{
+    int width, height;
+    int format;
+    int max; // 1 = binary, 255 = one byte, 65535 = two bytes
+
+    uint32_t buflen;
+    uint8_t *buf; // if max=65535, in big endian
+};
+
+pnm_t *pnm_create_from_file(const char *path);
+void pnm_destroy(pnm_t *pnm);
+
+#ifdef __cplusplus
+}
+#endif
diff --git a/common/postscript_utils.h b/common/postscript_utils.h
new file mode 100644
index 0000000..b561c5b
--- /dev/null
+++ b/common/postscript_utils.h
@@ -0,0 +1,53 @@
+/* Copyright (C) 2013-2016, The Regents of The University of Michigan.
+All rights reserved.
+This software was developed in the APRIL Robotics Lab under the
+direction of Edwin Olson, ebolson@umich.edu. This software may be
+available under alternative licensing terms; contact the address above.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+1. Redistributions of source code must retain the above copyright notice, this
+   list of conditions and the following disclaimer.
+2. Redistributions in binary form must reproduce the above copyright notice,
+   this list of conditions and the following disclaimer in the documentation
+   and/or other materials provided with the distribution.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+The views and conclusions contained in the software and documentation are those
+of the authors and should not be interpreted as representing official policies,
+either expressed or implied, of the Regents of The University of Michigan.
+*/
+
+#pragma once
+
+// write commands in postscript language to render an image in the current
+// graphics environment. The image will be rendered in one pixel per unit
+// with Y up coordinate axis (e.g. upside down).
+static void postscript_image(FILE *f, image_u8_t *im)
+{
+//    fprintf(f, "/readstring {\n  currentfile exch readhexstring pop\n} bind def\n");
+    fprintf(f, "/picstr %d string def\n", im->width);
+
+    fprintf(f, "%d %d 8 [1 0 0 1 0 0]\n",
+            im->width, im->height);
+
+    fprintf(f, "{currentfile picstr readhexstring pop}\nimage\n");
+
+    for (int y = 0; y < im->height; y++) {
+        for (int x = 0; x < im->width; x++) {
+            uint8_t v = im->buf[y*im->stride + x];
+            fprintf(f, "%02x", v);
+            if ((x % 32)==31)
+                fprintf(f, "\n");
+        }
+    }
+
+    fprintf(f, "\n");
+}
diff --git a/common/pthreads_cross.cpp b/common/pthreads_cross.cpp
new file mode 100644
index 0000000..f772191
--- /dev/null
+++ b/common/pthreads_cross.cpp
@@ -0,0 +1,259 @@
+/**

+Copyright John Schember <john@nachtimwald.com>

+

+Permission is hereby granted, free of charge, to any person obtaining a copy of

+this software and associated documentation files (the "Software"), to deal in

+the Software without restriction, including without limitation the rights to

+use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies

+of the Software, and to permit persons to whom the Software is furnished to do

+so, subject to the following conditions:

+

+The above copyright notice and this permission notice shall be included in all

+copies or substantial portions of the Software.

+

+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR

+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,

+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE

+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER

+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,

+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE

+SOFTWARE.

+ */

+

+#include "common/pthreads_cross.h"

+#include <time.h>

+

+#ifdef _WIN32

+

+typedef struct {

+    SRWLOCK lock;

+    bool    exclusive;

+} pthread_rwlock_t;

+

+int pthread_rwlock_init(pthread_rwlock_t *rwlock, const pthread_rwlockattr_t *attr);

+int pthread_rwlock_destroy(pthread_rwlock_t *rwlock);

+int pthread_rwlock_rdlock(pthread_rwlock_t *rwlock);

+int pthread_rwlock_tryrdlock(pthread_rwlock_t *rwlock);

+int pthread_rwlock_wrlock(pthread_rwlock_t *rwlock);

+int pthread_rwlock_trywrlock(pthread_rwlock_t  *rwlock);

+int pthread_rwlock_unlock(pthread_rwlock_t *rwlock);

+

+int pthread_create(pthread_t *thread, pthread_attr_t *attr, void *(*start_routine)(void *), void *arg)

+{

+    (void) attr;

+

+    if (thread == NULL || start_routine == NULL)

+        return 1;

+

+    *thread = (HANDLE) CreateThread(NULL, 0, (LPTHREAD_START_ROUTINE)start_routine, arg, 0, NULL);

+    if (*thread == NULL)

+        return 1;

+    return 0;

+}

+

+int pthread_join(pthread_t thread, void **value_ptr)

+{

+    (void)value_ptr;

+    WaitForSingleObject(thread, INFINITE);

+    CloseHandle(thread);

+    return 0;

+}

+

+int pthread_detach(pthread_t thread)

+{

+    CloseHandle(thread);

+    return 0;

+}

+

+int pthread_mutex_init(pthread_mutex_t *mutex, pthread_mutexattr_t *attr)

+{

+    (void)attr;

+

+    if (mutex == NULL)

+        return 1;

+

+    InitializeCriticalSection(mutex);

+    return 0;

+}

+

+int pthread_mutex_destroy(pthread_mutex_t *mutex)

+{

+    if (mutex == NULL)

+        return 1;

+    DeleteCriticalSection(mutex);

+    return 0;

+}

+

+int pthread_mutex_lock(pthread_mutex_t *mutex)

+{

+    if (mutex == NULL)

+        return 1;

+    EnterCriticalSection(mutex);

+    return 0;

+}

+

+int pthread_mutex_unlock(pthread_mutex_t *mutex)

+{

+    if (mutex == NULL)

+        return 1;

+    LeaveCriticalSection(mutex);

+    return 0;

+}

+

+int pthread_cond_init(pthread_cond_t *cond, pthread_condattr_t *attr)

+{

+    (void)attr;

+    if (cond == NULL)

+        return 1;

+    InitializeConditionVariable(cond);

+    return 0;

+}

+

+int pthread_cond_destroy(pthread_cond_t *cond)

+{

+    /* Windows does not have a destroy for conditionals */

+    (void)cond;

+    return 0;

+}

+

+int pthread_cond_wait(pthread_cond_t *cond, pthread_mutex_t *mutex)

+{

+    if (cond == NULL || mutex == NULL)

+        return 1;

+    return pthread_cond_timedwait(cond, mutex, NULL);

+}

+

+int pthread_cond_timedwait(pthread_cond_t *cond, pthread_mutex_t *mutex,

+        const struct timespec *abstime)

+{

+    if (cond == NULL || mutex == NULL)

+        return 1;

+    if (!SleepConditionVariableCS(cond, mutex, timespec_to_ms(abstime)))

+        return 1;

+    return 0;

+}

+

+int pthread_cond_signal(pthread_cond_t *cond)

+{

+    if (cond == NULL)

+        return 1;

+    WakeConditionVariable(cond);

+    return 0;

+}

+

+int pthread_cond_broadcast(pthread_cond_t *cond)

+{

+    if (cond == NULL)

+        return 1;

+    WakeAllConditionVariable(cond);

+    return 0;

+}

+

+int pthread_rwlock_init(pthread_rwlock_t *rwlock, const pthread_rwlockattr_t *attr)

+{

+    (void)attr;

+    if (rwlock == NULL)

+        return 1;

+    InitializeSRWLock(&(rwlock->lock));

+    rwlock->exclusive = false;

+    return 0;

+}

+

+int pthread_rwlock_destroy(pthread_rwlock_t *rwlock)

+{

+    (void)rwlock;

+    return 0;

+}

+

+int pthread_rwlock_rdlock(pthread_rwlock_t *rwlock)

+{

+    if (rwlock == NULL)

+        return 1;

+    AcquireSRWLockShared(&(rwlock->lock));

+    return 0;

+}

+

+int pthread_rwlock_tryrdlock(pthread_rwlock_t *rwlock)

+{

+    if (rwlock == NULL)

+        return 1;

+    return !TryAcquireSRWLockShared(&(rwlock->lock));

+}

+

+int pthread_rwlock_wrlock(pthread_rwlock_t *rwlock)

+{

+    if (rwlock == NULL)

+        return 1;

+    AcquireSRWLockExclusive(&(rwlock->lock));

+    rwlock->exclusive = true;

+    return 0;

+}

+

+int pthread_rwlock_trywrlock(pthread_rwlock_t  *rwlock)

+{

+    BOOLEAN ret;

+

+    if (rwlock == NULL)

+        return 1;

+

+    ret = TryAcquireSRWLockExclusive(&(rwlock->lock));

+    if (ret)

+        rwlock->exclusive = true;

+    return ret;

+}

+

+int pthread_rwlock_unlock(pthread_rwlock_t *rwlock)

+{

+    if (rwlock == NULL)

+        return 1;

+

+    if (rwlock->exclusive) {

+        rwlock->exclusive = false;

+        ReleaseSRWLockExclusive(&(rwlock->lock));

+    } else {

+        ReleaseSRWLockShared(&(rwlock->lock));

+    }

+    return 0;

+}

+

+int sched_yield() {

+    return (int)SwitchToThread();

+}

+

+void ms_to_timespec(struct timespec *ts, unsigned int ms)

+{

+    if (ts == NULL)

+        return;

+    ts->tv_sec = (ms / 1000) + time(NULL);

+    ts->tv_nsec = (ms % 1000) * 1000000;

+}

+

+unsigned int timespec_to_ms(const struct timespec *abstime)

+{

+    DWORD t;

+

+    if (abstime == NULL)

+        return INFINITE;

+

+    t = ((abstime->tv_sec - time(NULL)) * 1000) + (abstime->tv_nsec / 1000000);

+    if (t < 0)

+        t = 1;

+    return t;

+}

+

+unsigned int pcthread_get_num_procs()

+{

+    SYSTEM_INFO sysinfo;

+

+    GetSystemInfo(&sysinfo);

+    return sysinfo.dwNumberOfProcessors;

+}

+

+#else

+

+#include <unistd.h>

+unsigned int pcthread_get_num_procs()

+{

+    return (unsigned int)sysconf(_SC_NPROCESSORS_ONLN);

+}

+#endif

diff --git a/common/pthreads_cross.h b/common/pthreads_cross.h
new file mode 100644
index 0000000..897a333
--- /dev/null
+++ b/common/pthreads_cross.h
@@ -0,0 +1,81 @@
+/**

+Copyright John Schember <john@nachtimwald.com>

+

+Permission is hereby granted, free of charge, to any person obtaining a copy of

+this software and associated documentation files (the "Software"), to deal in

+the Software without restriction, including without limitation the rights to

+use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies

+of the Software, and to permit persons to whom the Software is furnished to do

+so, subject to the following conditions:

+

+The above copyright notice and this permission notice shall be included in all

+copies or substantial portions of the Software.

+

+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR

+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,

+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE

+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER

+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,

+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE

+SOFTWARE.

+*/

+

+#ifndef __CPTHREAD_H__

+#define __CPTHREAD_H__

+

+#ifdef _WIN32

+#include <stdbool.h>

+#include <windows.h>

+#else

+#include <pthread.h>

+#include <sched.h>

+#endif

+

+#ifdef _WIN32

+

+typedef CRITICAL_SECTION pthread_mutex_t;

+typedef void pthread_mutexattr_t;

+typedef void pthread_attr_t;

+typedef void pthread_condattr_t;

+typedef void pthread_rwlockattr_t;

+typedef HANDLE pthread_t;

+typedef CONDITION_VARIABLE pthread_cond_t;

+

+#ifdef __cplusplus

+extern "C" {

+#endif

+int pthread_create(pthread_t *thread, pthread_attr_t *attr, void *(*start_routine)(void *), void *arg);

+int pthread_join(pthread_t thread, void **value_ptr);

+int pthread_detach(pthread_t);

+

+int pthread_mutex_init(pthread_mutex_t *mutex, pthread_mutexattr_t *attr);

+int pthread_mutex_destroy(pthread_mutex_t *mutex);

+int pthread_mutex_lock(pthread_mutex_t *mutex);

+int pthread_mutex_unlock(pthread_mutex_t *mutex);

+

+int pthread_cond_init(pthread_cond_t *cond, pthread_condattr_t *attr);

+int pthread_cond_destroy(pthread_cond_t *cond);

+int pthread_cond_wait(pthread_cond_t *cond, pthread_mutex_t *mutex);

+int pthread_cond_timedwait(pthread_cond_t *cond, pthread_mutex_t *mutex, const struct timespec *abstime);

+int pthread_cond_signal(pthread_cond_t *cond);

+int pthread_cond_broadcast(pthread_cond_t *cond);

+

+int sched_yield(void);

+#ifdef __cplusplus

+}

+#endif

+#endif

+

+

+#ifdef __cplusplus

+extern "C" {

+#endif

+unsigned int pcthread_get_num_procs();

+

+void ms_to_timespec(struct timespec *ts, unsigned int ms);

+unsigned int timespec_to_ms(const struct timespec *abstime);

+#ifdef __cplusplus

+}

+#endif

+

+#endif /* __CPTHREAD_H__ */

diff --git a/common/string_util.c b/common/string_util.c
new file mode 100644
index 0000000..7c64c13
--- /dev/null
+++ b/common/string_util.c
@@ -0,0 +1,771 @@
+/* Copyright (C) 2013-2016, The Regents of The University of Michigan.
+All rights reserved.
+This software was developed in the APRIL Robotics Lab under the
+direction of Edwin Olson, ebolson@umich.edu. This software may be
+available under alternative licensing terms; contact the address above.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+1. Redistributions of source code must retain the above copyright notice, this
+   list of conditions and the following disclaimer.
+2. Redistributions in binary form must reproduce the above copyright notice,
+   this list of conditions and the following disclaimer in the documentation
+   and/or other materials provided with the distribution.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+The views and conclusions contained in the software and documentation are those
+of the authors and should not be interpreted as representing official policies,
+either expressed or implied, of the Regents of The University of Michigan.
+*/
+
+#include <assert.h>
+#include <ctype.h>
+#include <string.h>
+#include <stdarg.h>
+#include <stdlib.h>
+#include <stdio.h>
+
+#include "string_util.h"
+#include "zarray.h"
+
+struct string_buffer
+{
+    char *s;
+    int alloc;
+    size_t size; // as if strlen() was called; not counting terminating \0
+};
+
+#define MIN_PRINTF_ALLOC 16
+
+char *sprintf_alloc(const char *fmt, ...)
+{
+    assert(fmt != NULL);
+
+    va_list args;
+
+    va_start(args,fmt);
+    char *buf = vsprintf_alloc(fmt, args);
+    va_end(args);
+
+    return buf;
+}
+
+char *vsprintf_alloc(const char *fmt, va_list orig_args)
+{
+    assert(fmt != NULL);
+
+    int size = MIN_PRINTF_ALLOC;
+    char *buf = malloc(size * sizeof(char));
+
+    int returnsize;
+    va_list args;
+
+    va_copy(args, orig_args);
+    returnsize = vsnprintf(buf, size, fmt, args);
+    va_end(args);
+
+    // it was successful
+    if (returnsize < size) {
+        return buf;
+    }
+
+    // otherwise, we should try again
+    free(buf);
+    size = returnsize + 1;
+    buf = malloc(size * sizeof(char));
+
+    va_copy(args, orig_args);
+    returnsize = vsnprintf(buf, size, fmt, args);
+    va_end(args);
+
+    assert(returnsize <= size);
+    return buf;
+}
+
+char *_str_concat_private(const char *first, ...)
+{
+    size_t len = 0;
+
+    // get the total length (for the allocation)
+    {
+        va_list args;
+        va_start(args, first);
+        const char *arg = first;
+        while(arg != NULL) {
+            len += strlen(arg);
+            arg = va_arg(args, const char *);
+        }
+        va_end(args);
+    }
+
+    // write the string
+    char *str = malloc(len*sizeof(char) + 1);
+    char *ptr = str;
+    {
+        va_list args;
+        va_start(args, first);
+        const char *arg = first;
+        while(arg != NULL) {
+            while(*arg)
+                *ptr++ = *arg++;
+            arg = va_arg(args, const char *);
+        }
+        *ptr = '\0';
+        va_end(args);
+    }
+
+    return str;
+}
+
+// Returns the index of the first character that differs:
+int str_diff_idx(const char * a, const char * b)
+{
+    assert(a != NULL);
+    assert(b != NULL);
+
+    int i = 0;
+
+    size_t lena = strlen(a);
+    size_t lenb = strlen(b);
+
+    size_t minlen = lena < lenb ? lena : lenb;
+
+    for (; i < minlen; i++)
+        if (a[i] != b[i])
+            break;
+
+    return i;
+}
+
+
+zarray_t *str_split(const char *str, const char *delim)
+{
+    assert(str != NULL);
+    assert(delim != NULL);
+
+    zarray_t *parts = zarray_create(sizeof(char*));
+    string_buffer_t *sb = string_buffer_create();
+
+    size_t delim_len = strlen(delim);
+    size_t len = strlen(str);
+    size_t pos = 0;
+
+    while (pos < len) {
+        if (str_starts_with(&str[pos], delim) && delim_len > 0) {
+            pos += delim_len;
+            // never add empty strings (repeated tokens)
+            if (string_buffer_size(sb) > 0) {
+                char *part = string_buffer_to_string(sb);
+                zarray_add(parts, &part);
+            }
+            string_buffer_reset(sb);
+        } else {
+            string_buffer_append(sb, str[pos]);
+            pos++;
+        }
+    }
+
+    if (string_buffer_size(sb) > 0) {
+        char *part = string_buffer_to_string(sb);
+        zarray_add(parts, &part);
+    }
+
+    string_buffer_destroy(sb);
+    return parts;
+}
+
+// split on one or more spaces.
+zarray_t *str_split_spaces(const char *str)
+{
+  zarray_t *parts = zarray_create(sizeof(char*));
+  size_t len = strlen(str);
+  size_t pos = 0;
+
+  while (pos < len) {
+
+    while (pos < len && str[pos] == ' ')
+      pos++;
+
+    // produce a token?
+    if (pos < len) {
+      // yes!
+      size_t off0 = pos;
+      while (pos < len && str[pos] != ' ')
+	pos++;
+      size_t off1 = pos;
+
+      size_t len_off = off1 - off0;
+      char *tok = malloc(len_off + 1);
+      memcpy(tok, &str[off0], len_off);
+      tok[len_off] = 0;
+      zarray_add(parts, &tok);
+    }
+  }
+
+  return parts;
+}
+
+void str_split_destroy(zarray_t *za)
+{
+    if (!za)
+        return;
+
+    zarray_vmap(za, free);
+    zarray_destroy(za);
+}
+
+char *str_trim(char *str)
+{
+    assert(str != NULL);
+
+    return str_lstrip(str_rstrip(str));
+}
+
+char *str_lstrip(char *str)
+{
+    assert(str != NULL);
+
+    char *ptr = str;
+    char *end = str + strlen(str);
+    for(; ptr != end && isspace(*ptr); ptr++);
+    // shift the string to the left so the original pointer still works
+    memmove(str, ptr, strlen(ptr)+1);
+    return str;
+}
+
+char *str_rstrip(char *str)
+{
+    assert(str != NULL);
+
+    char *ptr = str + strlen(str) - 1;
+    for(; ptr+1 != str && isspace(*ptr); ptr--);
+    *(ptr+1) = '\0';
+    return str;
+}
+
+int str_indexof(const char *haystack, const char *needle)
+{
+	assert(haystack != NULL);
+	assert(needle != NULL);
+
+    // use signed types for hlen/nlen because hlen - nlen can be negative.
+    int hlen = (int) strlen(haystack);
+    int nlen = (int) strlen(needle);
+
+    if (nlen > hlen) return -1;
+
+    for (int i = 0; i <= hlen - nlen; i++) {
+        if (!strncmp(&haystack[i], needle, nlen))
+            return i;
+    }
+
+    return -1;
+}
+
+int str_last_indexof(const char *haystack, const char *needle)
+{
+	assert(haystack != NULL);
+	assert(needle != NULL);
+
+    // use signed types for hlen/nlen because hlen - nlen can be negative.
+    int hlen = (int) strlen(haystack);
+    int nlen = (int) strlen(needle);
+
+    int last_index = -1;
+    for (int i = 0; i <= hlen - nlen; i++) {
+        if (!strncmp(&haystack[i], needle, nlen))
+            last_index = i;
+    }
+
+    return last_index;
+}
+
+// in-place modification.
+char *str_tolowercase(char *s)
+{
+	assert(s != NULL);
+
+    size_t slen = strlen(s);
+    for (int i = 0; i < slen; i++) {
+        if (s[i] >= 'A' && s[i] <= 'Z')
+            s[i] = s[i] + 'a' - 'A';
+    }
+
+    return s;
+}
+
+char *str_touppercase(char *s)
+{
+    assert(s != NULL);
+
+    size_t slen = strlen(s);
+    for (int i = 0; i < slen; i++) {
+        if (s[i] >= 'a' && s[i] <= 'z')
+            s[i] = s[i] - ('a' - 'A');
+    }
+
+    return s;
+}
+
+string_buffer_t* string_buffer_create()
+{
+    string_buffer_t *sb = (string_buffer_t*) calloc(1, sizeof(string_buffer_t));
+    assert(sb != NULL);
+    sb->alloc = 32;
+    sb->s = calloc(sb->alloc, 1);
+    return sb;
+}
+
+void string_buffer_destroy(string_buffer_t *sb)
+{
+    if (sb == NULL)
+        return;
+
+    if (sb->s)
+        free(sb->s);
+
+    memset(sb, 0, sizeof(string_buffer_t));
+    free(sb);
+}
+
+void string_buffer_append(string_buffer_t *sb, char c)
+{
+    assert(sb != NULL);
+
+    if (sb->size+2 >= sb->alloc) {
+        sb->alloc *= 2;
+        sb->s = realloc(sb->s, sb->alloc);
+    }
+
+    sb->s[sb->size++] = c;
+    sb->s[sb->size] = 0;
+}
+
+char string_buffer_pop_back(string_buffer_t *sb) {
+    assert(sb != NULL);
+    if (sb->size == 0)
+        return 0;
+
+    char back = sb->s[--sb->size];
+    sb->s[sb->size] = 0;
+    return back;
+}
+
+void string_buffer_appendf(string_buffer_t *sb, const char *fmt, ...)
+{
+    assert(sb != NULL);
+    assert(fmt != NULL);
+
+    int size = MIN_PRINTF_ALLOC;
+    char *buf = malloc(size * sizeof(char));
+
+    int returnsize;
+    va_list args;
+
+    va_start(args,fmt);
+    returnsize = vsnprintf(buf, size, fmt, args);
+    va_end(args);
+
+    if (returnsize >= size) {
+        // otherwise, we should try again
+        free(buf);
+        size = returnsize + 1;
+        buf = malloc(size * sizeof(char));
+
+        va_start(args, fmt);
+        returnsize = vsnprintf(buf, size, fmt, args);
+        va_end(args);
+
+        assert(returnsize <= size);
+    }
+
+    string_buffer_append_string(sb, buf);
+    free(buf);
+}
+
+void string_buffer_append_string(string_buffer_t *sb, const char *str)
+{
+    assert(sb != NULL);
+    assert(str != NULL);
+
+    size_t len = strlen(str);
+
+    while (sb->size+len + 1 >= sb->alloc) {
+        sb->alloc *= 2;
+        sb->s = realloc(sb->s, sb->alloc);
+    }
+
+    memcpy(&sb->s[sb->size], str, len);
+    sb->size += len;
+    sb->s[sb->size] = 0;
+}
+
+bool string_buffer_ends_with(string_buffer_t *sb, const char *str)
+{
+    assert(sb != NULL);
+    assert(str != NULL);
+
+    return str_ends_with(sb->s, str);
+}
+
+char *string_buffer_to_string(string_buffer_t *sb)
+{
+    assert(sb != NULL);
+
+    return strdup(sb->s);
+}
+
+// returns length of string (not counting \0)
+size_t string_buffer_size(string_buffer_t *sb)
+{
+    assert(sb != NULL);
+
+    return sb->size;
+}
+
+void string_buffer_reset(string_buffer_t *sb)
+{
+    assert(sb != NULL);
+
+    sb->s[0] = 0;
+    sb->size = 0;
+}
+
+string_feeder_t *string_feeder_create(const char *str)
+{
+    assert(str != NULL);
+
+    string_feeder_t *sf = (string_feeder_t*) calloc(1, sizeof(string_feeder_t));
+    sf->s = strdup(str);
+    sf->len = strlen(sf->s);
+    sf->line = 1;
+    sf->col = 0;
+    sf->pos = 0;
+    return sf;
+}
+
+int string_feeder_get_line(string_feeder_t *sf)
+{
+    assert(sf != NULL);
+    return sf->line;
+}
+
+int string_feeder_get_column(string_feeder_t *sf)
+{
+    assert(sf != NULL);
+    return sf->col;
+}
+
+void string_feeder_destroy(string_feeder_t *sf)
+{
+    if (sf == NULL)
+        return;
+
+    free(sf->s);
+    memset(sf, 0, sizeof(string_feeder_t));
+    free(sf);
+}
+
+bool string_feeder_has_next(string_feeder_t *sf)
+{
+    assert(sf != NULL);
+
+    return sf->s[sf->pos] != 0 && sf->pos <= sf->len;
+}
+
+char string_feeder_next(string_feeder_t *sf)
+{
+    assert(sf != NULL);
+    assert(sf->pos <= sf->len);
+
+    char c = sf->s[sf->pos++];
+    if (c == '\n') {
+        sf->line++;
+        sf->col = 0;
+    } else {
+        sf->col++;
+    }
+
+    return c;
+}
+
+char *string_feeder_next_length(string_feeder_t *sf, size_t length)
+{
+    assert(sf != NULL);
+    assert(length >= 0);
+    assert(sf->pos <= sf->len);
+
+    if (sf->pos + length > sf->len)
+        length = sf->len - sf->pos;
+
+    char *substr = calloc(length+1, sizeof(char));
+    for (int i = 0 ; i < length ; i++)
+        substr[i] = string_feeder_next(sf);
+    return substr;
+}
+
+char string_feeder_peek(string_feeder_t *sf)
+{
+    assert(sf != NULL);
+    assert(sf->pos <= sf->len);
+
+    return sf->s[sf->pos];
+}
+
+char *string_feeder_peek_length(string_feeder_t *sf, size_t length)
+{
+    assert(sf != NULL);
+    assert(length >= 0);
+    assert(sf->pos <= sf->len);
+
+    if (sf->pos + length > sf->len)
+        length = sf->len - sf->pos;
+
+    char *substr = calloc(length+1, sizeof(char));
+    memcpy(substr, &sf->s[sf->pos], length*sizeof(char));
+    return substr;
+}
+
+bool string_feeder_starts_with(string_feeder_t *sf, const char *str)
+{
+    assert(sf != NULL);
+    assert(str != NULL);
+    assert(sf->pos <= sf->len);
+
+    return str_starts_with(&sf->s[sf->pos], str);
+}
+
+void string_feeder_require(string_feeder_t *sf, const char *str)
+{
+    assert(sf != NULL);
+    assert(str != NULL);
+    assert(sf->pos <= sf->len);
+
+    size_t len = strlen(str);
+
+    for (int i = 0; i < len; i++) {
+        char c = string_feeder_next(sf);
+        assert(c == str[i]);
+    }
+}
+
+////////////////////////////////////////////
+bool str_ends_with(const char *haystack, const char *needle)
+{
+    assert(haystack != NULL);
+    assert(needle != NULL);
+
+    size_t lens = strlen(haystack);
+    size_t lenneedle = strlen(needle);
+
+    if (lenneedle > lens)
+        return false;
+
+    return !strncmp(&haystack[lens - lenneedle], needle, lenneedle);
+}
+
+#ifndef _MSC_VER
+inline
+#endif
+bool str_starts_with(const char *haystack, const char *needle)
+{
+    assert(haystack != NULL);
+    assert(needle != NULL);
+
+    // haystack[pos] doesn't have to be compared to zero; if it were
+    // zero, it either doesn't match needle (in which case the loop
+    // terminates) or it matches needle[pos] (in which case the loop
+    // terminates).
+    int pos = 0;
+    while (haystack[pos] == needle[pos] && needle[pos] != 0)
+        pos++;
+
+    return (needle[pos] == 0);
+}
+
+bool str_starts_with_any(const char *haystack, const char **needles, int num_needles)
+{
+    assert(haystack != NULL);
+    assert(needles != NULL);
+    assert(num_needles >= 0);
+
+    for (int i = 0; i < num_needles; i++) {
+        assert(needles[i] != NULL);
+        if (str_starts_with(haystack, needles[i]))
+            return true;
+    }
+
+    return false;
+}
+
+bool str_matches_any(const char *haystack, const char **needles, int num_needles)
+{
+    assert(haystack != NULL);
+    assert(needles != NULL);
+    assert(num_needles >= 0);
+
+    for (int i = 0; i < num_needles; i++) {
+        assert(needles[i] != NULL);
+        if (!strcmp(haystack, needles[i]))
+            return true;
+    }
+
+    return false;
+}
+
+char *str_substring(const char *str, size_t startidx, long endidx)
+{
+    assert(str != NULL);
+    assert(startidx >= 0 && startidx <= strlen(str)+1);
+    assert(endidx < 0 || endidx >= startidx);
+    assert(endidx < 0 || endidx <= strlen(str)+1);
+
+    if (endidx < 0)
+        endidx = (long) strlen(str);
+
+    size_t blen = endidx - startidx; // not counting \0
+    char *b = malloc(blen + 1);
+    memcpy(b, &str[startidx], blen);
+    b[blen] = 0;
+    return b;
+}
+
+char *str_replace(const char *haystack, const char *needle, const char *replacement)
+{
+    assert(haystack != NULL);
+    assert(needle != NULL);
+    assert(replacement != NULL);
+
+    string_buffer_t *sb = string_buffer_create();
+    size_t haystack_len = strlen(haystack);
+    size_t needle_len = strlen(needle);
+
+    int pos = 0;
+    while (pos < haystack_len) {
+        if (needle_len > 0 && str_starts_with(&haystack[pos], needle)) {
+            string_buffer_append_string(sb, replacement);
+            pos += needle_len;
+        } else {
+            string_buffer_append(sb, haystack[pos]);
+            pos++;
+        }
+    }
+    if (needle_len == 0 && haystack_len == 0)
+        string_buffer_append_string(sb, replacement);
+
+    char *res = string_buffer_to_string(sb);
+    string_buffer_destroy(sb);
+    return res;
+}
+
+char *str_replace_many(const char *_haystack, ...)
+{
+    va_list ap;
+    va_start(ap, _haystack);
+
+    char *haystack = strdup(_haystack);
+
+    while (true) {
+        char *needle = va_arg(ap, char*);
+        if (!needle)
+            break;
+
+        char *replacement = va_arg(ap, char*);
+        char *tmp = str_replace(haystack, needle, replacement);
+        free(haystack);
+        haystack = tmp;
+    }
+
+    va_end(ap);
+
+    return haystack;
+}
+
+static void buffer_appendf(char **_buf, int *bufpos, void *fmt, ...)
+{
+    char *buf = *_buf;
+    va_list ap;
+
+    int salloc = 128;
+    char *s = malloc(salloc);
+
+    va_start(ap, fmt);
+    int slen = vsnprintf(s, salloc, fmt, ap);
+    va_end(ap);
+
+    if (slen >= salloc) {
+        s = realloc(s, slen + 1);
+        va_start(ap, fmt);
+        vsprintf((char*) s, fmt, ap);
+        va_end(ap);
+    }
+
+    buf = realloc(buf, *bufpos + slen + 1);
+    *_buf = buf;
+
+    memcpy(&buf[*bufpos], s, slen + 1); // get trailing \0
+    (*bufpos) += slen;
+
+    free(s);
+}
+
+static int is_variable_character(char c)
+{
+    if (c >= 'a' && c <= 'z')
+        return 1;
+
+    if (c >= 'A' && c <= 'Z')
+        return 1;
+
+    if (c >= '0' && c <= '9')
+        return 1;
+
+    if (c == '_')
+        return 1;
+
+    return 0;
+}
+
+char *str_expand_envs(const char *in)
+{
+    size_t inlen = strlen(in);
+    size_t inpos = 0;
+
+    char *out = NULL;
+    int  outpos = 0;
+
+    while (inpos < inlen) {
+
+        if (in[inpos] != '$') {
+            buffer_appendf(&out, &outpos, "%c", in[inpos]);
+            inpos++;
+            continue;
+
+        } else {
+            inpos++; // consume '$'
+
+            char *varname = NULL;
+            int  varnamepos = 0;
+
+            while (inpos < inlen && is_variable_character(in[inpos])) {
+                buffer_appendf(&varname, &varnamepos, "%c", in[inpos]);
+                inpos++;
+            }
+
+            char *env = getenv(varname);
+            if (env)
+                buffer_appendf(&out, &outpos, "%s", env);
+
+            free(varname);
+        }
+    }
+
+    return out;
+}
diff --git a/common/string_util.h b/common/string_util.h
new file mode 100644
index 0000000..2625838
--- /dev/null
+++ b/common/string_util.h
@@ -0,0 +1,465 @@
+/* Copyright (C) 2013-2016, The Regents of The University of Michigan.
+All rights reserved.
+This software was developed in the APRIL Robotics Lab under the
+direction of Edwin Olson, ebolson@umich.edu. This software may be
+available under alternative licensing terms; contact the address above.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+1. Redistributions of source code must retain the above copyright notice, this
+   list of conditions and the following disclaimer.
+2. Redistributions in binary form must reproduce the above copyright notice,
+   this list of conditions and the following disclaimer in the documentation
+   and/or other materials provided with the distribution.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+The views and conclusions contained in the software and documentation are those
+of the authors and should not be interpreted as representing official policies,
+either expressed or implied, of the Regents of The University of Michigan.
+*/
+
+#pragma once
+
+#include <stdio.h>
+#include <stdarg.h>
+#include <stdbool.h>
+#include <ctype.h>
+
+#include "zarray.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+typedef struct string_buffer string_buffer_t;
+
+typedef struct string_feeder string_feeder_t;
+struct string_feeder
+{
+    char *s;
+    size_t len;
+    size_t pos;
+
+    int line, col;
+};
+
+/**
+ * Similar to sprintf(), except that it will malloc() enough space for the
+ * formatted string which it returns. It is the caller's responsibility to call
+ * free() on the returned string when it is no longer needed.
+ */
+char *sprintf_alloc(const char *fmt, ...)
+#ifndef _MSC_VER
+__attribute__ ((format (printf, 1, 2)))
+#endif
+;
+
+/**
+ * Similar to vsprintf(), except that it will malloc() enough space for the
+ * formatted string which it returns. It is the caller's responsibility to call
+ * free() on the returned string when it is no longer needed.
+ */
+char *vsprintf_alloc(const char *fmt, va_list args);
+
+/**
+ * Concatenates 1 or more strings together and returns the result, which will be a
+ * newly allocated string which it is the caller's responsibility to free.
+ */
+#define str_concat(...) _str_concat_private(__VA_ARGS__, NULL)
+char *_str_concat_private(const char *first, ...);
+
+
+// Returns the index of the first character that differs:
+int str_diff_idx(const char * a, const char * b);
+
+/**
+ * Splits the supplied string into an array of strings by subdividing it at
+ * each occurrence of the supplied delimiter string. The split strings will not
+ * contain the delimiter. The original string will remain unchanged.
+ * If str is composed of all delimiters, an empty array will be returned.
+ *
+ * It is the caller's responsibilty to free the returned zarray, as well as
+ * the strings contained within it, e.g.:
+ *
+ *   zarray_t *za = str_split("this is a haystack", " ");
+ *      => ["this", "is", "a", "haystack"]
+ *   zarray_vmap(za, free);
+ *   zarray_destroy(za);
+ */
+zarray_t *str_split(const char *str, const char *delim);
+
+zarray_t *str_split_spaces(const char *str);
+
+void str_split_destroy(zarray_t *s);
+
+/*
+ * Determines if str1 exactly matches str2 (more efficient than strcmp(...) == 0)
+ */
+static inline bool streq(const char *str1, const char* str2)
+{
+    int i;
+    for (i = 0 ; str1[i] != '\0' ; i++) {
+        if (str1[i] != str2[i])
+            return false;
+    }
+
+    return str2[i] == '\0';
+}
+
+/**
+ * Determines if str1 exactly matches str2, ignoring case (more efficient than
+ * strcasecmp(...) == 0)
+ */
+static inline bool strcaseeq(const char *str1, const char* str2)
+{
+    int i;
+    for (i = 0 ; str1[i] != '\0' ; i++) {
+        if (str1[i] == str2[i])
+            continue;
+        else if (islower(str1[i]) && (str1[i] - 32) == str2[i])
+            continue;
+        else if (isupper(str1[i]) && (str1[i] + 32) == str2[i])
+            continue;
+
+        return false;
+    }
+
+    return str2[i] == '\0';
+}
+
+/**
+ * Trims whitespace characters (i.e. matching isspace()) from the beginning and/or
+ * end of the supplied string. This change affects the supplied string in-place.
+ * The supplied/edited string is returned to enable chained reference.
+ *
+ * Note: do not pass a string literal to this function
+ */
+char *str_trim(char *str);
+
+/**
+ * Trims whitespace characters (i.e. matching isspace()) from the beginning
+ * of the supplied string. This change affects the supplied string in-place.
+ * The supplied/edited string is returned to enable chained reference.
+ *
+ * Note: do not pass a string literal to this function
+ */
+char *str_lstrip(char *str);
+
+/**
+ * Trims whitespace characters (i.e. matching isspace()) from the end of the
+ * supplied string. This change affects the supplied string in-place.
+ * The supplied/edited string is returned to enable chained reference.
+ *
+ * Note: do not pass a string literal to this function
+ */
+char *str_rstrip(char *str);
+
+/**
+ * Returns true if the end of string 'haystack' matches 'needle', else false.
+ *
+ * Note: An empty needle ("") will match any source.
+ */
+bool str_ends_with(const char *haystack, const char *needle);
+
+/**
+ * Returns true if the start of string 'haystack' matches 'needle', else false.
+ *
+ * Note: An empty needle ("") will match any source.
+ */
+bool str_starts_with(const char *haystack, const char *needle);
+
+/**
+ * Returns true if the start of string 'haystack' matches any needle, else false.
+ *
+ * Note: An empty needle ("") will match any source.
+ */
+bool str_starts_with_any(const char *haystack, const char **needles, int num_needles);
+
+/**
+ * Returns true if the string 'haystack' matches any needle, else false.
+ */
+bool str_matches_any(const char *haystack, const char **needles, int num_needles);
+
+/**
+ * Retrieves a (newly-allocated) substring of the given string, 'str', starting
+ * from character index 'startidx' through index 'endidx' - 1 (inclusive).
+ * An 'endidx' value -1 is equivalent to strlen(str).
+ *
+ * It is the caller's responsibility to free the returned string.
+ *
+ * Examples:
+ *   str_substring("string", 1, 3) = "tr"
+ *   str_substring("string", 2, -1) = "ring"
+ *   str_substring("string", 3, 3) = ""
+ *
+ * Note: startidx must be >= endidx
+ */
+char *str_substring(const char *str, size_t startidx, long endidx);
+
+/**
+ * Retrieves the zero-based index of the beginning of the supplied substring
+ * (needle) within the search string (haystack) if it exists.
+ *
+ * Returns -1 if the supplied needle is not found within the haystack.
+ */
+int str_indexof(const char *haystack, const char *needle);
+
+    static inline int str_contains(const char *haystack, const char *needle) {
+        return str_indexof(haystack, needle) >= 0;
+    }
+
+// same as above, but returns last match
+int str_last_indexof(const char *haystack, const char *needle);
+
+/**
+ * Replaces all upper-case characters within the supplied string with their
+ * lower-case counterparts, modifying the original string's contents.
+ *
+ * Returns the supplied / modified string.
+ */
+char *str_tolowercase(char *s);
+
+/**
+ * Replaces all lower-case characters within the supplied string with their
+ * upper-case counterparts, modifying the original string's contents.
+ *
+ * Returns the supplied / modified string.
+ */
+char *str_touppercase(char *s);
+
+/**
+ * Replaces all occurrences of 'needle' in the string 'haystack', substituting
+ * for them the value of 'replacement', and returns the result as a newly-allocated
+ * string. The original strings remain unchanged.
+ *
+ * It is the caller's responsibility to free the returned string.
+ *
+ * Examples:
+ *   str_replace("string", "ri", "u") = "stung"
+ *   str_replace("singing", "ing", "") = "s"
+ *   str_replace("string", "foo", "bar") = "string"
+ *
+ * Note: An empty needle will match only an empty haystack
+ */
+char *str_replace(const char *haystack, const char *needle, const char *replacement);
+
+    char *str_replace_many(const char *_haystack, ...);
+//////////////////////////////////////////////////////
+// String Buffer
+
+/**
+ * Creates and initializes a string buffer object which can be used with any of
+ * the string_buffer_*() functions.
+ *
+ * It is the caller's responsibility to free the string buffer resources with
+ * a call to string_buffer_destroy() when it is no longer needed.
+ */
+string_buffer_t *string_buffer_create();
+
+/**
+ * Frees the resources associated with a string buffer object, including space
+ * allocated for any appended characters / strings.
+ */
+void string_buffer_destroy(string_buffer_t *sb);
+
+/**
+ * Appends a single character to the end of the supplied string buffer.
+ */
+void string_buffer_append(string_buffer_t *sb, char c);
+
+/**
+ * Removes a single character from the end of the string and
+ * returns it. Does nothing if string is empty and returns NULL
+ */
+char string_buffer_pop_back(string_buffer_t *sb);
+
+/**
+ * Appends the supplied string to the end of the supplied string buffer.
+ */
+void string_buffer_append_string(string_buffer_t *sb, const char *str);
+
+/**
+ * Formats the supplied string and arguments in a manner akin to printf(), and
+ * appends the resulting string to the end of the supplied string buffer.
+ */
+void string_buffer_appendf(string_buffer_t *sb, const char *fmt, ...)
+#ifndef _MSC_VER
+__attribute__ ((format (printf, 2, 3)))
+#endif
+;
+
+/**
+ * Determines whether the character contents held by the supplied string buffer
+ * ends with the supplied string.
+ *
+ * Returns true if the string buffer's contents ends with 'str', else false.
+ */
+bool string_buffer_ends_with(string_buffer_t *sb, const char *str);
+
+/**
+ * Returns the string-length of the contents of the string buffer (not counting \0).
+ * Equivalent to calling strlen() on the string returned by string_buffer_to_string(sb).
+ */
+size_t string_buffer_size(string_buffer_t *sb);
+
+/**
+ * Returns the contents of the string buffer in a newly-allocated string, which
+ * it is the caller's responsibility to free once it is no longer needed.
+ */
+char *string_buffer_to_string(string_buffer_t *sb);
+
+/**
+ * Clears the contents of the string buffer, setting its length to zero.
+ */
+void string_buffer_reset(string_buffer_t *sb);
+
+//////////////////////////////////////////////////////
+// String Feeder
+
+/**
+ * Creates a string feeder object which can be used to traverse the supplied
+ * string using the string_feeder_*() functions. A local copy of the string's
+ * contents will be stored so that future changes to 'str' will not be
+ * reflected by the string feeder object.
+ *
+ * It is the caller's responsibility to call string_feeder_destroy() on the
+ * returned object when it is no longer needed.
+ */
+string_feeder_t *string_feeder_create(const char *str);
+
+/**
+ * Frees resources associated with the supplied string feeder object, after
+ * which it will no longer be valid for use.
+ */
+void string_feeder_destroy(string_feeder_t *sf);
+
+/**
+ * Determines whether any characters remain to be retrieved from the string
+ * feeder's string (not including the terminating '\0').
+ *
+ * Returns true if at least one more character can be retrieved with calls to
+ * string_feeder_next(), string_feeder_peek(), string_feeder_peek(), or
+ * string_feeder_consume(), else false.
+ */
+bool string_feeder_has_next(string_feeder_t *sf);
+
+/**
+ * Retrieves the next available character from the supplied string feeder
+ * (which may be the terminating '\0' character) and advances the feeder's
+ * position to the next character in the string.
+ *
+ * Note: Attempts to read past the end of the string will throw an assertion.
+ */
+char string_feeder_next(string_feeder_t *sf);
+
+/**
+ * Retrieves a series of characters from the supplied string feeder. The number
+ * of characters returned will be 'length' or the number of characters
+ * remaining in the string, whichever is shorter. The string feeder's position
+ * will be advanced by the number of characters returned.
+ *
+ * It is the caller's responsibility to free the returned string when it is no
+ * longer needed.
+ *
+ * Note: Calling once the end of the string has already been read will throw an assertion.
+ */
+char *string_feeder_next_length(string_feeder_t *sf, size_t length);
+
+/**
+ * Retrieves the next available character from the supplied string feeder
+ * (which may be the terminating '\0' character), but does not advance
+ * the feeder's position so that subsequent calls to _next() or _peek() will
+ * retrieve the same character.
+ *
+ * Note: Attempts to peek past the end of the string will throw an assertion.
+ */
+char string_feeder_peek(string_feeder_t *sf);
+
+/**
+ * Retrieves a series of characters from the supplied string feeder. The number
+ * of characters returned will be 'length' or the number of characters
+ * remaining in the string, whichever is shorter. The string feeder's position
+ * will not be advanced.
+ *
+ * It is the caller's responsibility to free the returned string when it is no
+ * longer needed.
+ *
+ * Note: Calling once the end of the string has already been read will throw an assertion.
+ */
+char *string_feeder_peek_length(string_feeder_t *sf, size_t length);
+
+/**
+ * Retrieves the line number of the current position in the supplied
+ * string feeder, which will be incremented whenever a newline is consumed.
+ *
+ * Examples:
+ *   prior to reading 1st character:                line = 1, column = 0
+ *   after reading 1st non-newline character:       line = 1, column = 1
+ *   after reading 2nd non-newline character:       line = 1, column = 2
+ *   after reading 1st newline character:           line = 2, column = 0
+ *   after reading 1st character after 1st newline: line = 2, column = 1
+ *   after reading 2nd newline character:           line = 3, column = 0
+ */
+int string_feeder_get_line(string_feeder_t *sf);
+
+/**
+ * Retrieves the column index in the current line for the current position
+ * in the supplied string feeder, which will be incremented with each
+ * non-newline character consumed, and reset to 0 whenever a newline (\n) is
+ * consumed.
+ *
+ * Examples:
+ *   prior to reading 1st character:                line = 1, column = 0
+ *   after reading 1st non-newline character:       line = 1, column = 1
+ *   after reading 2nd non-newline character:       line = 1, column = 2
+ *   after reading 1st newline character:           line = 2, column = 0
+ *   after reading 1st character after 1st newline: line = 2, column = 1
+ *   after reading 2nd newline character:           line = 3, column = 0
+ */
+int string_feeder_get_column(string_feeder_t *sf);
+
+/**
+ * Determines whether the supplied string feeder's remaining contents starts
+ * with the given string.
+ *
+ * Returns true if the beginning of the string feeder's remaining contents matches
+ * the supplied string exactly, else false.
+ */
+bool string_feeder_starts_with(string_feeder_t *sf, const char *str);
+
+/**
+ * Consumes from the string feeder the number of characters contained in the
+ * given string (not including the terminating '\0').
+ *
+ * Throws an assertion if the consumed characters do not exactly match the
+ * contents of the supplied string.
+ */
+void string_feeder_require(string_feeder_t *sf, const char *str);
+
+/*#ifndef strdup
+    static inline char *strdup(const char *s) {
+        int len = strlen(s);
+        char *out = malloc(len+1);
+        memcpy(out, s, len + 1);
+        return out;
+    }
+#endif
+*/
+
+
+// find everything that looks like an env variable and expand it
+// using getenv. Caller should free the result.
+// e.g. "$HOME/abc" ==> "/home/ebolson/abc"
+char *str_expand_envs(const char *in);
+
+#ifdef __cplusplus
+}
+#endif
diff --git a/common/svd22.c b/common/svd22.c
new file mode 100644
index 0000000..ba7486d
--- /dev/null
+++ b/common/svd22.c
@@ -0,0 +1,258 @@
+/* Copyright (C) 2013-2016, The Regents of The University of Michigan.
+All rights reserved.
+This software was developed in the APRIL Robotics Lab under the
+direction of Edwin Olson, ebolson@umich.edu. This software may be
+available under alternative licensing terms; contact the address above.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+1. Redistributions of source code must retain the above copyright notice, this
+   list of conditions and the following disclaimer.
+2. Redistributions in binary form must reproduce the above copyright notice,
+   this list of conditions and the following disclaimer in the documentation
+   and/or other materials provided with the distribution.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+The views and conclusions contained in the software and documentation are those
+of the authors and should not be interpreted as representing official policies,
+either expressed or implied, of the Regents of The University of Michigan.
+*/
+
+#include <math.h>
+
+/** SVD 2x2.
+
+    Computes singular values and vectors without squaring the input
+    matrix. With double precision math, results are accurate to about
+    1E-16.
+
+    U = [ cos(theta) -sin(theta) ]
+        [ sin(theta)  cos(theta) ]
+
+    S = [ e  0 ]
+        [ 0  f ]
+
+    V = [ cos(phi)   -sin(phi) ]
+        [ sin(phi)   cos(phi)  ]
+
+
+    Our strategy is basically to analytically multiply everything out
+    and then rearrange so that we can solve for theta, phi, e, and
+    f. (Derivation by ebolson@umich.edu 5/2016)
+
+   V' = [ CP  SP ]
+        [ -SP CP ]
+
+USV' = [ CT -ST ][  e*CP  e*SP ]
+       [ ST  CT ][ -f*SP  f*CP ]
+
+     = [e*CT*CP + f*ST*SP     e*CT*SP - f*ST*CP ]
+       [e*ST*CP - f*SP*CT     e*SP*ST + f*CP*CT ]
+
+A00+A11 = e*CT*CP + f*ST*SP + e*SP*ST + f*CP*CT
+        = e*(CP*CT + SP*ST) + f*(SP*ST + CP*CT)
+    	= (e+f)(CP*CT + SP*ST)
+B0	    = (e+f)*cos(P-T)
+
+A00-A11 = e*CT*CP + f*ST*SP - e*SP*ST - f*CP*CT
+        = e*(CP*CT - SP*ST) - f*(-ST*SP + CP*CT)
+	    = (e-f)(CP*CT - SP*ST)
+B1	    = (e-f)*cos(P+T)
+
+A01+A10 = e*CT*SP - f*ST*CP + e*ST*CP - f*SP*CT
+	    = e(CT*SP + ST*CP) - f*(ST*CP + SP*CT)
+	    = (e-f)*(CT*SP + ST*CP)
+B2	    = (e-f)*sin(P+T)
+
+A01-A10 = e*CT*SP - f*ST*CP - e*ST*CP + f*SP*CT
+	= e*(CT*SP - ST*CP) + f(SP*CT - ST*CP)
+	= (e+f)*(CT*SP - ST*CP)
+B3	= (e+f)*sin(P-T)
+
+B0 = (e+f)*cos(P-T)
+B1 = (e-f)*cos(P+T)
+B2 = (e-f)*sin(P+T)
+B3 = (e+f)*sin(P-T)
+
+B3/B0 = tan(P-T)
+
+B2/B1 = tan(P+T)
+ **/
+void svd22(const double A[4], double U[4], double S[2], double V[4])
+{
+    double A00 = A[0];
+    double A01 = A[1];
+    double A10 = A[2];
+    double A11 = A[3];
+
+    double B0 = A00 + A11;
+    double B1 = A00 - A11;
+    double B2 = A01 + A10;
+    double B3 = A01 - A10;
+
+    double PminusT = atan2(B3, B0);
+    double PplusT = atan2(B2, B1);
+
+    double P = (PminusT + PplusT) / 2;
+    double T = (-PminusT + PplusT) / 2;
+
+    double CP = cos(P), SP = sin(P);
+    double CT = cos(T), ST = sin(T);
+
+    U[0] = CT;
+    U[1] = -ST;
+    U[2] = ST;
+    U[3] = CT;
+
+    V[0] = CP;
+    V[1] = -SP;
+    V[2] = SP;
+    V[3] = CP;
+
+    // C0 = e+f. There are two ways to compute C0; we pick the one
+    // that is better conditioned.
+    double CPmT = cos(P-T), SPmT = sin(P-T);
+    double C0 = 0;
+    if (fabs(CPmT) > fabs(SPmT))
+        C0 = B0 / CPmT;
+    else
+        C0 = B3 / SPmT;
+
+    // C1 = e-f. There are two ways to compute C1; we pick the one
+    // that is better conditioned.
+    double CPpT = cos(P+T), SPpT = sin(P+T);
+    double C1 = 0;
+    if (fabs(CPpT) > fabs(SPpT))
+        C1 = B1 / CPpT;
+    else
+        C1 = B2 / SPpT;
+
+    // e and f are the singular values
+    double e = (C0 + C1) / 2;
+    double f = (C0 - C1) / 2;
+
+    if (e < 0) {
+        e = -e;
+        U[0] = -U[0];
+        U[2] = -U[2];
+    }
+
+    if (f < 0) {
+        f = -f;
+        U[1] = -U[1];
+        U[3] = -U[3];
+    }
+
+    // sort singular values.
+    if (e > f) {
+        // already in big-to-small order.
+        S[0] = e;
+        S[1] = f;
+    } else {
+        // Curiously, this code never seems to get invoked.  Why is it
+        // that S[0] always ends up the dominant vector?  However,
+        // this code has been tested (flipping the logic forces us to
+        // sort the singular values in ascending order).
+        //
+        // P = [ 0 1 ; 1 0 ]
+        // USV' = (UP)(PSP)(PV')
+        //      = (UP)(PSP)(VP)'
+        //      = (UP)(PSP)(P'V')'
+        S[0] = f;
+        S[1] = e;
+
+        // exchange columns of U and V
+        double tmp[2];
+        tmp[0] = U[0];
+        tmp[1] = U[2];
+        U[0] = U[1];
+        U[2] = U[3];
+        U[1] = tmp[0];
+        U[3] = tmp[1];
+
+        tmp[0] = V[0];
+        tmp[1] = V[2];
+        V[0] = V[1];
+        V[2] = V[3];
+        V[1] = tmp[0];
+        V[3] = tmp[1];
+    }
+
+    /*
+    double SM[4] = { S[0], 0, 0, S[1] };
+
+    doubles_print_mat(U, 2, 2, "%20.10g");
+    doubles_print_mat(SM, 2, 2, "%20.10g");
+    doubles_print_mat(V, 2, 2, "%20.10g");
+    printf("A:\n");
+    doubles_print_mat(A, 2, 2, "%20.10g");
+
+    double SVt[4];
+    doubles_mat_ABt(SM, 2, 2, V, 2, 2, SVt, 2, 2);
+    double USVt[4];
+    doubles_mat_AB(U, 2, 2, SVt, 2, 2, USVt, 2, 2);
+
+    printf("USVt\n");
+    doubles_print_mat(USVt, 2, 2, "%20.10g");
+
+    double diff[4];
+    for (int i = 0; i < 4; i++)
+        diff[i] = A[i] - USVt[i];
+
+    printf("diff\n");
+    doubles_print_mat(diff, 2, 2, "%20.10g");
+
+    */
+
+}
+
+
+// for the matrix [a b; b d]
+void svd_sym_singular_values(double A00, double A01, double A11,
+                             double *Lmin, double *Lmax)
+{
+    double A10 = A01;
+
+    double B0 = A00 + A11;
+    double B1 = A00 - A11;
+    double B2 = A01 + A10;
+    double B3 = A01 - A10;
+
+    double PminusT = atan2(B3, B0);
+    double PplusT = atan2(B2, B1);
+
+    double P = (PminusT + PplusT) / 2;
+    double T = (-PminusT + PplusT) / 2;
+
+    // C0 = e+f. There are two ways to compute C0; we pick the one
+    // that is better conditioned.
+    double CPmT = cos(P-T), SPmT = sin(P-T);
+    double C0 = 0;
+    if (fabs(CPmT) > fabs(SPmT))
+        C0 = B0 / CPmT;
+    else
+        C0 = B3 / SPmT;
+
+    // C1 = e-f. There are two ways to compute C1; we pick the one
+    // that is better conditioned.
+    double CPpT = cos(P+T), SPpT = sin(P+T);
+    double C1 = 0;
+    if (fabs(CPpT) > fabs(SPpT))
+        C1 = B1 / CPpT;
+    else
+        C1 = B2 / SPpT;
+
+    // e and f are the singular values
+    double e = (C0 + C1) / 2;
+    double f = (C0 - C1) / 2;
+
+    *Lmin = fmin(e, f);
+    *Lmax = fmax(e, f);
+}
diff --git a/common/svd22.h b/common/svd22.h
new file mode 100644
index 0000000..5277f8b
--- /dev/null
+++ b/common/svd22.h
@@ -0,0 +1,34 @@
+/* Copyright (C) 2013-2016, The Regents of The University of Michigan.
+All rights reserved.
+This software was developed in the APRIL Robotics Lab under the
+direction of Edwin Olson, ebolson@umich.edu. This software may be
+available under alternative licensing terms; contact the address above.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+1. Redistributions of source code must retain the above copyright notice, this
+   list of conditions and the following disclaimer.
+2. Redistributions in binary form must reproduce the above copyright notice,
+   this list of conditions and the following disclaimer in the documentation
+   and/or other materials provided with the distribution.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+The views and conclusions contained in the software and documentation are those
+of the authors and should not be interpreted as representing official policies,
+either expressed or implied, of the Regents of The University of Michigan.
+*/
+
+#pragma once
+
+void svd22(const double A[4], double U[4], double S[2], double V[4]);
+
+// for the matrix [a b; b d]
+void svd_sym_singular_values(double A00, double A01, double A11,
+                             double *Lmin, double *Lmax);
diff --git a/common/time_util.c b/common/time_util.c
new file mode 100644
index 0000000..7a25f42
--- /dev/null
+++ b/common/time_util.c
@@ -0,0 +1,162 @@
+/* Copyright (C) 2013-2016, The Regents of The University of Michigan.
+All rights reserved.
+This software was developed in the APRIL Robotics Lab under the
+direction of Edwin Olson, ebolson@umich.edu. This software may be
+available under alternative licensing terms; contact the address above.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+1. Redistributions of source code must retain the above copyright notice, this
+   list of conditions and the following disclaimer.
+2. Redistributions in binary form must reproduce the above copyright notice,
+   this list of conditions and the following disclaimer in the documentation
+   and/or other materials provided with the distribution.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+The views and conclusions contained in the software and documentation are those
+of the authors and should not be interpreted as representing official policies,
+either expressed or implied, of the Regents of The University of Michigan.
+*/
+
+#include <stdlib.h>
+#include <math.h>
+#include "time_util.h"
+
+struct timeutil_rest
+{
+    int64_t acc_time;
+    int64_t start_time;
+};
+
+timeutil_rest_t *timeutil_rest_create()
+{
+    timeutil_rest_t *rest = calloc(1, sizeof(timeutil_rest_t));
+    return rest;
+}
+
+void timeutil_rest_destroy(timeutil_rest_t *rest)
+{
+    free(rest);
+}
+
+int64_t utime_now() // blacklist-ignore
+{
+    struct timeval tv;
+    gettimeofday (&tv, NULL); // blacklist-ignore
+    return (int64_t) tv.tv_sec * 1000000 + tv.tv_usec;
+}
+
+int64_t utime_get_seconds(int64_t v)
+{
+    return v/1000000;
+}
+
+int64_t utime_get_useconds(int64_t v)
+{
+    return v%1000000;
+}
+
+void utime_to_timeval(int64_t v, struct timeval *tv)
+{
+    tv->tv_sec  = (time_t) utime_get_seconds(v);
+    tv->tv_usec = (suseconds_t) utime_get_useconds(v);
+}
+
+void utime_to_timespec(int64_t v, struct timespec *ts)
+{
+    ts->tv_sec  = (time_t) utime_get_seconds(v);
+    ts->tv_nsec = (suseconds_t) utime_get_useconds(v)*1000;
+}
+
+int32_t timeutil_usleep(int64_t useconds)
+{
+#ifdef _WIN32
+    Sleep(useconds/1000);
+    return 0;
+#else
+    // unistd.h function, but usleep is obsoleted in POSIX.1-2008.
+    // TODO: Eventually, rewrite this to use nanosleep
+    return usleep(useconds);
+#endif
+}
+
+uint32_t timeutil_sleep(unsigned int seconds)
+{
+#ifdef _WIN32
+    Sleep(seconds*1000);
+    return 0;
+#else
+    // unistd.h function
+    return sleep(seconds);
+#endif
+}
+
+int32_t timeutil_sleep_hz(timeutil_rest_t *rest, double hz)
+{
+    int64_t max_delay = 1000000L/hz;
+    int64_t curr_time = utime_now();
+    int64_t diff = curr_time - rest->start_time;
+    int64_t delay = max_delay - diff;
+    if (delay < 0) delay = 0;
+
+    int32_t ret = timeutil_usleep(delay);
+    rest->start_time = utime_now();
+
+    return ret;
+}
+
+void timeutil_timer_reset(timeutil_rest_t *rest)
+{
+    rest->start_time = utime_now();
+    rest->acc_time = 0;
+}
+
+void timeutil_timer_start(timeutil_rest_t *rest)
+{
+    rest->start_time = utime_now();
+}
+
+void timeutil_timer_stop(timeutil_rest_t *rest)
+{
+    int64_t curr_time = utime_now();
+    int64_t diff = curr_time - rest->start_time;
+
+    rest->acc_time += diff;
+}
+
+bool timeutil_timer_timeout(timeutil_rest_t *rest, double timeout_s)
+{
+    int64_t timeout_us = (int64_t)(1000000L*timeout_s);
+    return rest->acc_time > timeout_us;
+}
+
+int64_t time_util_hhmmss_ss_to_utime(double time)
+{
+    int64_t utime = 0;
+
+    int itime = ((int) time);
+
+    double seconds = fmod(time, 100.0);
+    uint8_t minutes = (itime % 10000) / 100;
+    uint8_t hours =  itime / 10000;
+
+    utime += seconds *   100;
+    utime += minutes *  6000;
+    utime += hours   *360000;
+
+    utime *= 10000;
+
+    return utime;
+}
+
+int64_t timeutil_ms_to_us(int32_t ms)
+{
+    return ((int64_t) ms) * 1000;
+}
diff --git a/common/time_util.h b/common/time_util.h
new file mode 100644
index 0000000..207e958
--- /dev/null
+++ b/common/time_util.h
@@ -0,0 +1,83 @@
+/* Copyright (C) 2013-2016, The Regents of The University of Michigan.
+All rights reserved.
+This software was developed in the APRIL Robotics Lab under the
+direction of Edwin Olson, ebolson@umich.edu. This software may be
+available under alternative licensing terms; contact the address above.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+1. Redistributions of source code must retain the above copyright notice, this
+   list of conditions and the following disclaimer.
+2. Redistributions in binary form must reproduce the above copyright notice,
+   this list of conditions and the following disclaimer in the documentation
+   and/or other materials provided with the distribution.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+The views and conclusions contained in the software and documentation are those
+of the authors and should not be interpreted as representing official policies,
+either expressed or implied, of the Regents of The University of Michigan.
+*/
+
+#pragma once
+
+#include <stdbool.h>
+#include <stdint.h>
+#include <time.h>
+
+#ifdef _WIN32
+#include <windows.h>
+typedef long long suseconds_t;
+#endif
+
+#ifdef _MSC_VER
+
+inline int gettimeofday(struct timeval* tp, void* tzp)
+{
+  unsigned long t;
+  t = time(NULL);
+  tp->tv_sec = t / 1000;
+  tp->tv_usec = t % 1000;
+  return 0;
+}
+#else
+#include <sys/time.h>
+#include <unistd.h>
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef struct timeutil_rest timeutil_rest_t;
+timeutil_rest_t *timeutil_rest_create();
+void timeutil_rest_destroy(timeutil_rest_t * rest);
+
+int64_t utime_now(); // blacklist-ignore
+int64_t utime_get_seconds(int64_t v);
+int64_t utime_get_useconds(int64_t v);
+void    utime_to_timeval(int64_t v, struct timeval *tv);
+void    utime_to_timespec(int64_t v, struct timespec *ts);
+
+int32_t  timeutil_usleep(int64_t useconds);
+uint32_t timeutil_sleep(unsigned int seconds);
+int32_t  timeutil_sleep_hz(timeutil_rest_t *rest, double hz);
+
+void timeutil_timer_reset(timeutil_rest_t *rest);
+void timeutil_timer_start(timeutil_rest_t *rest);
+void timeutil_timer_stop(timeutil_rest_t *rest);
+bool timeutil_timer_timeout(timeutil_rest_t *rest, double timeout_s);
+
+int64_t time_util_hhmmss_ss_to_utime(double time);
+
+int64_t timeutil_ms_to_us(int32_t ms);
+
+#ifdef __cplusplus
+}
+#endif
diff --git a/common/timeprofile.h b/common/timeprofile.h
new file mode 100644
index 0000000..8016386
--- /dev/null
+++ b/common/timeprofile.h
@@ -0,0 +1,120 @@
+/* Copyright (C) 2013-2016, The Regents of The University of Michigan.
+All rights reserved.
+This software was developed in the APRIL Robotics Lab under the
+direction of Edwin Olson, ebolson@umich.edu. This software may be
+available under alternative licensing terms; contact the address above.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+1. Redistributions of source code must retain the above copyright notice, this
+   list of conditions and the following disclaimer.
+2. Redistributions in binary form must reproduce the above copyright notice,
+   this list of conditions and the following disclaimer in the documentation
+   and/or other materials provided with the distribution.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+The views and conclusions contained in the software and documentation are those
+of the authors and should not be interpreted as representing official policies,
+either expressed or implied, of the Regents of The University of Michigan.
+*/
+
+#pragma once
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdio.h>
+#include <string.h>
+#include <stdint.h>
+
+#include "time_util.h"
+#include "zarray.h"
+
+struct timeprofile_entry
+{
+    char name[32];
+    int64_t utime;
+};
+
+typedef struct timeprofile timeprofile_t;
+struct timeprofile
+{
+    int64_t utime;
+    zarray_t *stamps;
+};
+
+static inline timeprofile_t *timeprofile_create()
+{
+    timeprofile_t *tp = (timeprofile_t*) calloc(1, sizeof(timeprofile_t));
+    tp->stamps = zarray_create(sizeof(struct timeprofile_entry));
+
+    tp->utime = utime_now();
+
+    return tp;
+}
+
+static inline void timeprofile_destroy(timeprofile_t *tp)
+{
+    zarray_destroy(tp->stamps);
+    free(tp);
+}
+
+static inline void timeprofile_clear(timeprofile_t *tp)
+{
+    zarray_clear(tp->stamps);
+    tp->utime = utime_now();
+}
+
+static inline void timeprofile_stamp(timeprofile_t *tp, const char *name)
+{
+    struct timeprofile_entry tpe;
+
+    strncpy(tpe.name, name, sizeof(tpe.name));
+    tpe.name[sizeof(tpe.name)-1] = 0;
+    tpe.utime = utime_now();
+
+    zarray_add(tp->stamps, &tpe);
+}
+
+static inline void timeprofile_display(timeprofile_t *tp)
+{
+    int64_t lastutime = tp->utime;
+
+    for (int i = 0; i < zarray_size(tp->stamps); i++) {
+        struct timeprofile_entry *stamp;
+
+        zarray_get_volatile(tp->stamps, i, &stamp);
+
+        double cumtime = (stamp->utime - tp->utime)/1000000.0;
+
+        double parttime = (stamp->utime - lastutime)/1000000.0;
+
+        printf("%2d %32s %15f ms %15f ms\n", i, stamp->name, parttime*1000, cumtime*1000);
+
+        lastutime = stamp->utime;
+    }
+}
+
+static inline uint64_t timeprofile_total_utime(timeprofile_t *tp)
+{
+    if (zarray_size(tp->stamps) == 0)
+        return 0;
+
+    struct timeprofile_entry *first, *last;
+    zarray_get_volatile(tp->stamps, 0, &first);
+    zarray_get_volatile(tp->stamps, zarray_size(tp->stamps) - 1, &last);
+
+    return last->utime - first->utime;
+}
+
+#ifdef __cplusplus
+}
+#endif
diff --git a/common/unionfind.c b/common/unionfind.c
new file mode 100644
index 0000000..ae42991
--- /dev/null
+++ b/common/unionfind.c
@@ -0,0 +1,30 @@
+/* Copyright (C) 2013-2016, The Regents of The University of Michigan.
+All rights reserved.
+This software was developed in the APRIL Robotics Lab under the
+direction of Edwin Olson, ebolson@umich.edu. This software may be
+available under alternative licensing terms; contact the address above.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+1. Redistributions of source code must retain the above copyright notice, this
+   list of conditions and the following disclaimer.
+2. Redistributions in binary form must reproduce the above copyright notice,
+   this list of conditions and the following disclaimer in the documentation
+   and/or other materials provided with the distribution.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+The views and conclusions contained in the software and documentation are those
+of the authors and should not be interpreted as representing official policies,
+either expressed or implied, of the Regents of The University of Michigan.
+*/
+
+#include "unionfind.h"
+#include <stdlib.h>
+#include <assert.h>
diff --git a/common/unionfind.h b/common/unionfind.h
new file mode 100644
index 0000000..f3530a7
--- /dev/null
+++ b/common/unionfind.h
@@ -0,0 +1,146 @@
+/* Copyright (C) 2013-2016, The Regents of The University of Michigan.
+All rights reserved.
+This software was developed in the APRIL Robotics Lab under the
+direction of Edwin Olson, ebolson@umich.edu. This software may be
+available under alternative licensing terms; contact the address above.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+1. Redistributions of source code must retain the above copyright notice, this
+   list of conditions and the following disclaimer.
+2. Redistributions in binary form must reproduce the above copyright notice,
+   this list of conditions and the following disclaimer in the documentation
+   and/or other materials provided with the distribution.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+The views and conclusions contained in the software and documentation are those
+of the authors and should not be interpreted as representing official policies,
+either expressed or implied, of the Regents of The University of Michigan.
+*/
+
+#pragma once
+
+#include <stdint.h>
+#include <stdlib.h>
+
+typedef struct unionfind unionfind_t;
+
+struct unionfind
+{
+    uint32_t maxid;
+    struct ufrec *data;
+};
+
+struct ufrec
+{
+    // the parent of this node. If a node's parent is its own index,
+    // then it is a root.
+    uint32_t parent;
+
+    // for the root of a connected component, the number of components
+    // connected to it. For intermediate values, it's not meaningful.
+    uint32_t size;
+};
+
+static inline unionfind_t *unionfind_create(uint32_t maxid)
+{
+    unionfind_t *uf = (unionfind_t*) calloc(1, sizeof(unionfind_t));
+    uf->maxid = maxid;
+    uf->data = (struct ufrec*) malloc((maxid+1) * sizeof(struct ufrec));
+    for (uint32_t i = 0; i <= maxid; i++) {
+        uf->data[i].size = 1;
+        uf->data[i].parent = i;
+    }
+    return uf;
+}
+
+static inline void unionfind_destroy(unionfind_t *uf)
+{
+    free(uf->data);
+    free(uf);
+}
+
+/*
+static inline uint32_t unionfind_get_representative(unionfind_t *uf, uint32_t id)
+{
+    // base case: a node is its own parent
+    if (uf->data[id].parent == id)
+        return id;
+
+    // otherwise, recurse
+    uint32_t root = unionfind_get_representative(uf, uf->data[id].parent);
+
+    // short circuit the path. [XXX This write prevents tail recursion]
+    uf->data[id].parent = root;
+
+    return root;
+}
+*/
+
+// this one seems to be every-so-slightly faster than the recursive
+// version above.
+static inline uint32_t unionfind_get_representative(unionfind_t *uf, uint32_t id)
+{
+    uint32_t root = id;
+
+    // chase down the root
+    while (uf->data[root].parent != root) {
+        root = uf->data[root].parent;
+    }
+
+    // go back and collapse the tree.
+    while (uf->data[id].parent != root) {
+        uint32_t tmp = uf->data[id].parent;
+        uf->data[id].parent = root;
+        id = tmp;
+    }
+
+    return root;
+}
+
+static inline uint32_t unionfind_get_set_size(unionfind_t *uf, uint32_t id)
+{
+    uint32_t repid = unionfind_get_representative(uf, id);
+    return uf->data[repid].size;
+}
+
+static inline uint32_t unionfind_connect(unionfind_t *uf, uint32_t aid, uint32_t bid)
+{
+    uint32_t aroot = unionfind_get_representative(uf, aid);
+    uint32_t broot = unionfind_get_representative(uf, bid);
+
+    if (aroot == broot)
+        return aroot;
+
+    // we don't perform "union by rank", but we perform a similar
+    // operation (but probably without the same asymptotic guarantee):
+    // We join trees based on the number of *elements* (as opposed to
+    // rank) contained within each tree. I.e., we use size as a proxy
+    // for rank.  In my testing, it's often *faster* to use size than
+    // rank, perhaps because the rank of the tree isn't that critical
+    // if there are very few nodes in it.
+    uint32_t asize = uf->data[aroot].size;
+    uint32_t bsize = uf->data[broot].size;
+
+    // optimization idea: We could shortcut some or all of the tree
+    // that is grafted onto the other tree. Pro: those nodes were just
+    // read and so are probably in cache. Con: it might end up being
+    // wasted effort -- the tree might be grafted onto another tree in
+    // a moment!
+    if (asize > bsize) {
+        uf->data[broot].parent = aroot;
+        uf->data[aroot].size += bsize;
+        return aroot;
+    } else {
+        uf->data[aroot].parent = broot;
+        uf->data[broot].size += asize;
+        return broot;
+    }
+}
diff --git a/common/workerpool.c b/common/workerpool.c
new file mode 100644
index 0000000..a0170ef
--- /dev/null
+++ b/common/workerpool.c
@@ -0,0 +1,215 @@
+/* Copyright (C) 2013-2016, The Regents of The University of Michigan.
+All rights reserved.
+This software was developed in the APRIL Robotics Lab under the
+direction of Edwin Olson, ebolson@umich.edu. This software may be
+available under alternative licensing terms; contact the address above.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+1. Redistributions of source code must retain the above copyright notice, this
+   list of conditions and the following disclaimer.
+2. Redistributions in binary form must reproduce the above copyright notice,
+   this list of conditions and the following disclaimer in the documentation
+   and/or other materials provided with the distribution.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+The views and conclusions contained in the software and documentation are those
+of the authors and should not be interpreted as representing official policies,
+either expressed or implied, of the Regents of The University of Michigan.
+*/
+#include <errno.h>
+
+#define _GNU_SOURCE  // Possible fix for 16.04
+#define __USE_GNU
+#include "common/pthreads_cross.h"
+#include <assert.h>
+#include <stdio.h>
+#include <stdlib.h>
+#ifdef _WIN32
+#include <windows.h>
+#else
+#include <unistd.h>
+#endif
+
+#include "workerpool.h"
+#include "debug_print.h"
+
+struct workerpool {
+    int nthreads;
+    zarray_t *tasks;
+    int taskspos;
+
+    pthread_t *threads;
+    int *status;
+
+    pthread_mutex_t mutex;
+    pthread_cond_t startcond;   // used to signal the availability of work
+    pthread_cond_t endcond;     // used to signal completion of all work
+
+    int end_count; // how many threads are done?
+};
+
+struct task
+{
+    void (*f)(void *p);
+    void *p;
+};
+
+void *worker_thread(void *p)
+{
+    workerpool_t *wp = (workerpool_t*) p;
+
+    int cnt = 0;
+
+    while (1) {
+        struct task *task;
+
+        pthread_mutex_lock(&wp->mutex);
+        while (wp->taskspos == zarray_size(wp->tasks)) {
+            wp->end_count++;
+//          printf("%"PRId64" thread %d did %d\n", utime_now(), pthread_self(), cnt);
+            pthread_cond_broadcast(&wp->endcond);
+            pthread_cond_wait(&wp->startcond, &wp->mutex);
+            cnt = 0;
+//            printf("%"PRId64" thread %d awake\n", utime_now(), pthread_self());
+        }
+
+        zarray_get_volatile(wp->tasks, wp->taskspos, &task);
+        wp->taskspos++;
+        cnt++;
+        pthread_mutex_unlock(&wp->mutex);
+//        pthread_yield();
+        sched_yield();
+
+        // we've been asked to exit.
+        if (task->f == NULL)
+            return NULL;
+
+        task->f(task->p);
+    }
+
+    return NULL;
+}
+
+workerpool_t *workerpool_create(int nthreads)
+{
+    assert(nthreads > 0);
+
+    workerpool_t *wp = calloc(1, sizeof(workerpool_t));
+    wp->nthreads = nthreads;
+    wp->tasks = zarray_create(sizeof(struct task));
+
+    if (nthreads > 1) {
+        wp->threads = calloc(wp->nthreads, sizeof(pthread_t));
+
+        pthread_mutex_init(&wp->mutex, NULL);
+        pthread_cond_init(&wp->startcond, NULL);
+        pthread_cond_init(&wp->endcond, NULL);
+
+        for (int i = 0; i < nthreads; i++) {
+            int res = pthread_create(&wp->threads[i], NULL, worker_thread, wp);
+            if (res != 0) {
+                debug_print("Insufficient system resources to create workerpool threads\n");
+                // errno already set to EAGAIN by pthread_create() failure
+                return NULL;
+            }
+        }
+    }
+
+    return wp;
+}
+
+void workerpool_destroy(workerpool_t *wp)
+{
+    if (wp == NULL)
+        return;
+
+    // force all worker threads to exit.
+    if (wp->nthreads > 1) {
+        for (int i = 0; i < wp->nthreads; i++)
+            workerpool_add_task(wp, NULL, NULL);
+
+        pthread_mutex_lock(&wp->mutex);
+        pthread_cond_broadcast(&wp->startcond);
+        pthread_mutex_unlock(&wp->mutex);
+
+        for (int i = 0; i < wp->nthreads; i++)
+            pthread_join(wp->threads[i], NULL);
+
+        pthread_mutex_destroy(&wp->mutex);
+        pthread_cond_destroy(&wp->startcond);
+        pthread_cond_destroy(&wp->endcond);
+        free(wp->threads);
+    }
+
+    zarray_destroy(wp->tasks);
+    free(wp);
+}
+
+int workerpool_get_nthreads(workerpool_t *wp)
+{
+    return wp->nthreads;
+}
+
+void workerpool_add_task(workerpool_t *wp, void (*f)(void *p), void *p)
+{
+    struct task t;
+    t.f = f;
+    t.p = p;
+
+    zarray_add(wp->tasks, &t);
+}
+
+void workerpool_run_single(workerpool_t *wp)
+{
+    for (int i = 0; i < zarray_size(wp->tasks); i++) {
+        struct task *task;
+        zarray_get_volatile(wp->tasks, i, &task);
+        task->f(task->p);
+    }
+
+    zarray_clear(wp->tasks);
+}
+
+// runs all added tasks, waits for them to complete.
+void workerpool_run(workerpool_t *wp)
+{
+    if (wp->nthreads > 1) {
+        wp->end_count = 0;
+
+        pthread_mutex_lock(&wp->mutex);
+        pthread_cond_broadcast(&wp->startcond);
+
+        while (wp->end_count < wp->nthreads) {
+//            printf("caught %d\n", wp->end_count);
+            pthread_cond_wait(&wp->endcond, &wp->mutex);
+        }
+
+        pthread_mutex_unlock(&wp->mutex);
+
+        wp->taskspos = 0;
+
+        zarray_clear(wp->tasks);
+
+    } else {
+        workerpool_run_single(wp);
+    }
+}
+
+int workerpool_get_nprocs()
+{
+#ifdef WIN32
+    SYSTEM_INFO sysinfo;
+    GetSystemInfo(&sysinfo);
+    return sysinfo.dwNumberOfProcessors;
+#else
+    return sysconf (_SC_NPROCESSORS_ONLN);
+#endif
+}
diff --git a/common/workerpool.h b/common/workerpool.h
new file mode 100644
index 0000000..2c32ab1
--- /dev/null
+++ b/common/workerpool.h
@@ -0,0 +1,49 @@
+/* Copyright (C) 2013-2016, The Regents of The University of Michigan.
+All rights reserved.
+This software was developed in the APRIL Robotics Lab under the
+direction of Edwin Olson, ebolson@umich.edu. This software may be
+available under alternative licensing terms; contact the address above.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+1. Redistributions of source code must retain the above copyright notice, this
+   list of conditions and the following disclaimer.
+2. Redistributions in binary form must reproduce the above copyright notice,
+   this list of conditions and the following disclaimer in the documentation
+   and/or other materials provided with the distribution.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+The views and conclusions contained in the software and documentation are those
+of the authors and should not be interpreted as representing official policies,
+either expressed or implied, of the Regents of The University of Michigan.
+*/
+
+#pragma once
+
+#include "zarray.h"
+
+typedef struct workerpool workerpool_t;
+
+// as a special case, if nthreads==1, no additional threads are
+// created, and workerpool_run will run synchronously.
+workerpool_t *workerpool_create(int nthreads);
+void workerpool_destroy(workerpool_t *wp);
+
+void workerpool_add_task(workerpool_t *wp, void (*f)(void *p), void *p);
+
+// runs all added tasks, waits for them to complete.
+void workerpool_run(workerpool_t *wp);
+
+// same as workerpool_run, except always single threaded. (mostly for debugging).
+void workerpool_run_single(workerpool_t *wp);
+
+int workerpool_get_nthreads(workerpool_t *wp);
+
+int workerpool_get_nprocs();
diff --git a/common/zarray.c b/common/zarray.c
new file mode 100644
index 0000000..43e6a7e
--- /dev/null
+++ b/common/zarray.c
@@ -0,0 +1,55 @@
+/* Copyright (C) 2013-2016, The Regents of The University of Michigan.
+All rights reserved.
+This software was developed in the APRIL Robotics Lab under the
+direction of Edwin Olson, ebolson@umich.edu. This software may be
+available under alternative licensing terms; contact the address above.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+1. Redistributions of source code must retain the above copyright notice, this
+   list of conditions and the following disclaimer.
+2. Redistributions in binary form must reproduce the above copyright notice,
+   this list of conditions and the following disclaimer in the documentation
+   and/or other materials provided with the distribution.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+The views and conclusions contained in the software and documentation are those
+of the authors and should not be interpreted as representing official policies,
+either expressed or implied, of the Regents of The University of Michigan.
+*/
+
+#include <string.h>
+#include <assert.h>
+
+#include "zarray.h"
+
+int zstrcmp(const void * a_pp, const void * b_pp)
+{
+    assert(a_pp != NULL);
+    assert(b_pp != NULL);
+
+    char * a = *(void**)a_pp;
+    char * b = *(void**)b_pp;
+
+    return strcmp(a,b);
+}
+
+void zarray_vmap(zarray_t *za, void (*f)())
+{
+    assert(za != NULL);
+    assert(f != NULL);
+    assert(za->el_sz == sizeof(void*));
+
+    for (int idx = 0; idx < za->size; idx++) {
+        void *pp = &za->data[idx*za->el_sz];
+        void *p = *(void**) pp;
+        f(p);
+    }
+}
diff --git a/common/zarray.h b/common/zarray.h
new file mode 100644
index 0000000..1a882c2
--- /dev/null
+++ b/common/zarray.h
@@ -0,0 +1,464 @@
+/* Copyright (C) 2013-2016, The Regents of The University of Michigan.
+All rights reserved.
+This software was developed in the APRIL Robotics Lab under the
+direction of Edwin Olson, ebolson@umich.edu. This software may be
+available under alternative licensing terms; contact the address above.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+1. Redistributions of source code must retain the above copyright notice, this
+   list of conditions and the following disclaimer.
+2. Redistributions in binary form must reproduce the above copyright notice,
+   this list of conditions and the following disclaimer in the documentation
+   and/or other materials provided with the distribution.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+The views and conclusions contained in the software and documentation are those
+of the authors and should not be interpreted as representing official policies,
+either expressed or implied, of the Regents of The University of Michigan.
+*/
+
+#pragma once
+
+#include <stddef.h>
+#include <assert.h>
+#include <stdlib.h>
+#include <string.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * Defines a structure which acts as a resize-able array ala Java's ArrayList.
+ */
+typedef struct zarray zarray_t;
+struct zarray
+{
+    size_t el_sz; // size of each element
+
+    int size; // how many elements?
+    int alloc; // we've allocated storage for how many elements?
+    char *data;
+};
+
+/**
+ * Creates and returns a variable array structure capable of holding elements of
+ * the specified size. It is the caller's responsibility to call zarray_destroy()
+ * on the returned array when it is no longer needed.
+ */
+static inline zarray_t *zarray_create(size_t el_sz)
+{
+    assert(el_sz > 0);
+
+    zarray_t *za = (zarray_t*) calloc(1, sizeof(zarray_t));
+    za->el_sz = el_sz;
+    return za;
+}
+
+/**
+ * Frees all resources associated with the variable array structure which was
+ * created by zarray_create(). After calling, 'za' will no longer be valid for storage.
+ */
+static inline void zarray_destroy(zarray_t *za)
+{
+    if (za == NULL)
+        return;
+
+    if (za->data != NULL)
+        free(za->data);
+    memset(za, 0, sizeof(zarray_t));
+    free(za);
+}
+
+/** Allocate a new zarray that contains a copy of the data in the argument. **/
+static inline zarray_t *zarray_copy(const zarray_t *za)
+{
+    assert(za != NULL);
+
+    zarray_t *zb = (zarray_t*) calloc(1, sizeof(zarray_t));
+    zb->el_sz = za->el_sz;
+    zb->size = za->size;
+    zb->alloc = za->alloc;
+    zb->data = (char*) malloc(zb->alloc * zb->el_sz);
+    memcpy(zb->data, za->data, za->size * za->el_sz);
+    return zb;
+}
+
+static int iceillog2(int v)
+{
+    v--;
+    v |= v >> 1;
+    v |= v >> 2;
+    v |= v >> 4;
+    v |= v >> 8;
+    v |= v >> 16;
+    v++;
+    return v;
+}
+
+/**
+ * Allocate a new zarray that contains a subset of the original
+ * elements. NOTE: end index is EXCLUSIVE, that is one past the last
+ * element you want.
+ */
+static inline zarray_t *zarray_copy_subset(const zarray_t *za,
+                             int start_idx,
+                             int end_idx_exclusive)
+{
+    zarray_t *out = (zarray_t*) calloc(1, sizeof(zarray_t));
+    out->el_sz = za->el_sz;
+    out->size = end_idx_exclusive - start_idx;
+    out->alloc = iceillog2(out->size); // round up pow 2
+    out->data = (char*) malloc(out->alloc * out->el_sz);
+    memcpy(out->data,  za->data +(start_idx*out->el_sz), out->size*out->el_sz);
+    return out;
+}
+
+/**
+ * Retrieves the number of elements currently being contained by the passed
+ * array, which may be different from its capacity. The index of the last element
+ * in the array will be one less than the returned value.
+ */
+static inline int zarray_size(const zarray_t *za)
+{
+    assert(za != NULL);
+
+    return za->size;
+}
+
+/**
+ * Returns 1 if zarray_size(za) == 0,
+ * returns 0 otherwise.
+ */
+/*
+JUST CALL zarray_size
+int zarray_isempty(const zarray_t *za)
+{
+    assert(za != NULL);
+    if (za->size <= 0)
+        return 1;
+    else
+        return 0;
+}
+*/
+
+
+/**
+ * Allocates enough internal storage in the supplied variable array structure to
+ * guarantee that the supplied number of elements (capacity) can be safely stored.
+ */
+static inline void zarray_ensure_capacity(zarray_t *za, int capacity)
+{
+    assert(za != NULL);
+
+    if (capacity <= za->alloc)
+        return;
+
+    while (za->alloc < capacity) {
+        za->alloc *= 2;
+        if (za->alloc < 8)
+            za->alloc = 8;
+    }
+
+    za->data = (char*) realloc(za->data, za->alloc * za->el_sz);
+}
+
+/**
+ * Adds a new element to the end of the supplied array, and sets its value
+ * (by copying) from the data pointed to by the supplied pointer 'p'.
+ * Automatically ensures that enough storage space is available for the new element.
+ */
+static inline void zarray_add(zarray_t *za, const void *p)
+{
+    assert(za != NULL);
+    assert(p != NULL);
+
+    zarray_ensure_capacity(za, za->size + 1);
+
+    memcpy(&za->data[za->size*za->el_sz], p, za->el_sz);
+    za->size++;
+}
+
+/**
+ * Retrieves the element from the supplied array located at the zero-based
+ * index of 'idx' and copies its value into the variable pointed to by the pointer
+ * 'p'.
+ */
+static inline void zarray_get(const zarray_t *za, int idx, void *p)
+{
+    assert(za != NULL);
+    assert(p != NULL);
+    assert(idx >= 0);
+    assert(idx < za->size);
+
+    memcpy(p, &za->data[idx*za->el_sz], za->el_sz);
+}
+
+/**
+ * Similar to zarray_get(), but returns a "live" pointer to the internal
+ * storage, avoiding a memcpy. This pointer is not valid across
+ * operations which might move memory around (i.e. zarray_remove_value(),
+ * zarray_remove_index(), zarray_insert(), zarray_sort(), zarray_clear()).
+ * 'p' should be a pointer to the pointer which will be set to the internal address.
+ */
+inline static void zarray_get_volatile(const zarray_t *za, int idx, void *p)
+{
+    assert(za != NULL);
+    assert(p != NULL);
+    assert(idx >= 0);
+    assert(idx < za->size);
+
+    *((void**) p) = &za->data[idx*za->el_sz];
+}
+
+inline static void zarray_truncate(zarray_t *za, int sz)
+{
+   assert(za != NULL);
+   assert(sz <= za->size);
+   za->size = sz;
+}
+
+/**
+ * Removes the entry at index 'idx'.
+ * If shuffle is true, the last element in the array will be placed in
+ * the newly-open space; if false, the zarray is compacted.
+ */
+static inline void zarray_remove_index(zarray_t *za, int idx, int shuffle)
+{
+    assert(za != NULL);
+    assert(idx >= 0);
+    assert(idx < za->size);
+
+    if (shuffle) {
+        if (idx < za->size-1)
+            memcpy(&za->data[idx*za->el_sz], &za->data[(za->size-1)*za->el_sz], za->el_sz);
+        za->size--;
+        return;
+    } else {
+        // size = 10, idx = 7. Should copy 2 entries (at idx=8 and idx=9).
+        // size = 10, idx = 9. Should copy 0 entries.
+        int ncopy = za->size - idx - 1;
+        if (ncopy > 0)
+            memmove(&za->data[idx*za->el_sz], &za->data[(idx+1)*za->el_sz], ncopy*za->el_sz);
+        za->size--;
+        return;
+    }
+}
+
+/**
+ * Remove the entry whose value is equal to the value pointed to by 'p'.
+ * If shuffle is true, the last element in the array will be placed in
+ * the newly-open space; if false, the zarray is compacted. At most
+ * one element will be removed.
+ *
+ * Note that objects will be compared using memcmp over the full size
+ * of the value. If the value is a struct that contains padding,
+ * differences in the padding bytes can cause comparisons to
+ * fail. Thus, it remains best practice to bzero all structs so that
+ * the padding is set to zero.
+ *
+ * Returns the number of elements removed (0 or 1).
+ */
+// remove the entry whose value is equal to the value pointed to by p.
+// if shuffle is true, the last element in the array will be placed in
+// the newly-open space; if false, the zarray is compacted.
+static inline int zarray_remove_value(zarray_t *za, const void *p, int shuffle)
+{
+    assert(za != NULL);
+    assert(p != NULL);
+
+    for (int idx = 0; idx < za->size; idx++) {
+        if (!memcmp(p, &za->data[idx*za->el_sz], za->el_sz)) {
+            zarray_remove_index(za, idx, shuffle);
+            return 1;
+        }
+    }
+
+    return 0;
+}
+
+
+/**
+ * Creates a new entry and inserts it into the array so that it will have the
+ * index 'idx' (i.e. before the item which currently has that index). The value
+ * of the new entry is set to (copied from) the data pointed to by 'p'. 'idx'
+ * can be one larger than the current max index to place the new item at the end
+ * of the array, or zero to add it to an empty array.
+ */
+static inline void zarray_insert(zarray_t *za, int idx, const void *p)
+{
+    assert(za != NULL);
+    assert(p != NULL);
+    assert(idx >= 0);
+    assert(idx <= za->size);
+
+    zarray_ensure_capacity(za, za->size + 1);
+    // size = 10, idx = 7. Should copy three entries (idx=7, idx=8, idx=9)
+    int ncopy = za->size - idx;
+
+    memmove(&za->data[(idx+1)*za->el_sz], &za->data[idx*za->el_sz], ncopy*za->el_sz);
+    memcpy(&za->data[idx*za->el_sz], p, za->el_sz);
+
+    za->size++;
+}
+
+
+/**
+ * Sets the value of the current element at index 'idx' by copying its value from
+ * the data pointed to by 'p'. The previous value of the changed element will be
+ * copied into the data pointed to by 'outp' if it is not null.
+ */
+static inline void zarray_set(zarray_t *za, int idx, const void *p, void *outp)
+{
+    assert(za != NULL);
+    assert(p != NULL);
+    assert(idx >= 0);
+    assert(idx < za->size);
+
+    if (outp != NULL)
+        memcpy(outp, &za->data[idx*za->el_sz], za->el_sz);
+
+    memcpy(&za->data[idx*za->el_sz], p, za->el_sz);
+}
+
+/**
+ * Calls the supplied function for every element in the array in index order.
+ * The map function will be passed a pointer to each element in turn and must
+ * have the following format:
+ *
+ * void map_function(element_type *element)
+ */
+static inline void zarray_map(zarray_t *za, void (*f)(void*))
+{
+    assert(za != NULL);
+    assert(f != NULL);
+
+    for (int idx = 0; idx < za->size; idx++)
+        f(&za->data[idx*za->el_sz]);
+}
+
+/**
+ * Calls the supplied function for every element in the array in index order.
+ * HOWEVER values are passed to the function, not pointers to values. In the
+ * case where the zarray stores object pointers, zarray_vmap allows you to
+ * pass in the object's destroy function (or free) directly. Can only be used
+ * with zarray's which contain pointer data. The map function should have the
+ * following format:
+ *
+ * void map_function(element_type *element)
+ */
+    void zarray_vmap(zarray_t *za, void (*f)());
+
+/**
+ * Removes all elements from the array and sets its size to zero. Pointers to
+ * any data elements obtained i.e. by zarray_get_volatile() will no longer be
+ * valid.
+ */
+static inline void zarray_clear(zarray_t *za)
+{
+    assert(za != NULL);
+    za->size = 0;
+}
+
+/**
+ * Determines whether any element in the array has a value which matches the
+ * data pointed to by 'p'.
+ *
+ * Returns 1 if a match was found anywhere in the array, else 0.
+ */
+static inline int zarray_contains(const zarray_t *za, const void *p)
+{
+    assert(za != NULL);
+    assert(p != NULL);
+
+    for (int idx = 0; idx < za->size; idx++) {
+        if (!memcmp(p, &za->data[idx*za->el_sz], za->el_sz)) {
+            return 1;
+        }
+    }
+
+    return 0;
+}
+
+/**
+ * Uses qsort() to sort the elements contained by the array in ascending order.
+ * Uses the supplied comparison function to determine the appropriate order.
+ *
+ * The comparison function will be passed a pointer to two elements to be compared
+ * and should return a measure of the difference between them (see strcmp()).
+ * I.e. it should return a negative number if the first element is 'less than'
+ * the second, zero if they are equivalent, and a positive number if the first
+ * element is 'greater than' the second. The function should have the following format:
+ *
+ * int comparison_function(const element_type *first, const element_type *second)
+ *
+ * zstrcmp() can be used as the comparison function for string elements, which
+ * will call strcmp() internally.
+ */
+static inline void zarray_sort(zarray_t *za, int (*compar)(const void*, const void*))
+{
+    assert(za != NULL);
+    assert(compar != NULL);
+    if (za->size == 0)
+        return;
+
+    qsort(za->data, za->size, za->el_sz, compar);
+}
+
+/**
+ * A comparison function for comparing strings which can be used by zarray_sort()
+ * to sort arrays with char* elements.
+ */
+    int zstrcmp(const void * a_pp, const void * b_pp);
+
+/**
+  * Find the index of an element, or return -1 if not found. Remember that p is
+  * a pointer to the element.
+ **/
+// returns -1 if not in array. Remember p is a pointer to the item.
+static inline int zarray_index_of(const zarray_t *za, const void *p)
+{
+    assert(za != NULL);
+    assert(p != NULL);
+
+    for (int i = 0; i < za->size; i++) {
+        if (!memcmp(p, &za->data[i*za->el_sz], za->el_sz))
+            return i;
+    }
+
+    return -1;
+}
+
+
+
+/**
+ * Add all elements from 'source' into 'dest'. el_size must be the same
+ * for both lists
+ **/
+static inline void zarray_add_all(zarray_t * dest, const zarray_t * source)
+{
+    assert(dest->el_sz == source->el_sz);
+
+    // Don't allocate on stack because el_sz could be larger than ~8 MB
+    // stack size
+    char *tmp = (char*)calloc(1, dest->el_sz);
+
+    for (int i = 0; i < zarray_size(source); i++) {
+        zarray_get(source, i, tmp);
+        zarray_add(dest, tmp);
+   }
+
+    free(tmp);
+}
+
+#ifdef __cplusplus
+}
+#endif
diff --git a/common/zhash.c b/common/zhash.c
new file mode 100644
index 0000000..914f530
--- /dev/null
+++ b/common/zhash.c
@@ -0,0 +1,563 @@
+/* Copyright (C) 2013-2016, The Regents of The University of Michigan.
+All rights reserved.
+This software was developed in the APRIL Robotics Lab under the
+direction of Edwin Olson, ebolson@umich.edu. This software may be
+available under alternative licensing terms; contact the address above.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+1. Redistributions of source code must retain the above copyright notice, this
+   list of conditions and the following disclaimer.
+2. Redistributions in binary form must reproduce the above copyright notice,
+   this list of conditions and the following disclaimer in the documentation
+   and/or other materials provided with the distribution.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+The views and conclusions contained in the software and documentation are those
+of the authors and should not be interpreted as representing official policies,
+either expressed or implied, of the Regents of The University of Michigan.
+*/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+
+#include "zhash.h"
+
+// force a rehash when our capacity is less than this many times the size
+#define ZHASH_FACTOR_CRITICAL 2
+
+// When resizing, how much bigger do we want to be? (should be greater than _CRITICAL)
+#define ZHASH_FACTOR_REALLOC 4
+
+struct zhash
+{
+    size_t keysz, valuesz;
+    int    entrysz; // valid byte (1) + keysz + values
+
+    uint32_t(*hash)(const void *a);
+
+    // returns 1 if equal
+    int(*equals)(const void *a, const void *b);
+
+    int size; // # of items in hash table
+
+    char *entries; // each entry of size entrysz;
+    int  nentries; // how many entries are allocated? Never 0.
+};
+
+zhash_t *zhash_create_capacity(size_t keysz, size_t valuesz,
+                               uint32_t(*hash)(const void *a), int(*equals)(const void *a, const void*b),
+                               int capacity)
+{
+    assert(hash != NULL);
+    assert(equals != NULL);
+
+    // resize...
+    int _nentries = ZHASH_FACTOR_REALLOC * capacity;
+    if (_nentries < 8)
+        _nentries = 8;
+
+    // to a power of 2.
+    int nentries = _nentries;
+    if ((nentries & (nentries - 1)) != 0) {
+        nentries = 8;
+        while (nentries < _nentries)
+            nentries *= 2;
+    }
+
+    zhash_t *zh = (zhash_t*) calloc(1, sizeof(zhash_t));
+    zh->keysz = keysz;
+    zh->valuesz = valuesz;
+    zh->hash = hash;
+    zh->equals = equals;
+    zh->nentries = nentries;
+
+    zh->entrysz = 1 + zh->keysz + zh->valuesz;
+
+    zh->entries = calloc(zh->nentries, zh->entrysz);
+    zh->nentries = nentries;
+
+    return zh;
+}
+
+zhash_t *zhash_create(size_t keysz, size_t valuesz,
+                      uint32_t(*hash)(const void *a), int(*equals)(const void *a, const void *b))
+{
+    return zhash_create_capacity(keysz, valuesz, hash, equals, 8);
+}
+
+void zhash_destroy(zhash_t *zh)
+{
+    if (zh == NULL)
+        return;
+
+    free(zh->entries);
+    free(zh);
+}
+
+int zhash_size(const zhash_t *zh)
+{
+    return zh->size;
+}
+
+void zhash_clear(zhash_t *zh)
+{
+    memset(zh->entries, 0, zh->nentries * zh->entrysz);
+    zh->size = 0;
+}
+
+int zhash_get_volatile(const zhash_t *zh, const void *key, void *out_value)
+{
+    uint32_t code = zh->hash(key);
+    uint32_t entry_idx = code & (zh->nentries - 1);
+
+    while (zh->entries[entry_idx * zh->entrysz]) {
+        void *this_key = &zh->entries[entry_idx * zh->entrysz + 1];
+        if (zh->equals(key, this_key)) {
+            *((void**) out_value) = &zh->entries[entry_idx * zh->entrysz + 1 + zh->keysz];
+            return 1;
+        }
+
+        entry_idx = (entry_idx + 1) & (zh->nentries - 1);
+    }
+
+    return 0;
+}
+
+int zhash_get(const zhash_t *zh, const void *key, void *out_value)
+{
+    void *tmp;
+    if (zhash_get_volatile(zh, key, &tmp)) {
+        memcpy(out_value, tmp, zh->valuesz);
+        return 1;
+    }
+
+    return 0;
+}
+
+int zhash_put(zhash_t *zh, const void *key, const void *value, void *oldkey, void *oldvalue)
+{
+    uint32_t code = zh->hash(key);
+    uint32_t entry_idx = code & (zh->nentries - 1);
+
+    while (zh->entries[entry_idx * zh->entrysz]) {
+        void *this_key = &zh->entries[entry_idx * zh->entrysz + 1];
+        void *this_value = &zh->entries[entry_idx * zh->entrysz + 1 + zh->keysz];
+
+        if (zh->equals(key, this_key)) {
+            // replace
+            if (oldkey)
+                memcpy(oldkey, this_key, zh->keysz);
+            if (oldvalue)
+                memcpy(oldvalue, this_value, zh->valuesz);
+            memcpy(this_key, key, zh->keysz);
+            memcpy(this_value, value, zh->valuesz);
+            zh->entries[entry_idx * zh->entrysz] = 1; // mark valid
+            return 1;
+        }
+
+        entry_idx = (entry_idx + 1) & (zh->nentries - 1);
+    }
+
+    // add the entry
+    zh->entries[entry_idx * zh->entrysz] = 1;
+    memcpy(&zh->entries[entry_idx * zh->entrysz + 1], key, zh->keysz);
+    memcpy(&zh->entries[entry_idx * zh->entrysz + 1 + zh->keysz], value, zh->valuesz);
+    zh->size++;
+
+    if (zh->nentries < ZHASH_FACTOR_CRITICAL * zh->size) {
+        zhash_t *newhash = zhash_create_capacity(zh->keysz, zh->valuesz,
+                                                 zh->hash, zh->equals,
+                                                 zh->size);
+
+        for (int idx = 0; idx < zh->nentries; idx++) {
+
+            if (zh->entries[idx * zh->entrysz]) {
+                void *this_key = &zh->entries[idx * zh->entrysz + 1];
+                void *this_value = &zh->entries[idx * zh->entrysz + 1 + zh->keysz];
+                if (zhash_put(newhash, this_key, this_value, NULL, NULL))
+                    assert(0); // shouldn't already be present.
+            }
+        }
+
+        // play switch-a-roo
+        zhash_t tmp;
+        memcpy(&tmp, zh, sizeof(zhash_t));
+        memcpy(zh, newhash, sizeof(zhash_t));
+        memcpy(newhash, &tmp, sizeof(zhash_t));
+        zhash_destroy(newhash);
+    }
+
+    return 0;
+}
+
+int zhash_remove(zhash_t *zh, const void *key, void *old_key, void *old_value)
+{
+    uint32_t code = zh->hash(key);
+    uint32_t entry_idx = code & (zh->nentries - 1);
+
+    while (zh->entries[entry_idx * zh->entrysz]) {
+        void *this_key = &zh->entries[entry_idx * zh->entrysz + 1];
+        void *this_value = &zh->entries[entry_idx * zh->entrysz + 1 + zh->keysz];
+
+        if (zh->equals(key, this_key)) {
+            if (old_key)
+                memcpy(old_key, this_key, zh->keysz);
+            if (old_value)
+                memcpy(old_value, this_value, zh->valuesz);
+
+            // mark this entry as available
+            zh->entries[entry_idx * zh->entrysz] = 0;
+            zh->size--;
+
+            // reinsert any consecutive entries that follow
+            while (1) {
+                entry_idx = (entry_idx + 1) & (zh->nentries - 1);
+
+                if (zh->entries[entry_idx * zh->entrysz]) {
+                    // completely remove this entry
+                    char *tmp = malloc(sizeof(char)*zh->entrysz);
+                    memcpy(tmp, &zh->entries[entry_idx * zh->entrysz], zh->entrysz);
+                    zh->entries[entry_idx * zh->entrysz] = 0;
+                    zh->size--;
+                    // reinsert it
+                    if (zhash_put(zh, &tmp[1], &tmp[1+zh->keysz], NULL, NULL))
+                        assert(0);
+                    free(tmp);
+                } else {
+                    break;
+                }
+            }
+            return 1;
+        }
+
+        entry_idx = (entry_idx + 1) & (zh->nentries - 1);
+    }
+
+    return 0;
+}
+
+zhash_t *zhash_copy(const zhash_t *zh)
+{
+    zhash_t *newhash = zhash_create_capacity(zh->keysz, zh->valuesz,
+                                             zh->hash, zh->equals,
+                                             zh->size);
+
+    for (int entry_idx = 0; entry_idx < zh->nentries; entry_idx++) {
+        if (zh->entries[entry_idx * zh->entrysz]) {
+            void *this_key = &zh->entries[entry_idx * zh->entrysz + 1];
+            void *this_value = &zh->entries[entry_idx * zh->entrysz + 1 + zh->keysz];
+            if (zhash_put(newhash, this_key, this_value, NULL, NULL))
+                assert(0); // shouldn't already be present.
+        }
+    }
+
+    return newhash;
+}
+
+int zhash_contains(const zhash_t *zh, const void *key)
+{
+    void *tmp;
+    return zhash_get_volatile(zh, key, &tmp);
+}
+
+void zhash_iterator_init(zhash_t *zh, zhash_iterator_t *zit)
+{
+    zit->zh = zh;
+    zit->czh = zh;
+    zit->last_entry = -1;
+}
+
+void zhash_iterator_init_const(const zhash_t *zh, zhash_iterator_t *zit)
+{
+    zit->zh = NULL;
+    zit->czh = zh;
+    zit->last_entry = -1;
+}
+
+int zhash_iterator_next_volatile(zhash_iterator_t *zit, void *outkey, void *outvalue)
+{
+    const zhash_t *zh = zit->czh;
+
+    while (1) {
+        if (zit->last_entry + 1 >= zh->nentries)
+            return 0;
+
+        zit->last_entry++;
+
+        if (zh->entries[zit->last_entry * zh->entrysz]) {
+            void *this_key = &zh->entries[zit->last_entry * zh->entrysz + 1];
+            void *this_value = &zh->entries[zit->last_entry * zh->entrysz + 1 + zh->keysz];
+
+            if (outkey != NULL)
+                *((void**) outkey) = this_key;
+            if (outvalue != NULL)
+                *((void**) outvalue) = this_value;
+
+            return 1;
+        }
+    }
+}
+
+int zhash_iterator_next(zhash_iterator_t *zit, void *outkey, void *outvalue)
+{
+    const zhash_t *zh = zit->czh;
+
+    void *outkeyp, *outvaluep;
+
+    if (!zhash_iterator_next_volatile(zit, &outkeyp, &outvaluep))
+        return 0;
+
+    if (outkey != NULL)
+        memcpy(outkey, outkeyp, zh->keysz);
+    if (outvalue != NULL)
+        memcpy(outvalue, outvaluep, zh->valuesz);
+
+    return 1;
+}
+
+void zhash_iterator_remove(zhash_iterator_t *zit)
+{
+    assert(zit->zh); // can't call _remove on a iterator with const zhash
+    zhash_t *zh = zit->zh;
+
+    zh->entries[zit->last_entry * zh->entrysz] = 0;
+    zh->size--;
+
+    // re-insert following entries
+    int entry_idx = (zit->last_entry + 1) & (zh->nentries - 1);
+    while (zh->entries[entry_idx *zh->entrysz]) {
+        // completely remove this entry
+        char *tmp = malloc(sizeof(char)*zh->entrysz);
+        memcpy(tmp, &zh->entries[entry_idx * zh->entrysz], zh->entrysz);
+        zh->entries[entry_idx * zh->entrysz] = 0;
+        zh->size--;
+
+        // reinsert it
+        if (zhash_put(zh, &tmp[1], &tmp[1+zh->keysz], NULL, NULL))
+            assert(0);
+        free(tmp);
+
+        entry_idx = (entry_idx + 1) & (zh->nentries - 1);
+    }
+
+    zit->last_entry--;
+}
+
+void zhash_map_keys(zhash_t *zh, void (*f)())
+{
+    assert(zh != NULL);
+    if (f == NULL)
+        return;
+
+    zhash_iterator_t itr;
+    zhash_iterator_init(zh, &itr);
+
+    void *key, *value;
+
+    while(zhash_iterator_next_volatile(&itr, &key, &value)) {
+        f(key);
+    }
+}
+
+void zhash_vmap_keys(zhash_t * zh, void (*f)())
+{
+    assert(zh != NULL);
+    if (f == NULL)
+        return;
+
+    zhash_iterator_t itr;
+    zhash_iterator_init(zh, &itr);
+
+    void *key, *value;
+
+    while(zhash_iterator_next_volatile(&itr, &key, &value)) {
+        void *p = *(void**) key;
+        f(p);
+    }
+}
+
+void zhash_map_values(zhash_t * zh, void (*f)())
+{
+    assert(zh != NULL);
+    if (f == NULL)
+        return;
+
+    zhash_iterator_t itr;
+    zhash_iterator_init(zh, &itr);
+
+    void *key, *value;
+    while(zhash_iterator_next_volatile(&itr, &key, &value)) {
+        f(value);
+    }
+}
+
+void zhash_vmap_values(zhash_t * zh, void (*f)())
+{
+    assert(zh != NULL);
+    if (f == NULL)
+        return;
+
+    zhash_iterator_t itr;
+    zhash_iterator_init(zh, &itr);
+
+    void *key, *value;
+    while(zhash_iterator_next_volatile(&itr, &key, &value)) {
+        void *p = *(void**) value;
+        f(p);
+    }
+}
+
+zarray_t *zhash_keys(const zhash_t *zh)
+{
+    assert(zh != NULL);
+
+    zarray_t *za = zarray_create(zh->keysz);
+
+    zhash_iterator_t itr;
+    zhash_iterator_init_const(zh, &itr);
+
+    void *key, *value;
+    while(zhash_iterator_next_volatile(&itr, &key, &value)) {
+        zarray_add(za, key);
+    }
+
+    return za;
+}
+
+zarray_t *zhash_values(const zhash_t *zh)
+{
+    assert(zh != NULL);
+
+    zarray_t *za = zarray_create(zh->valuesz);
+
+    zhash_iterator_t itr;
+    zhash_iterator_init_const(zh, &itr);
+
+    void *key, *value;
+    while(zhash_iterator_next_volatile(&itr, &key, &value)) {
+        zarray_add(za, value);
+    }
+
+    return za;
+}
+
+
+uint32_t zhash_uint32_hash(const void *_a)
+{
+    assert(_a != NULL);
+
+    uint32_t a = *((uint32_t*) _a);
+    return a;
+}
+
+int zhash_uint32_equals(const void *_a, const void *_b)
+{
+    assert(_a != NULL);
+    assert(_b != NULL);
+
+    uint32_t a = *((uint32_t*) _a);
+    uint32_t b = *((uint32_t*) _b);
+
+    return a==b;
+}
+
+uint32_t zhash_uint64_hash(const void *_a)
+{
+    assert(_a != NULL);
+
+    uint64_t a = *((uint64_t*) _a);
+    return (uint32_t) (a ^ (a >> 32));
+}
+
+int zhash_uint64_equals(const void *_a, const void *_b)
+{
+    assert(_a != NULL);
+    assert(_b != NULL);
+
+    uint64_t a = *((uint64_t*) _a);
+    uint64_t b = *((uint64_t*) _b);
+
+    return a==b;
+}
+
+
+union uintpointer
+{
+    const void *p;
+    uint32_t i;
+};
+
+uint32_t zhash_ptr_hash(const void *a)
+{
+    assert(a != NULL);
+
+    union uintpointer ip;
+    ip.p = * (void**)a;
+
+    // compute a hash from the lower 32 bits of the pointer (on LE systems)
+    uint32_t hash = ip.i;
+    hash ^= (hash >> 7);
+
+    return hash;
+}
+
+
+int zhash_ptr_equals(const void *a, const void *b)
+{
+    assert(a != NULL);
+    assert(b != NULL);
+
+    const void * ptra = * (void**)a;
+    const void * ptrb = * (void**)b;
+    return  ptra == ptrb;
+}
+
+
+int zhash_str_equals(const void *_a, const void *_b)
+{
+    assert(_a != NULL);
+    assert(_b != NULL);
+
+    char *a = * (char**)_a;
+    char *b = * (char**)_b;
+
+    return !strcmp(a, b);
+}
+
+uint32_t zhash_str_hash(const void *_a)
+{
+    assert(_a != NULL);
+
+    char *a = * (char**)_a;
+
+    uint32_t hash = 0;
+    while (*a != 0) {
+        // optimization of hash x FNV_prime
+        hash += (hash << 1) + (hash << 4) + (hash << 7) + (hash << 8) + (hash << 24);
+        hash ^= *a;
+        a++;
+    }
+
+    return hash;
+}
+
+
+void zhash_debug(zhash_t *zh)
+{
+    for (int entry_idx = 0; entry_idx < zh->nentries; entry_idx++) {
+        char *k, *v;
+        memcpy(&k, &zh->entries[entry_idx * zh->entrysz + 1], sizeof(char*));
+        memcpy(&v, &zh->entries[entry_idx * zh->entrysz + 1 + zh->keysz], sizeof(char*));
+        printf("%d: %d, %s => %s\n", entry_idx, zh->entries[entry_idx * zh->entrysz], k, v);
+    }
+}
diff --git a/common/zhash.h b/common/zhash.h
new file mode 100644
index 0000000..f3dee1a
--- /dev/null
+++ b/common/zhash.h
@@ -0,0 +1,432 @@
+/* Copyright (C) 2013-2016, The Regents of The University of Michigan.
+All rights reserved.
+This software was developed in the APRIL Robotics Lab under the
+direction of Edwin Olson, ebolson@umich.edu. This software may be
+available under alternative licensing terms; contact the address above.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+1. Redistributions of source code must retain the above copyright notice, this
+   list of conditions and the following disclaimer.
+2. Redistributions in binary form must reproduce the above copyright notice,
+   this list of conditions and the following disclaimer in the documentation
+   and/or other materials provided with the distribution.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+The views and conclusions contained in the software and documentation are those
+of the authors and should not be interpreted as representing official policies,
+either expressed or implied, of the Regents of The University of Michigan.
+*/
+
+#pragma once
+
+#include <stdint.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "zarray.h"
+
+
+/**
+ * A hash table for structs and primitive types that stores entries by value.
+ *   - The size of the key/values must be known at instantiation time, and remain fixed.
+ *     e.g. for pointers: zhash_create(sizeof(void*), sizeof(void*)....)
+ *          for structs: zhash_create(sizeof(struct key_struct), sizeof(struct value_struct)...)
+ *          for bytes: zhash_create(sizeof(uint8_t), sizeof(uint8_t)...)
+ *   - Entries are copied by value. This means you must always pass a reference to the start
+ *     of 'key_size' and 'value_size' bytes, which you have already malloc'd or stack allocated
+ *   - This data structure can be used to store types of any size, from bytes & doubles to
+ *     user defined structs
+ *     Note: if zhash stores pointers, user must be careful to manually manage the lifetime
+ *     of the memory they point to.
+ *
+ */
+
+typedef struct zhash zhash_t;
+
+// The contents of the iterator should be considered private. However,
+// since our usage model prefers stack-based allocation of iterators,
+// we must publicly declare them.
+struct zhash_iterator
+{
+    zhash_t *zh;
+    const zhash_t *czh;
+    int last_entry; // points to the last entry returned by _next
+};
+
+typedef struct zhash_iterator zhash_iterator_t;
+
+/**
+ * Create, initializes, and returns an empty hash table structure. It is the
+ * caller's responsibility to call zhash_destroy() on the returned array when it
+ * is no longer needed.
+ *
+ * The size of values used in the hash and equals function must match 'keysz'.
+ * I.e. if keysz = sizeof(uint64_t), then hash() and equals() should accept
+ * parameters as *uint64_t.
+ */
+zhash_t *zhash_create(size_t keysz, size_t valuesz,
+                      uint32_t(*hash)(const void *a),
+                      int(*equals)(const void *a, const void *b));
+
+/**
+ * Frees all resources associated with the hash table structure which was
+ * created by zhash_create(). After calling, 'zh' will no longer be valid for storage.
+ *
+ * If 'zh' contains pointer data, it is the caller's responsibility to manage
+ * the resources pointed to by those pointers.
+ */
+void zhash_destroy(zhash_t *zh);
+
+/**
+ * Creates and returns a new identical copy of the zhash (i.e. a "shallow" copy).
+ * If you're storing pointers, be sure not to double free their pointees!
+ * It is the caller's responsibility to call zhash_destroy() on the returned array
+ * when it is no longer needed (in addition to the zhash_destroy() call for the
+ * original zhash).
+ */
+zhash_t * zhash_copy(const zhash_t* other);
+
+/**
+ * Determines whether the supplied key value exists as an entry in the zhash
+ * table. If zhash stores pointer types as keys, this function can differentiate
+ * between a non-existent key and a key mapped to NULL.
+ * Returns 1 if the supplied key exists in the zhash table, else 0.
+ */
+int zhash_contains(const zhash_t *zh, const void *key);
+
+/**
+ * Retrieves the value for the given key, if it exists, by copying its contents
+ * into the space pointed to by 'out_value', which must already be allocated.
+ * Returns 1 if the supplied key exists in the table, else 0, in which case
+ * the contents of 'out_value' will be unchanged.
+ */
+int zhash_get(const zhash_t *zh, const void *key, void *out_value);
+
+/**
+ * Similar to zhash_get(), but more dangerous. Provides a pointer to the zhash's
+ * internal storage.  This can be used to make simple modifications to
+ * the underlying data while avoiding the memcpys associated with
+ * zhash_get and zhash_put. However, some zhash operations (that
+ * resize the underlying storage, in particular) render this pointer
+ * invalid. For maximum safety, call no other zhash functions for the
+ * period during which you intend to use the pointer.
+ * 'out_p' should be a pointer to the pointer which will be set to the internal
+ * data address.
+ */
+int zhash_get_volatile(const zhash_t *zh, const void *key, void *out_p);
+
+/**
+ * Adds a key/value pair to the hash table, if the supplied key does not already
+ * exist in the table, or overwrites the value for the supplied key if it does
+ * already exist. In the latter case, the previous contents of the key and value
+ * will be copied into the spaces pointed to by 'oldkey' and 'oldvalue', respectively,
+ * if they are not NULL.
+ *
+ * The key/value is added to / updated in the hash table by copying 'keysz' bytes
+ * from the data pointed to by 'key' and 'valuesz' bytes from the data pointed
+ * to by 'value'. It is up to the caller to manage the memory allocation of the
+ * passed-in values, zhash will store and manage a copy.
+ *
+ * NOTE: If the key is a pointer type (such as a string), the contents of the
+ * data that it points to must not be modified after the call to zhash_put(),
+ * or future zhash calls will not successfully locate the key (using either its
+ * previous or new value).
+ *
+ * NOTE: When using array data as a key (such as a string), the array should not
+ * be passed directly or it will cause a segmentation fault when it is dereferenced.
+ * Instead, pass a pointer which points to the array location, i.e.:
+ *   char key[strlen];
+ *   char *keyptr = key;
+ *   zhash_put(zh, &keyptr, ...)
+ *
+ * Example:
+ *   char * key = ...;
+ *   zarray_t * val = ...;
+ *   char * old_key = NULL;
+ *   zarray_t * old_val = NULL;
+ *   if (zhash_put(zh, &key, &val, &old_key, &old_value))
+ *       // manage resources for old_key and old_value
+ *
+ * Returns 1 if the supplied key previously existed in the table, else 0, in
+ * which case the data pointed to by 'oldkey' and 'oldvalue' will be set to zero
+ * if they are not NULL.
+ */
+int zhash_put(zhash_t *zh, const void *key, const void *value, void *oldkey, void *oldvalue);
+
+/**
+ * Removes from the zhash table the key/value pair for the supplied key, if
+ * it exists. If it does, the contents of the key and value will be copied into
+ * the spaces pointed to by 'oldkey' and 'oldvalue', respectively, if they are
+ * not NULL. If the key does not exist, the data pointed to by 'oldkey' and
+ * 'oldvalue' will be set to zero if they are not NULL.
+ *
+ * Returns 1 if the key existed and was removed, else 0, indicating that the
+ * table contents were not changed.
+ */
+int zhash_remove(zhash_t *zh, const void *key, void *oldkey, void *oldvalue);
+
+/**
+ * Removes all entries in the has table to create the equivalent of starting from
+ * a zhash_create(), using the same size parameters. If any elements need to be
+ * freed manually, this will need to occur before calling clear.
+ */
+void zhash_clear(zhash_t *zh);
+
+/**
+ * Retrieves the current number of key/value pairs currently contained in the
+ * zhash table, or 0 if the table is empty.
+ */
+int zhash_size(const zhash_t *zh);
+
+/**
+ * Initializes an iterator which can be used to traverse the key/value pairs of
+ * the supplied zhash table via successive calls to zhash_iterator_next() or
+ * zhash_iterator_next_volatile(). The iterator can also be used to remove elements
+ * from the zhash with zhash_iterator_remove().
+ *
+ * Any modifications to the zhash table structure will invalidate the
+ * iterator, with the exception of zhash_iterator_remove().
+ */
+void zhash_iterator_init(zhash_t *zh, zhash_iterator_t *zit);
+
+/**
+ * Initializes an iterator which can be used to traverse the key/value pairs of
+ * the supplied zhash table via successive calls to zhash_iterator_next() or
+ * zhash_iterator_next_volatile().
+ *
+ * An iterator initialized with this function cannot be used with
+ * zhash_iterator_remove(). For that you must use zhash_iterator_init().
+ *
+ * Any modifications to the zhash table structure will invalidate the
+ * iterator.
+ */
+void zhash_iterator_init_const(const zhash_t *zh, zhash_iterator_t *zit);
+
+/**
+ * Retrieves the next key/value pair from a zhash table via the (previously-
+ * initialized) iterator. Copies the key and value data into the space
+ * pointed to by outkey and outvalue, respectively, if they are not NULL.
+ *
+ * Returns 1 if the call retrieved the next available key/value pair, else 0
+ * indicating that no entries remain, in which case the contents of outkey and
+ * outvalue will remain unchanged.
+ */
+int zhash_iterator_next(zhash_iterator_t *zit, void *outkey, void *outvalue);
+
+/**
+ * Similar to zhash_iterator_next() except that it retrieves a pointer to zhash's
+ * internal storage.  This can be used to avoid the memcpys associated with
+ * zhash_iterator_next(). Call no other zhash functions for the
+ * period during which you intend to use the pointer.
+ * 'outkey' and 'outvalue' should be pointers to the pointers which will be set
+ * to the internal data addresses.
+ *
+ * Example:
+ *   key_t *outkey;
+ *   value_t *outvalue;
+ *   if (zhash_iterator_next_volatile(&zit, &outkey, &outvalue))
+ *       // access internal key and value storage via outkey and outvalue
+ *
+ * Returns 1 if the call retrieved the next available key/value pair, else 0
+ * indicating that no entries remain, in which case the pointers outkey and
+ * outvalue will remain unchanged.
+ */
+int zhash_iterator_next_volatile(zhash_iterator_t *zit, void *outkey, void *outvalue);
+
+/**
+ * Removes from the zhash table the key/value pair most recently returned via
+ * a call to zhash_iterator_next() or zhash_iterator_next_volatile() for the
+ * supplied iterator.
+ *
+ * Requires that the iterator was initialized with zhash_iterator_init(),
+ * not zhash_iterator_init_const().
+ */
+void zhash_iterator_remove(zhash_iterator_t *zit);
+
+/**
+ * Calls the supplied function with a pointer to every key in the hash table in
+ * turn. The function will be passed a pointer to the table's internal storage
+ * for the key, which the caller should not modify, as the hash table will not be
+ * re-indexed. The function may be NULL, in which case no action is taken.
+ */
+void zhash_map_keys(zhash_t *zh, void (*f)());
+
+/**
+ * Calls the supplied function with a pointer to every value in the hash table in
+ * turn. The function will be passed a pointer to the table's internal storage
+ * for the value, which the caller may safely modify. The function may be NULL,
+ * in which case no action is taken.
+ */
+void zhash_map_values(zhash_t *zh, void (*f)());
+
+/**
+ * Calls the supplied function with a copy of every key in the hash table in
+ * turn. While zhash_map_keys() passes a pointer to internal storage, this function
+ * passes a copy of the actual storage. If the zhash stores pointers to data,
+ * functions like free() can be used directly with zhash_vmap_keys().
+ * The function may be NULL, in which case no action is taken.
+ *
+ * NOTE: zhash_vmap_keys() can only be used with pointer-data keys.
+ * Use with non-pointer keys (i.e. integer, double, etc.) will likely cause a
+ * segmentation fault.
+ */
+void zhash_vmap_keys(zhash_t *vh, void (*f)());
+
+/**
+ * Calls the supplied function with a copy of every value in the hash table in
+ * turn. While zhash_map_values() passes a pointer to internal storage, this function
+ * passes a copy of the actual storage. If the zhash stores pointers to data,
+ * functions like free() can be used directly with zhash_vmap_values().
+ * The function may be NULL, in which case no action is taken.
+ *
+ * NOTE: zhash_vmap_values() can only be used with pointer-data values.
+ * Use with non-pointer values (i.e. integer, double, etc.) will likely cause a
+ * segmentation fault.
+ */
+void zhash_vmap_values(zhash_t *vh, void (*f)());
+
+/**
+ * Returns an array which contains copies of all of the hash table's keys, in no
+ * particular order. It is the caller's responsibility to call zarray_destroy()
+ * on the returned structure when it is no longer needed.
+ */
+zarray_t *zhash_keys(const zhash_t *zh);
+
+/**
+ * Returns an array which contains copies of all of the hash table's values, in no
+ * particular order. It is the caller's responsibility to call zarray_destroy()
+ * on the returned structure when it is no longer needed.
+ */
+zarray_t *zhash_values(const zhash_t *zh);
+
+/**
+ * Defines a hash function which will calculate a zhash value for uint32_t input
+ * data. Can be used with zhash_create() for a key size of sizeof(uint32_t).
+ */
+uint32_t zhash_uint32_hash(const void *a);
+
+/**
+ * Defines a function to compare zhash values for uint32_t input data.
+ * Can be used with zhash_create() for a key size of sizeof(uint32_t).
+ */
+int zhash_uint32_equals(const void *a, const void *b);
+
+/**
+ * Defines a hash function which will calculate a zhash value for uint64_t input
+ * data. Can be used with zhash_create() for a key size of sizeof(uint64_t).
+ */
+uint32_t zhash_uint64_hash(const void *a);
+
+/**
+ * Defines a function to compare zhash values for uint64_t input data.
+ * Can be used with zhash_create() for a key size of sizeof(uint64_t).
+ */
+int zhash_uint64_equals(const void *a, const void *b);
+
+/////////////////////////////////////////////////////
+// functions for keys that can be compared via their pointers.
+/**
+ * Defines a hash function which will calculate a zhash value for pointer input
+ * data. Can be used with zhash_create() for a key size of sizeof(void*). Will
+ * use only the pointer value itself for computing the hash value.
+ */
+uint32_t zhash_ptr_hash(const void *a);
+
+/**
+ * Defines a function to compare zhash values for pointer input data.
+ * Can be used with zhash_create() for a key size of sizeof(void*).
+ */
+int zhash_ptr_equals(const void *a, const void *b);
+
+/////////////////////////////////////////////////////
+// Functions for string-typed keys
+/**
+ * Defines a hash function which will calculate a zhash value for string input
+ * data. Can be used with zhash_create() for a key size of sizeof(char*). Will
+ * use the contents of the string in computing the hash value.
+ */
+uint32_t zhash_str_hash(const void *a);
+
+/**
+ * Defines a function to compare zhash values for string input data.
+ * Can be used with zhash_create() for a key size of sizeof(char*).
+ */
+int zhash_str_equals(const void *a, const void *b);
+
+void zhash_debug(zhash_t *zh);
+
+    static inline zhash_t *zhash_str_str_create(void)
+    {
+        return zhash_create(sizeof(char*), sizeof(char*),
+                            zhash_str_hash, zhash_str_equals);
+    }
+
+
+
+// for zhashes that map strings to strings, this is a convenience
+// function that allows easier retrieval of values. NULL is returned
+// if the key is not found.
+static inline char *zhash_str_str_get(zhash_t *zh, const char *key)
+{
+    char *value;
+    if (zhash_get(zh, &key, &value))
+        return value;
+    return NULL;
+}
+
+    static inline void zhash_str_str_put(zhash_t *zh, char *key, char *value)
+    {
+        char *oldkey, *oldval;
+        if (zhash_put(zh, &key, &value, &oldkey, &oldval)) {
+            free(oldkey);
+            free(oldval);
+        }
+    }
+
+    static inline void zhash_str_str_destroy(zhash_t *zh)
+    {
+        zhash_iterator_t zit;
+        zhash_iterator_init(zh, &zit);
+
+        char *key, *value;
+        while (zhash_iterator_next(&zit, &key, &value)) {
+            free(key);
+            free(value);
+        }
+
+        zhash_destroy(zh);
+    }
+
+
+static inline uint32_t zhash_int_hash(const void *_a)
+{
+    assert(_a != NULL);
+
+    uint32_t a = *((int*) _a);
+    return a;
+}
+
+static inline int zhash_int_equals(const void *_a, const void *_b)
+{
+    assert(_a != NULL);
+    assert(_b != NULL);
+
+    int a = *((int*) _a);
+    int b = *((int*) _b);
+
+    return a==b;
+}
+
+#ifdef __cplusplus
+}
+#endif
diff --git a/common/zmaxheap.c b/common/zmaxheap.c
new file mode 100644
index 0000000..e04d03e
--- /dev/null
+++ b/common/zmaxheap.c
@@ -0,0 +1,422 @@
+/* Copyright (C) 2013-2016, The Regents of The University of Michigan.
+All rights reserved.
+This software was developed in the APRIL Robotics Lab under the
+direction of Edwin Olson, ebolson@umich.edu. This software may be
+available under alternative licensing terms; contact the address above.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+1. Redistributions of source code must retain the above copyright notice, this
+   list of conditions and the following disclaimer.
+2. Redistributions in binary form must reproduce the above copyright notice,
+   this list of conditions and the following disclaimer in the documentation
+   and/or other materials provided with the distribution.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+The views and conclusions contained in the software and documentation are those
+of the authors and should not be interpreted as representing official policies,
+either expressed or implied, of the Regents of The University of Michigan.
+*/
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <math.h>
+#include <assert.h>
+#include <stdint.h>
+
+#include "zmaxheap.h"
+#include "debug_print.h"
+
+#ifdef _WIN32
+static inline long int random(void)
+{
+        return rand();
+}
+#endif
+
+//                 0
+//         1               2
+//      3     4        5       6
+//     7 8   9 10    11 12   13 14
+//
+// Children of node i:  2*i+1, 2*i+2
+// Parent of node i: (i-1) / 2
+//
+// Heap property: a parent is greater than (or equal to) its children.
+
+#define MIN_CAPACITY 16
+
+struct zmaxheap
+{
+    size_t el_sz;
+
+    int size;
+    int alloc;
+
+    float *values;
+    char *data;
+
+    void (*swap)(zmaxheap_t *heap, int a, int b);
+};
+
+static inline void swap_default(zmaxheap_t *heap, int a, int b)
+{
+    float t = heap->values[a];
+    heap->values[a] = heap->values[b];
+    heap->values[b] = t;
+
+    char *tmp = malloc(sizeof(char)*heap->el_sz);
+    memcpy(tmp, &heap->data[a*heap->el_sz], heap->el_sz);
+    memcpy(&heap->data[a*heap->el_sz], &heap->data[b*heap->el_sz], heap->el_sz);
+    memcpy(&heap->data[b*heap->el_sz], tmp, heap->el_sz);
+    free(tmp);
+}
+
+static inline void swap_pointer(zmaxheap_t *heap, int a, int b)
+{
+    float t = heap->values[a];
+    heap->values[a] = heap->values[b];
+    heap->values[b] = t;
+
+    void **pp = (void**) heap->data;
+    void *tmp = pp[a];
+    pp[a] = pp[b];
+    pp[b] = tmp;
+}
+
+
+zmaxheap_t *zmaxheap_create(size_t el_sz)
+{
+    zmaxheap_t *heap = calloc(1, sizeof(zmaxheap_t));
+    heap->el_sz = el_sz;
+
+    heap->swap = swap_default;
+
+    if (el_sz == sizeof(void*))
+        heap->swap = swap_pointer;
+
+    return heap;
+}
+
+void zmaxheap_destroy(zmaxheap_t *heap)
+{
+    free(heap->values);
+    free(heap->data);
+    memset(heap, 0, sizeof(zmaxheap_t));
+    free(heap);
+}
+
+int zmaxheap_size(zmaxheap_t *heap)
+{
+    return heap->size;
+}
+
+void zmaxheap_ensure_capacity(zmaxheap_t *heap, int capacity)
+{
+    if (heap->alloc >= capacity)
+        return;
+
+    int newcap = heap->alloc;
+
+    while (newcap < capacity) {
+        if (newcap < MIN_CAPACITY) {
+            newcap = MIN_CAPACITY;
+            continue;
+        }
+
+        newcap *= 2;
+    }
+
+    heap->values = realloc(heap->values, newcap * sizeof(float));
+    heap->data = realloc(heap->data, newcap * heap->el_sz);
+    heap->alloc = newcap;
+}
+
+void zmaxheap_add(zmaxheap_t *heap, void *p, float v)
+{
+
+    assert (isfinite(v) && "zmaxheap_add: Trying to add non-finite number to heap.  NaN's prohibited, could allow INF with testing");
+    zmaxheap_ensure_capacity(heap, heap->size + 1);
+
+    int idx = heap->size;
+
+    heap->values[idx] = v;
+    memcpy(&heap->data[idx*heap->el_sz], p, heap->el_sz);
+
+    heap->size++;
+
+    while (idx > 0) {
+
+        int parent = (idx - 1) / 2;
+
+        // we're done!
+        if (heap->values[parent] >= v)
+            break;
+
+        // else, swap and recurse upwards.
+        heap->swap(heap, idx, parent);
+        idx = parent;
+    }
+}
+
+void zmaxheap_vmap(zmaxheap_t *heap, void (*f)())
+{
+    assert(heap != NULL);
+    assert(f != NULL);
+    assert(heap->el_sz == sizeof(void*));
+
+    for (int idx = 0; idx < heap->size; idx++) {
+        void *p = NULL;
+        memcpy(&p, &heap->data[idx*heap->el_sz], heap->el_sz);
+        if (p == NULL) {
+            debug_print("Warning: zmaxheap_vmap item %d is NULL\n", idx);
+        }
+        f(p);
+    }
+}
+
+// Removes the item in the heap at the given index.  Returns 1 if the
+// item existed. 0 Indicates an invalid idx (heap is smaller than
+// idx). This is mostly intended to be used by zmaxheap_remove_max.
+int zmaxheap_remove_index(zmaxheap_t *heap, int idx, void *p, float *v)
+{
+    if (idx >= heap->size)
+        return 0;
+
+    // copy out the requested element from the heap.
+    if (v != NULL)
+        *v = heap->values[idx];
+    if (p != NULL)
+        memcpy(p, &heap->data[idx*heap->el_sz], heap->el_sz);
+
+    heap->size--;
+
+    // If this element is already the last one, then there's nothing
+    // for us to do.
+    if (idx == heap->size)
+        return 1;
+
+    // copy last element to first element. (which probably upsets
+    // the heap property).
+    heap->values[idx] = heap->values[heap->size];
+    memcpy(&heap->data[idx*heap->el_sz], &heap->data[heap->el_sz * heap->size], heap->el_sz);
+
+    // now fix the heap. Note, as we descend, we're "pushing down"
+    // the same node the entire time. Thus, while the index of the
+    // parent might change, the parent_score doesn't.
+    int parent = idx;
+    float parent_score = heap->values[idx];
+
+    // descend, fixing the heap.
+    while (parent < heap->size) {
+
+        int left = 2*parent + 1;
+        int right = left + 1;
+
+//            assert(parent_score == heap->values[parent]);
+
+        float left_score = (left < heap->size) ? heap->values[left] : -INFINITY;
+        float right_score = (right < heap->size) ? heap->values[right] : -INFINITY;
+
+        // put the biggest of (parent, left, right) as the parent.
+
+        // already okay?
+        if (parent_score >= left_score && parent_score >= right_score)
+            break;
+
+        // if we got here, then one of the children is bigger than the parent.
+        if (left_score >= right_score) {
+            assert(left < heap->size);
+            heap->swap(heap, parent, left);
+            parent = left;
+        } else {
+            // right_score can't be less than left_score if right_score is -INFINITY.
+            assert(right < heap->size);
+            heap->swap(heap, parent, right);
+            parent = right;
+        }
+    }
+
+    return 1;
+}
+
+int zmaxheap_remove_max(zmaxheap_t *heap, void *p, float *v)
+{
+    return zmaxheap_remove_index(heap, 0, p, v);
+}
+
+void zmaxheap_iterator_init(zmaxheap_t *heap, zmaxheap_iterator_t *it)
+{
+    memset(it, 0, sizeof(zmaxheap_iterator_t));
+    it->heap = heap;
+    it->in = 0;
+    it->out = 0;
+}
+
+int zmaxheap_iterator_next(zmaxheap_iterator_t *it, void *p, float *v)
+{
+    zmaxheap_t *heap = it->heap;
+
+    if (it->in >= zmaxheap_size(heap))
+        return 0;
+
+    *v = heap->values[it->in];
+    memcpy(p, &heap->data[it->in*heap->el_sz], heap->el_sz);
+
+    if (it->in != it->out) {
+        heap->values[it->out] = heap->values[it->in];
+        memcpy(&heap->data[it->out*heap->el_sz], &heap->data[it->in*heap->el_sz], heap->el_sz);
+    }
+
+    it->in++;
+    it->out++;
+    return 1;
+}
+
+int zmaxheap_iterator_next_volatile(zmaxheap_iterator_t *it, void *p, float *v)
+{
+    zmaxheap_t *heap = it->heap;
+
+    if (it->in >= zmaxheap_size(heap))
+        return 0;
+
+    *v = heap->values[it->in];
+    *((void**) p) = &heap->data[it->in*heap->el_sz];
+
+    if (it->in != it->out) {
+        heap->values[it->out] = heap->values[it->in];
+        memcpy(&heap->data[it->out*heap->el_sz], &heap->data[it->in*heap->el_sz], heap->el_sz);
+    }
+
+    it->in++;
+    it->out++;
+    return 1;
+}
+
+void zmaxheap_iterator_remove(zmaxheap_iterator_t *it)
+{
+    it->out--;
+}
+
+static void maxheapify(zmaxheap_t *heap, int parent)
+{
+    int left = 2*parent + 1;
+    int right = 2*parent + 2;
+
+    int betterchild = parent;
+
+    if (left < heap->size && heap->values[left] > heap->values[betterchild])
+        betterchild = left;
+    if (right < heap->size && heap->values[right] > heap->values[betterchild])
+        betterchild = right;
+
+    if (betterchild != parent) {
+        heap->swap(heap, parent, betterchild);
+        return maxheapify(heap, betterchild);
+    }
+}
+
+#if 0 //won't compile if defined but not used
+// test the heap property
+static void validate(zmaxheap_t *heap)
+{
+    for (int parent = 0; parent < heap->size; parent++) {
+        int left = 2*parent + 1;
+        int right = 2*parent + 2;
+
+        if (left < heap->size) {
+            assert(heap->values[parent] > heap->values[left]);
+        }
+
+        if (right < heap->size) {
+            assert(heap->values[parent] > heap->values[right]);
+        }
+    }
+}
+#endif
+void zmaxheap_iterator_finish(zmaxheap_iterator_t *it)
+{
+    // if nothing was removed, no work to do.
+    if (it->in == it->out)
+        return;
+
+    zmaxheap_t *heap = it->heap;
+
+    heap->size = it->out;
+
+    // restore heap property
+    for (int i = heap->size/2 - 1; i >= 0; i--)
+        maxheapify(heap, i);
+}
+
+void zmaxheap_test()
+{
+    int cap = 10000;
+    int sz = 0;
+    int32_t *vals = calloc(sizeof(int32_t), cap);
+
+    zmaxheap_t *heap = zmaxheap_create(sizeof(int32_t));
+
+    int maxsz = 0;
+    int zcnt = 0;
+
+    for (int iter = 0; iter < 5000000; iter++) {
+        assert(sz == heap->size);
+
+        if ((random() & 1) == 0 && sz < cap) {
+            // add a value
+            int32_t v = (int32_t) (random() / 1000);
+            float fv = v;
+            assert(v == fv);
+
+            vals[sz] = v;
+            zmaxheap_add(heap, &v, fv);
+            sz++;
+
+//            printf("add %d %f\n", v, fv);
+        } else {
+            // remove a value
+            int maxv = -1, maxi = -1;
+
+            for (int i = 0; i < sz; i++) {
+                if (vals[i] > maxv) {
+                    maxv = vals[i];
+                    maxi = i;
+                }
+            }
+
+
+            int32_t outv;
+            float outfv;
+            int res = zmaxheap_remove_max(heap, &outv, &outfv);
+            if (sz == 0) {
+                assert(res == 0);
+            } else {
+//                printf("%d %d %d %f\n", sz, maxv, outv, outfv);
+                assert(outv == outfv);
+                assert(maxv == outv);
+
+                // shuffle erase the maximum from our list.
+                vals[maxi] = vals[sz - 1];
+                sz--;
+            }
+        }
+
+        if (sz > maxsz)
+            maxsz = sz;
+
+        if (maxsz > 0 && sz == 0)
+            zcnt++;
+    }
+
+    printf("max size: %d, zcount %d\n", maxsz, zcnt);
+    free (vals);
+}
diff --git a/common/zmaxheap.h b/common/zmaxheap.h
new file mode 100644
index 0000000..f0020f9
--- /dev/null
+++ b/common/zmaxheap.h
@@ -0,0 +1,76 @@
+/* Copyright (C) 2013-2016, The Regents of The University of Michigan.
+All rights reserved.
+This software was developed in the APRIL Robotics Lab under the
+direction of Edwin Olson, ebolson@umich.edu. This software may be
+available under alternative licensing terms; contact the address above.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+1. Redistributions of source code must retain the above copyright notice, this
+   list of conditions and the following disclaimer.
+2. Redistributions in binary form must reproduce the above copyright notice,
+   this list of conditions and the following disclaimer in the documentation
+   and/or other materials provided with the distribution.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+The views and conclusions contained in the software and documentation are those
+of the authors and should not be interpreted as representing official policies,
+either expressed or implied, of the Regents of The University of Michigan.
+*/
+
+#pragma once
+
+#include <stdio.h>
+
+typedef struct zmaxheap zmaxheap_t;
+
+typedef struct zmaxheap_iterator zmaxheap_iterator_t;
+struct zmaxheap_iterator {
+    zmaxheap_t *heap;
+    int in, out;
+};
+
+zmaxheap_t *zmaxheap_create(size_t el_sz);
+
+void zmaxheap_vmap(zmaxheap_t *heap, void (*f)());
+
+void zmaxheap_destroy(zmaxheap_t *heap);
+
+void zmaxheap_add(zmaxheap_t *heap, void *p, float v);
+
+int zmaxheap_size(zmaxheap_t *heap);
+
+// returns 0 if the heap is empty, so you can do
+// while (zmaxheap_remove_max(...)) { }
+int zmaxheap_remove_max(zmaxheap_t *heap, void *p, float *v);
+
+////////////////////////////////////////////
+// This is a peculiar iterator intended to support very specific (and
+// unusual) applications, and the heap is not necessarily in a valid
+// state until zmaxheap_iterator_finish is called.  Consequently, do
+// not call any other methods on the heap while iterating through.
+
+// you must provide your own storage for the iterator, and pass in a
+// pointer.
+void zmaxheap_iterator_init(zmaxheap_t *heap, zmaxheap_iterator_t *it);
+
+// Traverses the heap in top-down/left-right order. makes a copy of
+// the content into memory (p) that you provide.
+int zmaxheap_iterator_next(zmaxheap_iterator_t *it, void *p, float *v);
+
+// will set p to be a pointer to the heap's internal copy of the dfata.
+int zmaxheap_iterator_next_volatile(zmaxheap_iterator_t *it, void *p, float *v);
+
+// remove the current element.
+void zmaxheap_iterator_remove(zmaxheap_iterator_t *it);
+
+// call after all iterator operations are done. After calling this,
+// the iterator should no longer be used, but the heap methods can be.
+void zmaxheap_iterator_finish(zmaxheap_iterator_t *it);