Put all the thresholding code in the same place

This will make it easier to write tests validating the fast version.

Change-Id: I27cc372cc848c495d10a138b3c149d550dc0dad9
diff --git a/aos/vision/blob/BUILD b/aos/vision/blob/BUILD
index 69a2cc5..d22aa4d 100644
--- a/aos/vision/blob/BUILD
+++ b/aos/vision/blob/BUILD
@@ -37,7 +37,12 @@
 
 cc_library(
     name = "threshold",
-    hdrs = ["threshold.h"],
+    srcs = [
+        "threshold.cc",
+    ],
+    hdrs = [
+        "threshold.h",
+    ],
     deps = [
         ":range_image",
         "//aos/vision/image:image_types",
diff --git a/aos/vision/blob/threshold.cc b/aos/vision/blob/threshold.cc
new file mode 100644
index 0000000..4fc58eb
--- /dev/null
+++ b/aos/vision/blob/threshold.cc
@@ -0,0 +1,213 @@
+#include "aos/vision/blob/threshold.h"
+
+namespace aos {
+namespace vision {
+
+#define MASH(v0, v1, v2, v3, v4)                                  \
+  ((uint8_t(v0) << 4) | (uint8_t(v1) << 3) | (uint8_t(v2) << 2) | \
+   (uint8_t(v3) << 1) | (uint8_t(v4)))
+
+RangeImage FastYuyvYThreshold(ImageFormat fmt, const char *data,
+                              uint8_t value) {
+  std::vector<std::vector<ImageRange>> ranges;
+  ranges.reserve(fmt.h);
+  for (int y = 0; y < fmt.h; ++y) {
+    const char *row = fmt.w * y * 2 + data;
+    bool p_score = false;
+    int pstart = -1;
+    std::vector<ImageRange> rngs;
+    for (int x = 0; x < fmt.w / 4; ++x) {
+      uint8_t v[8];
+      memcpy(&v[0], row + x * 4 * 2, 8);
+      uint8_t pattern =
+          MASH(p_score, v[0] > value, v[2] > value, v[4] > value, v[6] > value);
+      switch (pattern) {
+        /*
+# Ruby code to generate the below code:
+32.times do |v|
+        puts "case MASH(#{[v[4], v[3], v[2], v[1], v[0]].join(", ")}):"
+        p_score = v[4]
+        pstart = "pstart"
+        4.times do |i|
+                if v[3 - i] != p_score
+                        if (p_score == 1)
+                                puts "  rngs.emplace_back(ImageRange(#{pstart},
+x * 4 + #{i}));"
+                        else
+                                pstart = "x * 4 + #{i}"
+                        end
+                        p_score = v[3 - i]
+                end
+        end
+        if (pstart != "pstart")
+                puts "  pstart = #{pstart};"
+        end
+        if (p_score != v[4])
+                puts "  p_score = #{["false", "true"][v[0]]};"
+        end
+        puts "  break;"
+end
+*/
+        case MASH(0, 0, 0, 0, 0):
+          break;
+        case MASH(0, 0, 0, 0, 1):
+          pstart = x * 4 + 3;
+          p_score = true;
+          break;
+        case MASH(0, 0, 0, 1, 0):
+          rngs.emplace_back(ImageRange(x * 4 + 2, x * 4 + 3));
+          pstart = x * 4 + 2;
+          break;
+        case MASH(0, 0, 0, 1, 1):
+          pstart = x * 4 + 2;
+          p_score = true;
+          break;
+        case MASH(0, 0, 1, 0, 0):
+          rngs.emplace_back(ImageRange(x * 4 + 1, x * 4 + 2));
+          pstart = x * 4 + 1;
+          break;
+        case MASH(0, 0, 1, 0, 1):
+          rngs.emplace_back(ImageRange(x * 4 + 1, x * 4 + 2));
+          pstart = x * 4 + 3;
+          p_score = true;
+          break;
+        case MASH(0, 0, 1, 1, 0):
+          rngs.emplace_back(ImageRange(x * 4 + 1, x * 4 + 3));
+          pstart = x * 4 + 1;
+          break;
+        case MASH(0, 0, 1, 1, 1):
+          pstart = x * 4 + 1;
+          p_score = true;
+          break;
+        case MASH(0, 1, 0, 0, 0):
+          rngs.emplace_back(ImageRange(x * 4 + 0, x * 4 + 1));
+          pstart = x * 4 + 0;
+          break;
+        case MASH(0, 1, 0, 0, 1):
+          rngs.emplace_back(ImageRange(x * 4 + 0, x * 4 + 1));
+          pstart = x * 4 + 3;
+          p_score = true;
+          break;
+        case MASH(0, 1, 0, 1, 0):
+          rngs.emplace_back(ImageRange(x * 4 + 0, x * 4 + 1));
+          rngs.emplace_back(ImageRange(x * 4 + 2, x * 4 + 3));
+          pstart = x * 4 + 2;
+          break;
+        case MASH(0, 1, 0, 1, 1):
+          rngs.emplace_back(ImageRange(x * 4 + 0, x * 4 + 1));
+          pstart = x * 4 + 2;
+          p_score = true;
+          break;
+        case MASH(0, 1, 1, 0, 0):
+          rngs.emplace_back(ImageRange(x * 4 + 0, x * 4 + 2));
+          pstart = x * 4 + 0;
+          break;
+        case MASH(0, 1, 1, 0, 1):
+          rngs.emplace_back(ImageRange(x * 4 + 0, x * 4 + 2));
+          pstart = x * 4 + 3;
+          p_score = true;
+          break;
+        case MASH(0, 1, 1, 1, 0):
+          rngs.emplace_back(ImageRange(x * 4 + 0, x * 4 + 3));
+          pstart = x * 4 + 0;
+          break;
+        case MASH(0, 1, 1, 1, 1):
+          pstart = x * 4 + 0;
+          p_score = true;
+          break;
+        case MASH(1, 0, 0, 0, 0):
+          rngs.emplace_back(ImageRange(pstart, x * 4 + 0));
+          p_score = false;
+          break;
+        case MASH(1, 0, 0, 0, 1):
+          rngs.emplace_back(ImageRange(pstart, x * 4 + 0));
+          pstart = x * 4 + 3;
+          break;
+        case MASH(1, 0, 0, 1, 0):
+          rngs.emplace_back(ImageRange(pstart, x * 4 + 0));
+          rngs.emplace_back(ImageRange(x * 4 + 2, x * 4 + 3));
+          pstart = x * 4 + 2;
+          p_score = false;
+          break;
+        case MASH(1, 0, 0, 1, 1):
+          rngs.emplace_back(ImageRange(pstart, x * 4 + 0));
+          pstart = x * 4 + 2;
+          break;
+        case MASH(1, 0, 1, 0, 0):
+          rngs.emplace_back(ImageRange(pstart, x * 4 + 0));
+          rngs.emplace_back(ImageRange(x * 4 + 1, x * 4 + 2));
+          pstart = x * 4 + 1;
+          p_score = false;
+          break;
+        case MASH(1, 0, 1, 0, 1):
+          rngs.emplace_back(ImageRange(pstart, x * 4 + 0));
+          rngs.emplace_back(ImageRange(x * 4 + 1, x * 4 + 2));
+          pstart = x * 4 + 3;
+          break;
+        case MASH(1, 0, 1, 1, 0):
+          rngs.emplace_back(ImageRange(pstart, x * 4 + 0));
+          rngs.emplace_back(ImageRange(x * 4 + 1, x * 4 + 3));
+          pstart = x * 4 + 1;
+          p_score = false;
+          break;
+        case MASH(1, 0, 1, 1, 1):
+          rngs.emplace_back(ImageRange(pstart, x * 4 + 0));
+          pstart = x * 4 + 1;
+          break;
+        case MASH(1, 1, 0, 0, 0):
+          rngs.emplace_back(ImageRange(pstart, x * 4 + 1));
+          p_score = false;
+          break;
+        case MASH(1, 1, 0, 0, 1):
+          rngs.emplace_back(ImageRange(pstart, x * 4 + 1));
+          pstart = x * 4 + 3;
+          break;
+        case MASH(1, 1, 0, 1, 0):
+          rngs.emplace_back(ImageRange(pstart, x * 4 + 1));
+          rngs.emplace_back(ImageRange(x * 4 + 2, x * 4 + 3));
+          pstart = x * 4 + 2;
+          p_score = false;
+          break;
+        case MASH(1, 1, 0, 1, 1):
+          rngs.emplace_back(ImageRange(pstart, x * 4 + 1));
+          pstart = x * 4 + 2;
+          break;
+        case MASH(1, 1, 1, 0, 0):
+          rngs.emplace_back(ImageRange(pstart, x * 4 + 2));
+          p_score = false;
+          break;
+        case MASH(1, 1, 1, 0, 1):
+          rngs.emplace_back(ImageRange(pstart, x * 4 + 2));
+          pstart = x * 4 + 3;
+          break;
+        case MASH(1, 1, 1, 1, 0):
+          rngs.emplace_back(ImageRange(pstart, x * 4 + 3));
+          p_score = false;
+          break;
+        case MASH(1, 1, 1, 1, 1):
+          break;
+      }
+
+      for (int i = 0; i < 4; ++i) {
+        if ((v[i * 2] > value) != p_score) {
+          if (p_score) {
+            rngs.emplace_back(ImageRange(pstart, x * 4 + i));
+          } else {
+            pstart = x * 4 + i;
+          }
+          p_score = !p_score;
+        }
+      }
+    }
+    if (p_score) {
+      rngs.emplace_back(ImageRange(pstart, fmt.w));
+    }
+    ranges.push_back(rngs);
+  }
+  return RangeImage(0, std::move(ranges));
+}
+
+#undef MASH
+
+}  // namespace vision
+}  // namespace aos
diff --git a/aos/vision/blob/threshold.h b/aos/vision/blob/threshold.h
index eef5b20..441a058 100644
--- a/aos/vision/blob/threshold.h
+++ b/aos/vision/blob/threshold.h
@@ -1,15 +1,22 @@
-#ifndef _AOS_VIISON_BLOB_THRESHOLD_H_
-#define _AOS_VIISON_BLOB_THRESHOLD_H_
+#ifndef AOS_VISION_BLOB_THRESHOLD_H_
+#define AOS_VISION_BLOB_THRESHOLD_H_
 
 #include "aos/vision/blob/range_image.h"
 #include "aos/vision/image/image_types.h"
 
 namespace aos {
 namespace vision {
+namespace threshold_internal {
 
-// ThresholdFn should be a lambda.
-template <typename ThresholdFn>
-RangeImage DoThreshold(ImageFormat fmt, ThresholdFn &&fn) {
+// Performs thresholding in a given region using a function which determines
+// whether a given point is in or out of the region.
+//
+// fn must return a bool when called with two integers (x, y).
+template <typename PointTestFn>
+RangeImage ThresholdPointsWithFunction(ImageFormat fmt, PointTestFn &&fn) {
+  static_assert(
+      std::is_convertible<PointTestFn, std::function<bool(int, int)>>::value,
+      "Invalid threshold function");
   std::vector<std::vector<ImageRange>> ranges;
   ranges.reserve(fmt.h);
   for (int y = 0; y < fmt.h; ++y) {
@@ -34,23 +41,43 @@
   return RangeImage(0, std::move(ranges));
 }
 
-// ThresholdFn should be a lambda.
+}  // namespace threshold_internal
+
+// Thresholds an image using a function which determines whether a given pixel
+// value is in or out of the region.
+//
+// fn must return a bool when called with a PixelRef.
 template <typename ThresholdFn>
-RangeImage DoThreshold(const ImagePtr &img, ThresholdFn &&fn) {
-  return DoThreshold(img.fmt(),
-                     [&](int x, int y) { return fn(img.get_px(x, y)); });
+RangeImage ThresholdImageWithFunction(const ImagePtr &img, ThresholdFn &&fn) {
+  static_assert(
+      std::is_convertible<ThresholdFn, std::function<bool(PixelRef)>>::value,
+      "Invalid threshold function");
+  return threshold_internal::ThresholdPointsWithFunction(
+      img.fmt(), [&](int x, int y) { return fn(img.get_px(x, y)); });
 }
 
-// YUYV image types:
-inline RangeImage DoThresholdYUYV(ImageFormat fmt, const char *data,
-                                  uint8_t value) {
-  return DoThreshold(fmt, [&](int x, int y) {
-    uint8_t v = data[y * fmt.w * 2 + x * 2];
-    return v > value;
-  });
+// Thresholds an image in YUYV format, selecting pixels with a Y (luma) greater
+// than value.
+//
+// This is implemented via a simple function that pulls out the Y values and
+// compares them each. It mostly exists for tests to compare against
+// FastYuyvYThreshold, because it's obviously correct.
+inline RangeImage SlowYuyvYThreshold(ImageFormat fmt, const char *data,
+                                     uint8_t value) {
+  return threshold_internal::ThresholdPointsWithFunction(
+      fmt, [&](int x, int y) {
+        uint8_t v = data[x * 2 + y * fmt.w * 2];
+        return v > value;
+      });
 }
 
+// Thresholds an image in YUYV format, selecting pixels with a Y (luma) greater
+// than value.
+//
+// This is implemented via some tricky bit shuffling that goes fast.
+RangeImage FastYuyvYThreshold(ImageFormat fmt, const char *data, uint8_t value);
+
 }  // namespace vision
 }  // namespace aos
 
-#endif  //  _AOS_VIISON_BLOB_THRESHOLD_H_
+#endif  //  AOS_VISION_BLOB_THRESHOLD_H_