Make the fast thresholding work
The only functional change was removing the leftover loop that ran the same
algorithm in non-flattened form after the flattened switch statement. The
fast version now has tests verifying that it produces the same result as the
slow version.
Change-Id: I63da1a6374c2adfabf764b62d5c33f91897ce2a7
diff --git a/aos/vision/blob/threshold.cc b/aos/vision/blob/threshold.cc
index 4fc58eb..74809d1 100644
--- a/aos/vision/blob/threshold.cc
+++ b/aos/vision/blob/threshold.cc
@@ -1,210 +1,230 @@
#include "aos/vision/blob/threshold.h"
+#include "aos/logging/logging.h"
+
namespace aos {
namespace vision {
+// Expands to a unique value for each combination of values for 5 bools.
#define MASH(v0, v1, v2, v3, v4) \
((uint8_t(v0) << 4) | (uint8_t(v1) << 3) | (uint8_t(v2) << 2) | \
(uint8_t(v3) << 1) | (uint8_t(v4)))
+// At a high level, the algorithm is the same as the slow thresholding, except
+// it operates in 4-pixel chunks. The handling for each of these chunks is
+// manually flattened (via codegen) into a 32-case switch statement: 2^4
+// combinations of the 4 pixels being above the threshold or not, times 2 for
+// whether the chunk starts inside a range or not.
RangeImage FastYuyvYThreshold(ImageFormat fmt, const char *data,
uint8_t value) {
- std::vector<std::vector<ImageRange>> ranges;
- ranges.reserve(fmt.h);
+ CHECK_EQ(0, fmt.w % 4);
+ std::vector<std::vector<ImageRange>> result;
+ result.reserve(fmt.h);
+
+ // Iterate through each row.
for (int y = 0; y < fmt.h; ++y) {
- const char *row = fmt.w * y * 2 + data;
- bool p_score = false;
- int pstart = -1;
- std::vector<ImageRange> rngs;
+ // The start of the data for the current row.
+ const char *const current_row = fmt.w * y * 2 + data;
+ bool in_range = false;
+ int current_range_start = -1;
+ std::vector<ImageRange> current_row_ranges;
+ // Iterate through each 4-pixel chunk (x is the chunk index, not a pixel column).
for (int x = 0; x < fmt.w / 4; ++x) {
- uint8_t v[8];
- memcpy(&v[0], row + x * 4 * 2, 8);
- uint8_t pattern =
- MASH(p_score, v[0] > value, v[2] > value, v[4] > value, v[6] > value);
+ // The per-channel (YUYV) values in the current chunk.
+ uint8_t chunk_channels[8];
+ memcpy(&chunk_channels[0], current_row + x * 4 * 2, 8);
+ const uint8_t pattern =
+ MASH(in_range, chunk_channels[0] > value, chunk_channels[2] > value,
+ chunk_channels[4] > value, chunk_channels[6] > value);
switch (pattern) {
- /*
+ // clang-format off
+/*
# Ruby code to generate the below code:
32.times do |v|
puts "case MASH(#{[v[4], v[3], v[2], v[1], v[0]].join(", ")}):"
- p_score = v[4]
- pstart = "pstart"
+ in_range = v[4]
+ current_range_start = "current_range_start"
4.times do |i|
- if v[3 - i] != p_score
- if (p_score == 1)
- puts " rngs.emplace_back(ImageRange(#{pstart},
-x * 4 + #{i}));"
+ if v[3 - i] != in_range
+ if (in_range == 1)
+ puts " current_row_ranges.emplace_back(ImageRange(#{current_range_start}, x * 4 + #{i}));"
else
- pstart = "x * 4 + #{i}"
+ current_range_start = "x * 4 + #{i}"
end
- p_score = v[3 - i]
+ in_range = v[3 - i]
end
end
- if (pstart != "pstart")
- puts " pstart = #{pstart};"
+ if (current_range_start != "current_range_start")
+ puts " current_range_start = #{current_range_start};"
end
- if (p_score != v[4])
- puts " p_score = #{["false", "true"][v[0]]};"
+ if (in_range != v[4])
+ puts " in_range = #{["false", "true"][v[0]]};"
end
puts " break;"
end
*/
+ // clang-format on
case MASH(0, 0, 0, 0, 0):
break;
case MASH(0, 0, 0, 0, 1):
- pstart = x * 4 + 3;
- p_score = true;
+ current_range_start = x * 4 + 3;
+ in_range = true;
break;
case MASH(0, 0, 0, 1, 0):
- rngs.emplace_back(ImageRange(x * 4 + 2, x * 4 + 3));
- pstart = x * 4 + 2;
+ current_row_ranges.emplace_back(ImageRange(x * 4 + 2, x * 4 + 3));
+ current_range_start = x * 4 + 2;
break;
case MASH(0, 0, 0, 1, 1):
- pstart = x * 4 + 2;
- p_score = true;
+ current_range_start = x * 4 + 2;
+ in_range = true;
break;
case MASH(0, 0, 1, 0, 0):
- rngs.emplace_back(ImageRange(x * 4 + 1, x * 4 + 2));
- pstart = x * 4 + 1;
+ current_row_ranges.emplace_back(ImageRange(x * 4 + 1, x * 4 + 2));
+ current_range_start = x * 4 + 1;
break;
case MASH(0, 0, 1, 0, 1):
- rngs.emplace_back(ImageRange(x * 4 + 1, x * 4 + 2));
- pstart = x * 4 + 3;
- p_score = true;
+ current_row_ranges.emplace_back(ImageRange(x * 4 + 1, x * 4 + 2));
+ current_range_start = x * 4 + 3;
+ in_range = true;
break;
case MASH(0, 0, 1, 1, 0):
- rngs.emplace_back(ImageRange(x * 4 + 1, x * 4 + 3));
- pstart = x * 4 + 1;
+ current_row_ranges.emplace_back(ImageRange(x * 4 + 1, x * 4 + 3));
+ current_range_start = x * 4 + 1;
break;
case MASH(0, 0, 1, 1, 1):
- pstart = x * 4 + 1;
- p_score = true;
+ current_range_start = x * 4 + 1;
+ in_range = true;
break;
case MASH(0, 1, 0, 0, 0):
- rngs.emplace_back(ImageRange(x * 4 + 0, x * 4 + 1));
- pstart = x * 4 + 0;
+ current_row_ranges.emplace_back(ImageRange(x * 4 + 0, x * 4 + 1));
+ current_range_start = x * 4 + 0;
break;
case MASH(0, 1, 0, 0, 1):
- rngs.emplace_back(ImageRange(x * 4 + 0, x * 4 + 1));
- pstart = x * 4 + 3;
- p_score = true;
+ current_row_ranges.emplace_back(ImageRange(x * 4 + 0, x * 4 + 1));
+ current_range_start = x * 4 + 3;
+ in_range = true;
break;
case MASH(0, 1, 0, 1, 0):
- rngs.emplace_back(ImageRange(x * 4 + 0, x * 4 + 1));
- rngs.emplace_back(ImageRange(x * 4 + 2, x * 4 + 3));
- pstart = x * 4 + 2;
+ current_row_ranges.emplace_back(ImageRange(x * 4 + 0, x * 4 + 1));
+ current_row_ranges.emplace_back(ImageRange(x * 4 + 2, x * 4 + 3));
+ current_range_start = x * 4 + 2;
break;
case MASH(0, 1, 0, 1, 1):
- rngs.emplace_back(ImageRange(x * 4 + 0, x * 4 + 1));
- pstart = x * 4 + 2;
- p_score = true;
+ current_row_ranges.emplace_back(ImageRange(x * 4 + 0, x * 4 + 1));
+ current_range_start = x * 4 + 2;
+ in_range = true;
break;
case MASH(0, 1, 1, 0, 0):
- rngs.emplace_back(ImageRange(x * 4 + 0, x * 4 + 2));
- pstart = x * 4 + 0;
+ current_row_ranges.emplace_back(ImageRange(x * 4 + 0, x * 4 + 2));
+ current_range_start = x * 4 + 0;
break;
case MASH(0, 1, 1, 0, 1):
- rngs.emplace_back(ImageRange(x * 4 + 0, x * 4 + 2));
- pstart = x * 4 + 3;
- p_score = true;
+ current_row_ranges.emplace_back(ImageRange(x * 4 + 0, x * 4 + 2));
+ current_range_start = x * 4 + 3;
+ in_range = true;
break;
case MASH(0, 1, 1, 1, 0):
- rngs.emplace_back(ImageRange(x * 4 + 0, x * 4 + 3));
- pstart = x * 4 + 0;
+ current_row_ranges.emplace_back(ImageRange(x * 4 + 0, x * 4 + 3));
+ current_range_start = x * 4 + 0;
break;
case MASH(0, 1, 1, 1, 1):
- pstart = x * 4 + 0;
- p_score = true;
+ current_range_start = x * 4 + 0;
+ in_range = true;
break;
case MASH(1, 0, 0, 0, 0):
- rngs.emplace_back(ImageRange(pstart, x * 4 + 0));
- p_score = false;
+ current_row_ranges.emplace_back(
+ ImageRange(current_range_start, x * 4 + 0));
+ in_range = false;
break;
case MASH(1, 0, 0, 0, 1):
- rngs.emplace_back(ImageRange(pstart, x * 4 + 0));
- pstart = x * 4 + 3;
+ current_row_ranges.emplace_back(
+ ImageRange(current_range_start, x * 4 + 0));
+ current_range_start = x * 4 + 3;
break;
case MASH(1, 0, 0, 1, 0):
- rngs.emplace_back(ImageRange(pstart, x * 4 + 0));
- rngs.emplace_back(ImageRange(x * 4 + 2, x * 4 + 3));
- pstart = x * 4 + 2;
- p_score = false;
+ current_row_ranges.emplace_back(
+ ImageRange(current_range_start, x * 4 + 0));
+ current_row_ranges.emplace_back(ImageRange(x * 4 + 2, x * 4 + 3));
+ current_range_start = x * 4 + 2;
+ in_range = false;
break;
case MASH(1, 0, 0, 1, 1):
- rngs.emplace_back(ImageRange(pstart, x * 4 + 0));
- pstart = x * 4 + 2;
+ current_row_ranges.emplace_back(
+ ImageRange(current_range_start, x * 4 + 0));
+ current_range_start = x * 4 + 2;
break;
case MASH(1, 0, 1, 0, 0):
- rngs.emplace_back(ImageRange(pstart, x * 4 + 0));
- rngs.emplace_back(ImageRange(x * 4 + 1, x * 4 + 2));
- pstart = x * 4 + 1;
- p_score = false;
+ current_row_ranges.emplace_back(
+ ImageRange(current_range_start, x * 4 + 0));
+ current_row_ranges.emplace_back(ImageRange(x * 4 + 1, x * 4 + 2));
+ current_range_start = x * 4 + 1;
+ in_range = false;
break;
case MASH(1, 0, 1, 0, 1):
- rngs.emplace_back(ImageRange(pstart, x * 4 + 0));
- rngs.emplace_back(ImageRange(x * 4 + 1, x * 4 + 2));
- pstart = x * 4 + 3;
+ current_row_ranges.emplace_back(
+ ImageRange(current_range_start, x * 4 + 0));
+ current_row_ranges.emplace_back(ImageRange(x * 4 + 1, x * 4 + 2));
+ current_range_start = x * 4 + 3;
break;
case MASH(1, 0, 1, 1, 0):
- rngs.emplace_back(ImageRange(pstart, x * 4 + 0));
- rngs.emplace_back(ImageRange(x * 4 + 1, x * 4 + 3));
- pstart = x * 4 + 1;
- p_score = false;
+ current_row_ranges.emplace_back(
+ ImageRange(current_range_start, x * 4 + 0));
+ current_row_ranges.emplace_back(ImageRange(x * 4 + 1, x * 4 + 3));
+ current_range_start = x * 4 + 1;
+ in_range = false;
break;
case MASH(1, 0, 1, 1, 1):
- rngs.emplace_back(ImageRange(pstart, x * 4 + 0));
- pstart = x * 4 + 1;
+ current_row_ranges.emplace_back(
+ ImageRange(current_range_start, x * 4 + 0));
+ current_range_start = x * 4 + 1;
break;
case MASH(1, 1, 0, 0, 0):
- rngs.emplace_back(ImageRange(pstart, x * 4 + 1));
- p_score = false;
+ current_row_ranges.emplace_back(
+ ImageRange(current_range_start, x * 4 + 1));
+ in_range = false;
break;
case MASH(1, 1, 0, 0, 1):
- rngs.emplace_back(ImageRange(pstart, x * 4 + 1));
- pstart = x * 4 + 3;
+ current_row_ranges.emplace_back(
+ ImageRange(current_range_start, x * 4 + 1));
+ current_range_start = x * 4 + 3;
break;
case MASH(1, 1, 0, 1, 0):
- rngs.emplace_back(ImageRange(pstart, x * 4 + 1));
- rngs.emplace_back(ImageRange(x * 4 + 2, x * 4 + 3));
- pstart = x * 4 + 2;
- p_score = false;
+ current_row_ranges.emplace_back(
+ ImageRange(current_range_start, x * 4 + 1));
+ current_row_ranges.emplace_back(ImageRange(x * 4 + 2, x * 4 + 3));
+ current_range_start = x * 4 + 2;
+ in_range = false;
break;
case MASH(1, 1, 0, 1, 1):
- rngs.emplace_back(ImageRange(pstart, x * 4 + 1));
- pstart = x * 4 + 2;
+ current_row_ranges.emplace_back(
+ ImageRange(current_range_start, x * 4 + 1));
+ current_range_start = x * 4 + 2;
break;
case MASH(1, 1, 1, 0, 0):
- rngs.emplace_back(ImageRange(pstart, x * 4 + 2));
- p_score = false;
+ current_row_ranges.emplace_back(
+ ImageRange(current_range_start, x * 4 + 2));
+ in_range = false;
break;
case MASH(1, 1, 1, 0, 1):
- rngs.emplace_back(ImageRange(pstart, x * 4 + 2));
- pstart = x * 4 + 3;
+ current_row_ranges.emplace_back(
+ ImageRange(current_range_start, x * 4 + 2));
+ current_range_start = x * 4 + 3;
break;
case MASH(1, 1, 1, 1, 0):
- rngs.emplace_back(ImageRange(pstart, x * 4 + 3));
- p_score = false;
+ current_row_ranges.emplace_back(
+ ImageRange(current_range_start, x * 4 + 3));
+ in_range = false;
break;
case MASH(1, 1, 1, 1, 1):
break;
}
-
- for (int i = 0; i < 4; ++i) {
- if ((v[i * 2] > value) != p_score) {
- if (p_score) {
- rngs.emplace_back(ImageRange(pstart, x * 4 + i));
- } else {
- pstart = x * 4 + i;
- }
- p_score = !p_score;
- }
- }
}
- if (p_score) {
- rngs.emplace_back(ImageRange(pstart, fmt.w));
+ if (in_range) {
+ current_row_ranges.emplace_back(ImageRange(current_range_start, fmt.w));
}
- ranges.push_back(rngs);
+ result.push_back(current_row_ranges);
}
- return RangeImage(0, std::move(ranges));
+ return RangeImage(0, std::move(result));
}
#undef MASH