Switch to faster thresholding code
The hand-unrolled version wasn't actually faster. The earlier
compiler-unrollable version is faster, though, and it is also much
shorter.
Change-Id: I450c4f7ae12e25e06da0b60a308eaeaea4cb05d7
diff --git a/aos/vision/blob/threshold.cc b/aos/vision/blob/threshold.cc
index 74809d1..36dcafe 100644
--- a/aos/vision/blob/threshold.cc
+++ b/aos/vision/blob/threshold.cc
@@ -4,20 +4,17 @@
namespace aos {
namespace vision {
+namespace {
-// Expands to a unique value for each combination of values for 5 bools.
-#define MASH(v0, v1, v2, v3, v4) \
- ((uint8_t(v0) << 4) | (uint8_t(v1) << 3) | (uint8_t(v2) << 2) | \
- (uint8_t(v3) << 1) | (uint8_t(v4)))
+constexpr int kChunkSize = 8;
+
+} // namespace
// At a high level, the algorithm is the same as the slow thresholding, except
-// it operates in 4-pixel chunks. The handling for each of these chunks is
-// manually flattened (via codegen) into a 32-case switch statement. There are
-// 2^4 cases for each pixel being in or out, along with another set of cases
-// depending on whether the start of the chunk is in a range or not.
+// it operates in kChunkSize-pixel chunks.
RangeImage FastYuyvYThreshold(ImageFormat fmt, const char *data,
uint8_t value) {
- CHECK_EQ(0, fmt.w % 4);
+ CHECK_EQ(0, fmt.w % kChunkSize);
std::vector<std::vector<ImageRange>> result;
result.reserve(fmt.h);
@@ -28,195 +25,22 @@
bool in_range = false;
int current_range_start = -1;
std::vector<ImageRange> current_row_ranges;
- // Iterate through each 4-pixel chunk
- for (int x = 0; x < fmt.w / 4; ++x) {
+ // Iterate through each kChunkSize-pixel chunk
+ for (int x = 0; x < fmt.w / kChunkSize; ++x) {
// The per-channel (YUYV) values in the current chunk.
- uint8_t chunk_channels[8];
- memcpy(&chunk_channels[0], current_row + x * 4 * 2, 8);
- const uint8_t pattern =
- MASH(in_range, chunk_channels[0] > value, chunk_channels[2] > value,
- chunk_channels[4] > value, chunk_channels[6] > value);
- switch (pattern) {
- // clang-format off
-/*
-# Ruby code to generate the below code:
-32.times do |v|
- puts "case MASH(#{[v[4], v[3], v[2], v[1], v[0]].join(", ")}):"
- in_range = v[4]
- current_range_start = "current_range_start"
- 4.times do |i|
- if v[3 - i] != in_range
- if (in_range == 1)
- puts " current_row_ranges.emplace_back(ImageRange(#{current_range_start}, x * 4 + #{i}));"
- else
- current_range_start = "x * 4 + #{i}"
- end
- in_range = v[3 - i]
- end
- end
- if (current_range_start != "current_range_start")
- puts " current_range_start = #{current_range_start};"
- end
- if (in_range != v[4])
- puts " in_range = #{["false", "true"][v[0]]};"
- end
- puts " break;"
-end
-*/
- // clang-format on
- case MASH(0, 0, 0, 0, 0):
- break;
- case MASH(0, 0, 0, 0, 1):
- current_range_start = x * 4 + 3;
- in_range = true;
- break;
- case MASH(0, 0, 0, 1, 0):
- current_row_ranges.emplace_back(ImageRange(x * 4 + 2, x * 4 + 3));
- current_range_start = x * 4 + 2;
- break;
- case MASH(0, 0, 0, 1, 1):
- current_range_start = x * 4 + 2;
- in_range = true;
- break;
- case MASH(0, 0, 1, 0, 0):
- current_row_ranges.emplace_back(ImageRange(x * 4 + 1, x * 4 + 2));
- current_range_start = x * 4 + 1;
- break;
- case MASH(0, 0, 1, 0, 1):
- current_row_ranges.emplace_back(ImageRange(x * 4 + 1, x * 4 + 2));
- current_range_start = x * 4 + 3;
- in_range = true;
- break;
- case MASH(0, 0, 1, 1, 0):
- current_row_ranges.emplace_back(ImageRange(x * 4 + 1, x * 4 + 3));
- current_range_start = x * 4 + 1;
- break;
- case MASH(0, 0, 1, 1, 1):
- current_range_start = x * 4 + 1;
- in_range = true;
- break;
- case MASH(0, 1, 0, 0, 0):
- current_row_ranges.emplace_back(ImageRange(x * 4 + 0, x * 4 + 1));
- current_range_start = x * 4 + 0;
- break;
- case MASH(0, 1, 0, 0, 1):
- current_row_ranges.emplace_back(ImageRange(x * 4 + 0, x * 4 + 1));
- current_range_start = x * 4 + 3;
- in_range = true;
- break;
- case MASH(0, 1, 0, 1, 0):
- current_row_ranges.emplace_back(ImageRange(x * 4 + 0, x * 4 + 1));
- current_row_ranges.emplace_back(ImageRange(x * 4 + 2, x * 4 + 3));
- current_range_start = x * 4 + 2;
- break;
- case MASH(0, 1, 0, 1, 1):
- current_row_ranges.emplace_back(ImageRange(x * 4 + 0, x * 4 + 1));
- current_range_start = x * 4 + 2;
- in_range = true;
- break;
- case MASH(0, 1, 1, 0, 0):
- current_row_ranges.emplace_back(ImageRange(x * 4 + 0, x * 4 + 2));
- current_range_start = x * 4 + 0;
- break;
- case MASH(0, 1, 1, 0, 1):
- current_row_ranges.emplace_back(ImageRange(x * 4 + 0, x * 4 + 2));
- current_range_start = x * 4 + 3;
- in_range = true;
- break;
- case MASH(0, 1, 1, 1, 0):
- current_row_ranges.emplace_back(ImageRange(x * 4 + 0, x * 4 + 3));
- current_range_start = x * 4 + 0;
- break;
- case MASH(0, 1, 1, 1, 1):
- current_range_start = x * 4 + 0;
- in_range = true;
- break;
- case MASH(1, 0, 0, 0, 0):
- current_row_ranges.emplace_back(
- ImageRange(current_range_start, x * 4 + 0));
- in_range = false;
- break;
- case MASH(1, 0, 0, 0, 1):
- current_row_ranges.emplace_back(
- ImageRange(current_range_start, x * 4 + 0));
- current_range_start = x * 4 + 3;
- break;
- case MASH(1, 0, 0, 1, 0):
- current_row_ranges.emplace_back(
- ImageRange(current_range_start, x * 4 + 0));
- current_row_ranges.emplace_back(ImageRange(x * 4 + 2, x * 4 + 3));
- current_range_start = x * 4 + 2;
- in_range = false;
- break;
- case MASH(1, 0, 0, 1, 1):
- current_row_ranges.emplace_back(
- ImageRange(current_range_start, x * 4 + 0));
- current_range_start = x * 4 + 2;
- break;
- case MASH(1, 0, 1, 0, 0):
- current_row_ranges.emplace_back(
- ImageRange(current_range_start, x * 4 + 0));
- current_row_ranges.emplace_back(ImageRange(x * 4 + 1, x * 4 + 2));
- current_range_start = x * 4 + 1;
- in_range = false;
- break;
- case MASH(1, 0, 1, 0, 1):
- current_row_ranges.emplace_back(
- ImageRange(current_range_start, x * 4 + 0));
- current_row_ranges.emplace_back(ImageRange(x * 4 + 1, x * 4 + 2));
- current_range_start = x * 4 + 3;
- break;
- case MASH(1, 0, 1, 1, 0):
- current_row_ranges.emplace_back(
- ImageRange(current_range_start, x * 4 + 0));
- current_row_ranges.emplace_back(ImageRange(x * 4 + 1, x * 4 + 3));
- current_range_start = x * 4 + 1;
- in_range = false;
- break;
- case MASH(1, 0, 1, 1, 1):
- current_row_ranges.emplace_back(
- ImageRange(current_range_start, x * 4 + 0));
- current_range_start = x * 4 + 1;
- break;
- case MASH(1, 1, 0, 0, 0):
- current_row_ranges.emplace_back(
- ImageRange(current_range_start, x * 4 + 1));
- in_range = false;
- break;
- case MASH(1, 1, 0, 0, 1):
- current_row_ranges.emplace_back(
- ImageRange(current_range_start, x * 4 + 1));
- current_range_start = x * 4 + 3;
- break;
- case MASH(1, 1, 0, 1, 0):
- current_row_ranges.emplace_back(
- ImageRange(current_range_start, x * 4 + 1));
- current_row_ranges.emplace_back(ImageRange(x * 4 + 2, x * 4 + 3));
- current_range_start = x * 4 + 2;
- in_range = false;
- break;
- case MASH(1, 1, 0, 1, 1):
- current_row_ranges.emplace_back(
- ImageRange(current_range_start, x * 4 + 1));
- current_range_start = x * 4 + 2;
- break;
- case MASH(1, 1, 1, 0, 0):
- current_row_ranges.emplace_back(
- ImageRange(current_range_start, x * 4 + 2));
- in_range = false;
- break;
- case MASH(1, 1, 1, 0, 1):
- current_row_ranges.emplace_back(
- ImageRange(current_range_start, x * 4 + 2));
- current_range_start = x * 4 + 3;
- break;
- case MASH(1, 1, 1, 1, 0):
- current_row_ranges.emplace_back(
- ImageRange(current_range_start, x * 4 + 3));
- in_range = false;
- break;
- case MASH(1, 1, 1, 1, 1):
- break;
+ uint8_t chunk_channels[2 * kChunkSize];
+ memcpy(&chunk_channels[0], current_row + x * kChunkSize * 2, 2 * kChunkSize);
+
+ for (int i = 0; i < kChunkSize; ++i) {
+ if ((chunk_channels[i * 2] > value) != in_range) {
+ const int here = x * kChunkSize + i;
+ if (in_range) {
+ current_row_ranges.emplace_back(ImageRange(current_range_start, here));
+ } else {
+ current_range_start = here;
+ }
+ in_range = !in_range;
+ }
}
}
if (in_range) {
@@ -227,7 +51,5 @@
return RangeImage(0, std::move(result));
}
-#undef MASH
-
} // namespace vision
} // namespace aos