Make SIFT faster
This uses various Halide-optimized functions to do the actual image
processing. It still finds around the same number of features, but runs much
faster.
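
For reference, a rough sketch of calling the new helpers (the signatures are
from fast_gaussian.h; "input" stands for any CV_16SC1 cv::Mat here, and the
sigma is one of the values from the table in the BUILD file):

  cv::Mat blurred, difference;
  // Blur the input and compute (blurred - input) in one fused pass.
  frc971::vision::FastGaussianAndSubtract(input, &blurred, &difference,
                                          1.5198683738708496);
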
Change-Id: I9d7f7093b0ec41acf7ed16b2c91cdadada2f9a22
diff --git a/y2020/vision/sift/BUILD b/y2020/vision/sift/BUILD
index 0a2cb3a..d3bafc6 100644
--- a/y2020/vision/sift/BUILD
+++ b/y2020/vision/sift/BUILD
@@ -1,3 +1,116 @@
+load(":fast_gaussian.bzl", "fast_gaussian")
+
+cc_binary(
+ name = "fast_gaussian_generator",
+ srcs = [
+ "fast_gaussian_generator.cc",
+ ],
+ restricted_to = [
+ "//tools:k8",
+ "//tools:armhf-debian",
+ ],
+ deps = [
+ "//third_party:halide",
+ "//third_party:halide_gengen",
+ "//third_party:opencv",
+ "@com_github_google_glog//:glog",
+ ],
+)
+
+py_binary(
+ name = "fast_gaussian_runner",
+ srcs = [
+ "fast_gaussian_runner.py",
+ ],
+ data = [
+ ":fast_gaussian_generator",
+ # TODO(Brian): Replace this with something more fine-grained from the
+ # configuration fragment or something.
+ "//tools/cpp:toolchain",
+ ],
+ default_python_version = "PY3",
+ main = "fast_gaussian_runner.py",
+ restricted_to = [
+ "//tools:k8",
+ "//tools:armhf-debian",
+ ],
+ srcs_version = "PY2AND3",
+ deps = [
+ "@bazel_tools//tools/python/runfiles",
+ ],
+)
+
+# Each element is [sigma, sigma_name, filter_width].
+# opencv's default width is calculated as:
+# cvRound(sigma1 * (depth == CV_8U ? 3 : 4) * 2 + 1) | 1
+# Narrowing the filter helps a lot with making it faster (less data to read, and
+# less math to do), but if you make it too narrow SIFT quickly freaks out.
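+# For example, with sigma = 1.2262734984654078 on our 16-bit images (not CV_8U),
+# that default works out to cvRound(1.2262734984654078 * 4 * 2 + 1) | 1 == 11,
+# so the 9 used below is deliberately a little narrower than that.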
+sigmas = [
+ [
+ "1.2262734984654078",
+ "1p2",
+ "9",
+ ],
+ [
+ "1.5450077936447955",
+ "1p5",
+ "11",
+ ],
+ [
+ "1.9465878414647133",
+ "1p9",
+ "13",
+ ],
+ [
+ "2.4525469969308156",
+ "2p4",
+ "15",
+ ],
+ [
+ "3.0900155872895909",
+ "3p1",
+ "19",
+ ],
+ # TODO(Brian): We only need one of these two for 1280x720. Don't generate
+ # all the redundant versions for other sizes, and maybe stop doing the one
+ # we don't actually use.
+ [
+ "1.2489997148513794",
+ "1p24",
+ "11",
+ ],
+ [
+ "1.5198683738708496",
+ "1p52",
+ "15",
+ ],
+]
+
+sizes = [
+ [
+ 1280,
+ 720,
+ ],
+ [
+ 640,
+ 360,
+ ],
+ [
+ 320,
+ 180,
+ ],
+ [
+ 160,
+ 90,
+ ],
+ [
+ 80,
+ 45,
+ ],
+]
+
+fast_gaussian(sigmas, sizes)
+
cc_library(
name = "sift971",
srcs = [
@@ -12,6 +125,47 @@
],
visibility = ["//visibility:public"],
deps = [
+ ":fast_gaussian",
"//third_party:opencv",
+ "@com_github_google_glog//:glog",
+ ],
+)
+
+cc_library(
+ name = "fast_gaussian",
+ srcs = [
+ "fast_gaussian.cc",
+ ],
+ hdrs = [
+ "fast_gaussian.h",
+ ],
+ restricted_to = [
+ "//tools:k8",
+ "//tools:armhf-debian",
+ ],
+ deps = [
+ ":fast_gaussian_all",
+ "//third_party:halide_runtime",
+ "//third_party:opencv",
+ "@com_github_google_glog//:glog",
+ ],
+)
+
+cc_binary(
+ name = "testing_sift",
+ srcs = [
+ "testing_sift.cc",
+ ],
+ restricted_to = [
+ "//tools:k8",
+ "//tools:armhf-debian",
+ ],
+ deps = [
+ ":fast_gaussian",
+ "//aos:init",
+ "//aos/time",
+ "//third_party:opencv",
+ "//y2020/vision/sift:sift971",
+ "@com_github_google_glog//:glog",
],
)
diff --git a/y2020/vision/sift/fast_gaussian.bzl b/y2020/vision/sift/fast_gaussian.bzl
new file mode 100644
index 0000000..a1c3173
--- /dev/null
+++ b/y2020/vision/sift/fast_gaussian.bzl
@@ -0,0 +1,55 @@
+def fast_gaussian(sigmas, sizes):
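+ """Generates Halide-compiled gaussian and subtract functions.
+
+ For each (cols x rows, sigma) combination, this expands to a genrule which runs
+ :fast_gaussian_runner to produce a header and object file per generated
+ function (plus the shared Halide runtime), and a cc_library named
+ fast_gaussian_all wrapping all of them.
+ """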
+ files = []
+ for _, sigma_name, _ in sigmas:
+ for cols, rows in sizes:
+ files.append("fast_gaussian_%dx%d_%s" % (cols, rows, sigma_name))
+ for _, sigma_name, _ in sigmas:
+ for cols, rows in sizes:
+ files.append("fast_gaussian_subtract_%dx%d_%s" % (cols, rows, sigma_name))
+ for cols, rows in sizes:
+ files.append('fast_subtract_%dx%d' % (cols, rows))
+
+ params = struct(
+ sigmas = sigmas,
+ sizes = sizes,
+ )
+
+ headers = [f + '.h' for f in files] + [
+ 'fast_gaussian_all.h',
+ ]
+ objects = [f + '.o' for f in files] + [
+ 'fast_gaussian_runtime.o',
+ ]
+ htmls = [f + '.html' for f in files]
+
+ native.genrule(
+ name = "generate_fast_gaussian",
+ tools = [
+ ":fast_gaussian_runner",
+ ],
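+ # The runner is passed the parameters as a JSON blob in argv[1], the output
+ # directory in argv[2], and the target CPU in argv[3].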
+ cmd = ' '.join([
+ '$(location fast_gaussian_runner)',
+ "'" + params.to_json() + "'",
+ # TODO(Brian): This should be RULEDIR once we have support for that.
+ '$(@D)',
+ '$(TARGET_CPU)',
+ ]),
+ outs = headers + objects + htmls,
+ restricted_to = [
+ "//tools:k8",
+ "//tools:armhf-debian",
+ ],
+ )
+
+ native.cc_library(
+ name = 'fast_gaussian_all',
+ hdrs = ['fast_gaussian_all.h'],
+ srcs = headers + objects,
+ deps = [
+ '//third_party:halide_runtime',
+ ],
+ restricted_to = [
+ "//tools:k8",
+ "//tools:armhf-debian",
+ ],
+ )
diff --git a/y2020/vision/sift/fast_gaussian.cc b/y2020/vision/sift/fast_gaussian.cc
new file mode 100644
index 0000000..22549ac
--- /dev/null
+++ b/y2020/vision/sift/fast_gaussian.cc
@@ -0,0 +1,126 @@
+#include "y2020/vision/sift/fast_gaussian.h"
+
+#include <iomanip>
+
+#include <opencv2/imgproc.hpp>
+
+#include "y2020/vision/sift/fast_gaussian_all.h"
+
+namespace frc971 {
+namespace vision {
+namespace {
+
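+// The generated Halide code writes its output while reading its input, so fail
+// loudly on overlapping images instead of silently computing garbage.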
+void CheckNonOverlapping(const cv::Mat &a, const cv::Mat &b) {
+ CHECK(a.data > b.data + b.total() * b.elemSize() || a.data < b.data)
+ << ": images may not overlap";
+ CHECK(b.data > a.data + a.total() * a.elemSize() || b.data < a.data)
+ << ": images may not overlap";
+}
+
+// An easy toggle to always fall back to the slow implementations, to verify the
+// results are the same.
+constexpr bool kUseFast = true;
+
+// An easy toggle to print the result of all operations, for verifying that the
+// halide code is doing what we expect.
+constexpr bool kPrintAll = false;
+
+// We deliberately don't generate code for images smaller than this, so don't
+// print warnings about them.
+//
+// The opencv implementations are so fast below this size, the build time to
+// generate halide versions isn't worthwhile.
+constexpr int kMinWarnSize = 80;
+
+bool IsSmall(cv::Size size) {
+ return size.height <= kMinWarnSize && size.width <= kMinWarnSize;
+}
+
+} // namespace
+
+void FastGaussian(const cv::Mat &source, cv::Mat *destination, double sigma) {
+ CHECK_EQ(source.type(), CV_16SC1);
+
+ destination->create(source.size(), source.type());
+ CheckNonOverlapping(source, *destination);
+
+ int result = 1;
+ if (kUseFast) {
+ result = DoGeneratedFastGaussian(MatToHalide<const int16_t>(source),
+ MatToHalide<int16_t>(*destination), sigma);
+ }
+ if (kPrintAll) {
+ LOG(INFO) << result << ": " << source.rows << " " << source.cols << " "
+ << std::setprecision(17) << sigma;
+ }
+ if (result == 0) {
+ return;
+ }
+ if (!IsSmall(source.size())) {
+ LOG(WARNING) << "slow gaussian blur: " << source.rows << " " << source.cols
+ << " " << std::setprecision(17) << sigma;
+ }
+ CHECK_EQ(result, 1);
+
+ cv::GaussianBlur(source, *destination, cv::Size(), sigma, sigma,
+ cv::BORDER_REPLICATE);
+}
+
+void FastSubtract(const cv::Mat &a, const cv::Mat &b, cv::Mat *destination) {
+ CHECK(a.size() == b.size());
+ destination->create(a.size(), a.type());
+ CheckNonOverlapping(a, *destination);
+ CheckNonOverlapping(b, *destination);
+
+ int result = 1;
+ if (kUseFast) {
+ result = DoGeneratedFastSubtract(MatToHalide<const int16_t>(a),
+ MatToHalide<const int16_t>(b),
+ MatToHalide<int16_t>(*destination));
+ }
+ if (kPrintAll) {
+ LOG(INFO) << result << ": " << a.rows << " " << a.cols;
+ }
+ if (result == 0) {
+ return;
+ }
+ if (!IsSmall(a.size())) {
+ LOG(WARNING) << "slow subtract: " << a.rows << " " << a.cols;
+ }
+ CHECK_EQ(result, 1);
+
+ cv::subtract(a, b, *destination);
+}
+
+void FastGaussianAndSubtract(const cv::Mat &source, cv::Mat *blurred,
+ cv::Mat *difference, double sigma) {
+ CHECK_EQ(source.type(), CV_16SC1);
+ blurred->create(source.size(), source.type());
+ difference->create(source.size(), source.type());
+
+ int result = 1;
+ if (kUseFast) {
+ result = DoGeneratedFastGaussianAndSubtract(
+ MatToHalide<const int16_t>(source), MatToHalide<int16_t>(*blurred),
+ MatToHalide<int16_t>(*difference), sigma);
+ }
+ if (kPrintAll) {
+ LOG(INFO) << result << ": " << source.rows << " " << source.cols << " "
+ << std::setprecision(17) << sigma;
+ }
+ if (result == 0) {
+ return;
+ }
+ if (!IsSmall(source.size())) {
+ LOG(WARNING) << "slow gaussian blur: " << source.rows << " " << source.cols
+ << " " << std::setprecision(17) << sigma;
+ }
+ CHECK_EQ(result, 1);
+
+ cv::GaussianBlur(source, *blurred, cv::Size(), sigma, sigma,
+ cv::BORDER_REPLICATE);
+ cv::subtract(*blurred, source, *difference);
+}
+
+} // namespace vision
+} // namespace frc971
diff --git a/y2020/vision/sift/fast_gaussian.h b/y2020/vision/sift/fast_gaussian.h
new file mode 100644
index 0000000..580083f
--- /dev/null
+++ b/y2020/vision/sift/fast_gaussian.h
@@ -0,0 +1,44 @@
+#ifndef Y2020_VISION_SIFT_FAST_GAUSSIAN_H_
+#define Y2020_VISION_SIFT_FAST_GAUSSIAN_H_
+
+#include <type_traits>
+
+#include <opencv2/core/mat.hpp>
+#include "HalideBuffer.h"
+#include "glog/logging.h"
+
+namespace frc971 {
+namespace vision {
+
+// Returns a Halide buffer representing the data in mat.
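+// For example (a sketch): MatToHalide<const int16_t>(mat) wraps a CV_16SC1 mat
+// so it can be passed as an input to one of the generated functions.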
+template <typename T>
+inline Halide::Runtime::Buffer<T, 2> MatToHalide(const cv::Mat &mat) {
+ CHECK_EQ(cv::DataType<typename std::remove_const<T>::type>::type, mat.type());
+ // Verify that at<T>(row, col) accesses this address:
+ // data + sizeof(T) * (row * cols + col)
+ CHECK_EQ(mat.elemSize(), sizeof(T));
+ CHECK_EQ(mat.elemSize1(), sizeof(T));
+ CHECK_EQ(mat.step1(0), static_cast<size_t>(mat.cols));
+ CHECK_EQ(mat.step1(1), 1u);
+ CHECK_EQ(mat.dims, 2);
+ CHECK(mat.isContinuous());
+ return Halide::Runtime::Buffer<T, 2>(reinterpret_cast<T *>(mat.data),
+ mat.cols, mat.rows);
+}
+
+// Performs a gaussian blur with the specified sigma, truncated to a reasonable
+// width. Attempts to use faster implementations, but will fall back to
+// cv::GaussianBlur otherwise. Only handles a limited set of Mat formats.
+//
+// source and destination may not overlap.
+//
+// Always uses BORDER_REPLICATE mode.
+void FastGaussian(const cv::Mat &source, cv::Mat *destination, double sigma);
+void FastSubtract(const cv::Mat &a, const cv::Mat &b, cv::Mat *destination);
+void FastGaussianAndSubtract(const cv::Mat &source, cv::Mat *blurred,
+ cv::Mat *difference, double sigma);
+
+} // namespace vision
+} // namespace frc971
+
+#endif // Y2020_VISION_SIFT_FAST_GAUSSIAN_H_
diff --git a/y2020/vision/sift/fast_gaussian_generator.cc b/y2020/vision/sift/fast_gaussian_generator.cc
new file mode 100644
index 0000000..6418618
--- /dev/null
+++ b/y2020/vision/sift/fast_gaussian_generator.cc
@@ -0,0 +1,206 @@
+#include <opencv2/core/mat.hpp>
+#include <opencv2/imgproc.hpp>
+#include "Halide.h"
+#include "glog/logging.h"
+
+// This is a Halide "generator". This means it is a binary which generates
+// ahead-of-time optimized functions as directed by command-line arguments.
+// https://halide-lang.org/tutorials/tutorial_lesson_15_generators.html has an
+// introduction to much of the magic in this file.
+
+namespace frc971 {
+namespace vision {
+namespace {
+
+// Returns a function implementing a 1-dimensional gaussian blur convolution.
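+// For example, with col_step = 1, row_step = 0, and radius = 2, the resulting
+// expression is:
+//   kernel[0] * in(col, row) +
+//   kernel[1] * (in(col - 1, row) + in(col + 1, row)) +
+//   kernel[2] * (in(col - 2, row) + in(col + 2, row))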
+Halide::Func GenerateBlur(std::string name, Halide::Func in, int col_step,
+ int row_step, int radius, cv::Mat kernel,
+ Halide::Var col, Halide::Var row) {
+ Halide::Expr expr = kernel.at<float>(0) * in(col, row);
+ for (int i = 1; i <= radius; ++i) {
+ expr += kernel.at<float>(i) * (in(col - i * col_step, row - i * row_step) +
+ in(col + i * col_step, row + i * row_step));
+ }
+ Halide::Func func(name);
+ func(col, row) = expr;
+ return func;
+}
+
+template <typename T>
+void SetRowMajor(T *buffer_parameter, int cols, int rows) {
+ buffer_parameter->dim(0).set_stride(1);
+ buffer_parameter->dim(0).set_extent(cols);
+ buffer_parameter->dim(0).set_min(0);
+ buffer_parameter->dim(1).set_stride(cols);
+ buffer_parameter->dim(1).set_extent(rows);
+ buffer_parameter->dim(1).set_min(0);
+}
+
+} // namespace
+
+class GaussianGenerator : public Halide::Generator<GaussianGenerator> {
+ public:
+ GeneratorParam<int> cols{"cols", 0};
+ GeneratorParam<int> rows{"rows", 0};
+ GeneratorParam<double> sigma{"sigma", -1};
+ GeneratorParam<int> filter_width{"filter_width", 0};
+
+ Input<Buffer<int16_t>> input{"input", 2};
+ Output<Buffer<int16_t>> output{"output", 2};
+
+ // We use opencv's naming convention, instead of the (x, y) which most of the
+ // halide examples use. This is easier to keep straight with the row-major
+ // storage order we're using though.
+ // col is first because incrementing the data index by 1 moves over 1 column.
+ Var col{"col"}, row{"row"};
+
+ void generate() {
+ CHECK(cols > 0) << ": Must specify a cols";
+ CHECK(rows > 0) << ": Must specify a rows";
+ CHECK(sigma > 0) << ": Must specify a sigma";
+ CHECK(filter_width > 0) << ": Must specify a filter_width";
+ CHECK((filter_width % 2) == 1)
+ << ": Invalid filter_width: " << static_cast<int>(filter_width);
+
+ SetRowMajor(&input, cols, rows);
+
+ const int radius = (filter_width - 1) / 2;
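+ // getGaussianKernel returns all filter_width taps. The kernel is symmetric, so
+ // we only keep the center tap plus one side (radius + 1 values); GenerateBlur
+ // applies each non-center tap to both of the corresponding neighbors.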
+ const cv::Mat kernel =
+ cv::getGaussianKernel(filter_width, sigma, CV_32F)
+ .rowRange(radius, filter_width);
+
+ Halide::Func in_bounded = Halide::BoundaryConditions::repeat_edge(input);
+ Halide::Func blur_col =
+ GenerateBlur("blur_col", in_bounded, 1, 0, radius, kernel, col, row);
+ output(col, row) = Halide::cast<int16_t>(
+ GenerateBlur("blur_row", blur_col, 0, 1, radius, kernel, col, row)(col, row));
+
+ // Vectorize along the col dimension. Most of the data needed by each lane
+ // overlaps this way. This also has the advantage of being the first
+ // dimension, so as we move along it we will have good data locality.
+ blur_col.vectorize(col, 8);
+
+ // The fun part: we tile the algorithm. This tile size is designed to fit
+ // within each CPU core's L1 cache. On the boundaries of the tiles, we end up
+ // re-computing the first blur, but fitting within the L1 cache is worth it.
+ Halide::Var col_inner("col_inner"), row_inner("row_inner");
+ output.tile(col, row, col_inner, row_inner, 64, 32);
+ Halide::Var tile_index("tile_index");
+ output.fuse(col, row, tile_index);
+
+ // Compute the first blur as needed for the second one, within each tile.
+ blur_col.compute_at(output, tile_index);
+ // And then vectorize the second blur within each tile.
+ output.vectorize(col_inner, 8);
+
+ // Lastly, compute all the tiles in parallel.
+ output.parallel(tile_index);
+
+ SetRowMajor(&output, cols, rows);
+ }
+};
+
+class SubtractGenerator : public Halide::Generator<SubtractGenerator> {
+ public:
+ GeneratorParam<int> cols{"cols", 0};
+ GeneratorParam<int> rows{"rows", 0};
+
+ Input<Buffer<int16_t>> input_a{"input_a", 2};
+ Input<Buffer<int16_t>> input_b{"input_b", 2};
+ Output<Buffer<int16_t>> output{"output", 2};
+
+ Var col{"col"}, row{"row"};
+
+ void generate() {
+ CHECK(cols > 0) << ": Must specify a cols";
+ CHECK(rows > 0) << ": Must specify a rows";
+
+ SetRowMajor(&input_a, cols, rows);
+ SetRowMajor(&input_b, cols, rows);
+
+ output(col, row) = Halide::saturating_cast<int16_t>(
+ Halide::cast<int32_t>(input_a(col, row)) - input_b(col, row));
+ output.vectorize(col, 16);
+
+ SetRowMajor(&output, cols, rows);
+ }
+};
+
+class GaussianAndSubtractGenerator
+ : public Halide::Generator<GaussianAndSubtractGenerator> {
+ public:
+ GeneratorParam<int> cols{"cols", 0};
+ GeneratorParam<int> rows{"rows", 0};
+ GeneratorParam<double> sigma{"sigma", -1};
+ GeneratorParam<int> filter_width{"filter_width", 0};
+
+ Input<Buffer<int16_t>> input{"input", 2};
+ Output<Buffer<int16_t>> blurred{"blurred", 2};
+ Output<Buffer<int16_t>> difference{"difference", 2};
+
+ // We use opencv's naming convention, instead of the (x, y) which most of the
+ // halide examples use. This is easier to keep straight with the row-major
+ // storage order we're using though.
+ // col is first because incrementing the data index by 1 moves over 1 column.
+ Var col{"col"}, row{"row"};
+
+ void generate() {
+ CHECK(cols > 0) << ": Must specify a cols";
+ CHECK(rows > 0) << ": Must specify a rows";
+ CHECK(sigma > 0) << ": Must specify a sigma";
+ CHECK(filter_width > 0) << ": Must specify a filter_width";
+ CHECK((filter_width % 2) == 1)
+ << ": Invalid filter_width: " << static_cast<int>(filter_width);
+
+ SetRowMajor(&input, cols, rows);
+
+ const int radius = (filter_width - 1) / 2;
+ const cv::Mat kernel =
+ cv::getGaussianKernel(filter_width, sigma, CV_32F)
+ .rowRange(radius, filter_width);
+
+ Halide::Func in_bounded = Halide::BoundaryConditions::repeat_edge(input);
+ Halide::Func blur_col =
+ GenerateBlur("blur_col", in_bounded, 1, 0, radius, kernel, col, row);
+ blurred(col, row) = Halide::cast<int16_t>(
+ GenerateBlur("blur_row", blur_col, 0, 1, radius, kernel, col, row)(col, row));
+ difference(col, row) = Halide::saturating_cast<int16_t>(
+ Halide::cast<int32_t>(blurred(col, row)) - input(col, row));
+
+ // Vectorize along the col dimension. Most of the data needed by each lane
+ // overlaps this way. This also has the advantage of being the first
+ // dimension, so as we move along it we will have good data locality.
+ blur_col.vectorize(col, 8);
+
+ // The fun part: we tile the algorithm. This tile size is designed to fit
+ // within each CPU core's L1 cache. On the boundaries of the tiles, we end up
+ // re-computing the first blur, but fitting within the L1 cache is worth it.
+ Halide::Var col_inner("col_inner"), row_inner("row_inner");
+ blurred.tile(col, row, col_inner, row_inner, 64, 32);
+ Halide::Var tile_index("tile_index");
+ blurred.fuse(col, row, tile_index);
+
+ // Compute the first blur as needed for the second one, within each tile.
+ blur_col.compute_at(blurred, tile_index);
+ // And then vectorize the second blur within each tile.
+ blurred.vectorize(col_inner, 8);
+
+ // Lastly, compute all the tiles in parallel.
+ blurred.parallel(tile_index);
+ blurred.compute_root();
+
+ // TODO(Brian): Calculate difference within each of the tiles to speed things
+ // up.
+
+ SetRowMajor(&blurred, cols, rows);
+ SetRowMajor(&difference, cols, rows);
+ }
+};
+
+} // namespace vision
+} // namespace frc971
+
+HALIDE_REGISTER_GENERATOR(frc971::vision::GaussianGenerator, gaussian_generator)
+HALIDE_REGISTER_GENERATOR(frc971::vision::SubtractGenerator, subtract_generator)
+HALIDE_REGISTER_GENERATOR(frc971::vision::GaussianAndSubtractGenerator,
+ gaussian_and_subtract_generator)
diff --git a/y2020/vision/sift/fast_gaussian_runner.py b/y2020/vision/sift/fast_gaussian_runner.py
new file mode 100755
index 0000000..9699fef
--- /dev/null
+++ b/y2020/vision/sift/fast_gaussian_runner.py
@@ -0,0 +1,194 @@
+from __future__ import print_function
+
+import json
+import sys
+import subprocess
+import os
+import threading
+
+from bazel_tools.tools.python.runfiles import runfiles
+
+def main(params):
+ r = runfiles.Create()
+ generator = r.Rlocation('org_frc971/y2020/vision/sift/fast_gaussian_generator')
+
+ ruledir = sys.argv[2]
+ target_cpu = sys.argv[3]
+
+ target = {
+ 'armhf-debian': 'arm-32-linux-no_asserts',
+ 'k8': 'x86-64-linux-no_asserts',
+ }[target_cpu]
+
+ commands = []
+
+ env = os.environ.copy()
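+ # The generator binary needs shared libraries from the sysroot (brought in via
+ # the //tools/cpp:toolchain data dependency), so point the dynamic loader at
+ # them.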
+ env['LD_LIBRARY_PATH'] = ':'.join([
+ 'debian_amd64_sysroot/lib/x86_64-linux-gnu',
+ 'debian_amd64_sysroot/lib',
+ 'debian_amd64_sysroot/usr/lib/x86_64-linux-gnu',
+ 'debian_amd64_sysroot/usr/lib',
+ ])
+
+ all_header = [
+ '#ifndef Y2020_VISION_SIFT_FAST_GAUSSIAN_ALL_H_',
+ '#define Y2020_VISION_SIFT_FAST_GAUSSIAN_ALL_H_',
+ '#include "HalideBuffer.h"',
+ ]
+
+ for cols, rows in params['sizes']:
+ for sigma, sigma_name, filter_width in params['sigmas']:
+ name = "fast_gaussian_%dx%d_%s" % (cols, rows, sigma_name)
+
+ commands.append([
+ generator,
+ '-g', 'gaussian_generator',
+ '-o', ruledir,
+ '-f', name,
+ '-e', 'o,h,html',
+ 'target=%s-no_runtime' % target,
+ 'cols=%s' % cols,
+ 'rows=%s' % rows,
+ 'sigma=%s' % sigma,
+ 'filter_width=%s' % filter_width,
+ ])
+ all_header += [
+ '#include "y2020/vision/sift/%s.h"' % name,
+ ]
+
+ name = "fast_gaussian_subtract_%dx%d_%s" % (cols, rows, sigma_name)
+
+ commands.append([
+ generator,
+ '-g', 'gaussian_and_subtract_generator',
+ '-o', ruledir,
+ '-f', name,
+ '-e', 'o,h,html',
+ 'target=%s-no_runtime' % target,
+ 'cols=%s' % cols,
+ 'rows=%s' % rows,
+ 'sigma=%s' % sigma,
+ 'filter_width=%s' % filter_width,
+ ])
+ all_header += [
+ '#include "y2020/vision/sift/%s.h"' % name,
+ ]
+
+ name = 'fast_subtract_%dx%d' % (cols, rows)
+ commands.append([
+ generator,
+ '-g', 'subtract_generator',
+ '-o', ruledir,
+ '-f', name,
+ '-e', 'o,h,html',
+ 'target=%s-no_runtime' % target,
+ 'cols=%s' % cols,
+ 'rows=%s' % rows,
+ ])
+ all_header += [
+ '#include "y2020/vision/sift/%s.h"' % name,
+ ]
+ commands.append([
+ generator,
+ '-r', 'fast_gaussian_runtime',
+ '-o', ruledir,
+ '-e', 'o',
+ 'target=%s' % target,
+ ])
+
+ all_header += [
+ 'namespace frc971 {',
+ 'namespace vision {',
+ '// 0 is success. 1 is non-implemented size. Negative is a Halide error.',
+ 'inline int DoGeneratedFastGaussian(',
+ ' Halide::Runtime::Buffer<const int16_t, 2> input,',
+ ' Halide::Runtime::Buffer<int16_t, 2> output,',
+ ' double sigma) {',
+ ]
+
+ for sigma, sigma_name, filter_width in params['sigmas']:
+ for cols, rows in params['sizes']:
+ name = "fast_gaussian_%dx%d_%s" % (cols, rows, sigma_name)
+ all_header += [
+ ' if (input.dim(0).extent() == %s' % cols,
+ ' && input.dim(1).extent() == %s' % rows,
+ ' && sigma == %s) {' % sigma,
+ ' return %s(input, output);' % name,
+ ' }',
+ ]
+
+ all_header += [
+ ' return 1;',
+ '}',
+ 'inline int DoGeneratedFastGaussianAndSubtract(',
+ ' Halide::Runtime::Buffer<const int16_t, 2> input,',
+ ' Halide::Runtime::Buffer<int16_t, 2> blurred,',
+ ' Halide::Runtime::Buffer<int16_t, 2> difference,',
+ ' double sigma) {',
+ ]
+
+ for sigma, sigma_name, filter_width in params['sigmas']:
+ for cols, rows in params['sizes']:
+ name = "fast_gaussian_subtract_%dx%d_%s" % (cols, rows, sigma_name)
+ all_header += [
+ ' if (input.dim(0).extent() == %s' % cols,
+ ' && input.dim(1).extent() == %s' % rows,
+ ' && sigma == %s) {' % sigma,
+ ' return %s(input, blurred, difference);' % name,
+ ' }',
+ ]
+
+ all_header += [
+ ' return 1;',
+ '}',
+ 'inline int DoGeneratedFastSubtract(',
+ ' Halide::Runtime::Buffer<const int16_t, 2> input_a,',
+ ' Halide::Runtime::Buffer<const int16_t, 2> input_b,',
+ ' Halide::Runtime::Buffer<int16_t, 2> output) {',
+ ]
+ for cols, rows in params['sizes']:
+ name = 'fast_subtract_%dx%d' % (cols, rows)
+ all_header += [
+ ' if (input_a.dim(0).extent() == %s' % cols,
+ ' && input_a.dim(1).extent() == %s) {' % rows,
+ ' return %s(input_a, input_b, output);' % name,
+ ' }',
+ ]
+ all_header += [
+ ' return 1;',
+ '}',
+ '} // namespace vision',
+ '} // namespace frc971',
+ '#endif // Y2020_VISION_SIFT_FAST_GAUSSIAN_ALL_H_',
+ ]
+
+ with open(os.path.join(ruledir, 'fast_gaussian_all.h'), 'w') as f:
+ f.writelines([line + '\n' for line in all_header])
+
+ commands_lock = threading.Lock()
+ success = [True]
+
+ def run_commands():
+ while True:
+ with commands_lock:
+ if not commands:
+ return
+ if not success[0]:
+ return
+ command = commands.pop()
+ try:
+ subprocess.check_call(command, env=env)
+ except:
+ with commands_lock:
+ success[0] = False
+ raise
+ threads = [threading.Thread(target=run_commands) for _ in range(4)]
+ for thread in threads:
+ thread.start()
+ for thread in threads:
+ thread.join()
+ if not success[0]:
+ sys.exit(1)
+
+if __name__ == '__main__':
+ main(json.loads(sys.argv[1]))
diff --git a/y2020/vision/sift/sift971.cc b/y2020/vision/sift/sift971.cc
index 6223f77..7152906 100644
--- a/y2020/vision/sift/sift971.cc
+++ b/y2020/vision/sift/sift971.cc
@@ -111,6 +111,9 @@
#include <stdarg.h>
#include <opencv2/core/hal/hal.hpp>
#include <opencv2/imgproc.hpp>
+#include "glog/logging.h"
+
+#include "y2020/vision/sift/fast_gaussian.h"
using namespace cv;
@@ -158,7 +161,7 @@
// factor used to convert floating-point descriptor to unsigned char
static const float SIFT_INT_DESCR_FCTR = 512.f;
-#define DoG_TYPE_SHORT 0
+#define DoG_TYPE_SHORT 1
#if DoG_TYPE_SHORT
// intermediate type used for DoG pyramids
typedef short sift_wt;
@@ -177,37 +180,7 @@
scale = octave >= 0 ? 1.f / (1 << octave) : (float)(1 << -octave);
}
-static Mat createInitialImage(const Mat &img, bool doubleImageSize,
- float sigma) {
- Mat gray, gray_fpt;
- if (img.channels() == 3 || img.channels() == 4) {
- cvtColor(img, gray, COLOR_BGR2GRAY);
- gray.convertTo(gray_fpt, DataType<sift_wt>::type, SIFT_FIXPT_SCALE, 0);
- } else
- img.convertTo(gray_fpt, DataType<sift_wt>::type, SIFT_FIXPT_SCALE, 0);
-
- float sig_diff;
-
- if (doubleImageSize) {
- sig_diff = sqrtf(
- std::max(sigma * sigma - SIFT_INIT_SIGMA * SIFT_INIT_SIGMA * 4, 0.01f));
- Mat dbl;
-#if DoG_TYPE_SHORT
- resize(gray_fpt, dbl, Size(gray_fpt.cols * 2, gray_fpt.rows * 2), 0, 0,
- INTER_LINEAR_EXACT);
-#else
- resize(gray_fpt, dbl, Size(gray_fpt.cols * 2, gray_fpt.rows * 2), 0, 0,
- INTER_LINEAR);
-#endif
- GaussianBlur(dbl, dbl, Size(), sig_diff, sig_diff);
- return dbl;
- } else {
- sig_diff = sqrtf(
- std::max(sigma * sigma - SIFT_INIT_SIGMA * SIFT_INIT_SIGMA, 0.01f));
- GaussianBlur(gray_fpt, gray_fpt, Size(), sig_diff, sig_diff);
- return gray_fpt;
- }
-}
+constexpr bool kLogTiming = false;
} // namespace
@@ -229,14 +202,19 @@
for (int o = 0; o < nOctaves; o++) {
for (int i = 0; i < nOctaveLayers + 3; i++) {
Mat &dst = pyr[o * (nOctaveLayers + 3) + i];
- if (o == 0 && i == 0) dst = base;
- // base of new octave is halved image from end of previous octave
- else if (i == 0) {
+ if (o == 0 && i == 0) {
+ dst = base;
+ } else if (i == 0) {
+ // base of new octave is halved image from end of previous octave
const Mat &src = pyr[(o - 1) * (nOctaveLayers + 3) + nOctaveLayers];
resize(src, dst, Size(src.cols / 2, src.rows / 2), 0, 0, INTER_NEAREST);
} else {
const Mat &src = pyr[o * (nOctaveLayers + 3) + i - 1];
- GaussianBlur(src, dst, Size(), sig[i], sig[i]);
+ if (use_fast_gaussian_pyramid_) {
+ FastGaussian(src, &dst, sig[i]);
+ } else {
+ GaussianBlur(src, dst, Size(), sig[i], sig[i]);
+ }
}
}
}
@@ -247,8 +225,12 @@
class buildDoGPyramidComputer : public ParallelLoopBody {
public:
buildDoGPyramidComputer(int _nOctaveLayers, const std::vector<Mat> &_gpyr,
- std::vector<Mat> &_dogpyr)
- : nOctaveLayers(_nOctaveLayers), gpyr(_gpyr), dogpyr(_dogpyr) {}
+ std::vector<Mat> &_dogpyr,
+ bool use_fast_subtract_dogpyr)
+ : nOctaveLayers(_nOctaveLayers),
+ gpyr(_gpyr),
+ dogpyr(_dogpyr),
+ use_fast_subtract_dogpyr_(use_fast_subtract_dogpyr) {}
void operator()(const cv::Range &range) const override {
const int begin = range.start;
@@ -260,15 +242,21 @@
const Mat &src1 = gpyr[o * (nOctaveLayers + 3) + i];
const Mat &src2 = gpyr[o * (nOctaveLayers + 3) + i + 1];
+ CHECK_EQ(a, o * (nOctaveLayers + 2) + i);
Mat &dst = dogpyr[o * (nOctaveLayers + 2) + i];
- subtract(src2, src1, dst, noArray(), DataType<sift_wt>::type);
+ if (use_fast_subtract_dogpyr_) {
+ FastSubtract(src2, src1, &dst);
+ } else {
+ subtract(src2, src1, dst, noArray(), DataType<sift_wt>::type);
+ }
}
}
private:
- int nOctaveLayers;
+ const int nOctaveLayers;
const std::vector<Mat> &gpyr;
std::vector<Mat> &dogpyr;
+ const bool use_fast_subtract_dogpyr_;
};
} // namespace
@@ -278,8 +266,97 @@
int nOctaves = (int)gpyr.size() / (nOctaveLayers + 3);
dogpyr.resize(nOctaves * (nOctaveLayers + 2));
+#if 0
parallel_for_(Range(0, nOctaves * (nOctaveLayers + 2)),
- buildDoGPyramidComputer(nOctaveLayers, gpyr, dogpyr));
+ buildDoGPyramidComputer(nOctaveLayers, gpyr, dogpyr, use_fast_subtract_dogpyr_));
+#else
+ buildDoGPyramidComputer(
+ nOctaveLayers, gpyr, dogpyr,
+ use_fast_subtract_dogpyr_)(Range(0, nOctaves * (nOctaveLayers + 2)));
+#endif
+}
+
+// base is the image to start with.
+// gpyr is the pyramid of gaussian blurs. This is both an output and a place
+// where we store intermediates.
+// dogpyr is the pyramid of gaussian differences which we fill out.
+// number_octaves is the number of octaves to calculate.
+void SIFT971_Impl::buildGaussianAndDifferencePyramid(
+ const cv::Mat &base, std::vector<cv::Mat> &gpyr,
+ std::vector<cv::Mat> &dogpyr, int number_octaves) const {
+ const int layers_per_octave = nOctaveLayers;
+ // We use the base (possibly after downscaling) as the first "blurred" image.
+ // Then we calculate 2 more than the number of layers per octave.
+ // TODO(Brian): Why are there 2 extra?
+ const int gpyr_layers_per_octave = layers_per_octave + 3;
+ // There is 1 less difference than the number of blurs.
+ const int dogpyr_layers_per_octave = gpyr_layers_per_octave - 1;
+ gpyr.resize(number_octaves * gpyr_layers_per_octave);
+ dogpyr.resize(number_octaves * dogpyr_layers_per_octave);
+
+ std::vector<double> sig(gpyr_layers_per_octave);
+ // precompute Gaussian sigmas using the following formula:
+ // \sigma_{total}^2 = \sigma_{i}^2 + \sigma_{i-1}^2
+ sig[0] = sigma;
+ double k = std::pow(2., 1. / layers_per_octave);
+ for (int i = 1; i < gpyr_layers_per_octave; i++) {
+ double sig_prev = std::pow<double>(k, i - 1) * sigma;
+ double sig_total = sig_prev * k;
+ sig[i] = std::sqrt(sig_total * sig_total - sig_prev * sig_prev);
+ }
+
+ for (int octave = 0; octave < number_octaves; octave++) {
+ // At the beginning of each octave, calculate the new base image.
+ {
+ Mat &dst = gpyr[octave * gpyr_layers_per_octave];
+ if (octave == 0) {
+ // For the first octave, it's just the base image.
+ dst = base;
+ } else {
+ // For the other octaves, it's a halved version of the end of the
+ // previous octave.
+ const Mat &src = gpyr[(octave - 1) * gpyr_layers_per_octave +
+ gpyr_layers_per_octave - 1];
+ resize(src, dst, Size(src.cols / 2, src.rows / 2), 0, 0, INTER_NEAREST);
+ }
+ }
+ // We start with layer==1 because the "first layer" is just the base image
+ // (or a downscaled version of it).
+ for (int layer = 1; layer < gpyr_layers_per_octave; layer++) {
+ // The index of the current layer within gpyr.
+ const int layer_index = octave * gpyr_layers_per_octave + layer;
+ if (use_fast_pyramid_difference_) {
+ const Mat &input = gpyr[layer_index - 1];
+ Mat &blurred = gpyr[layer_index];
+ Mat &difference =
+ dogpyr[octave * dogpyr_layers_per_octave + (layer - 1)];
+ FastGaussianAndSubtract(input, &blurred, &difference, sig[layer]);
+ } else {
+ // First, calculate the new gaussian blur.
+ {
+ const Mat &src = gpyr[layer_index - 1];
+ Mat &dst = gpyr[layer_index];
+ if (use_fast_gaussian_pyramid_) {
+ FastGaussian(src, &dst, sig[layer]);
+ } else {
+ GaussianBlur(src, dst, Size(), sig[layer], sig[layer]);
+ }
+ }
+
+ // Then, calculate the difference from the previous one.
+ {
+ const Mat &src1 = gpyr[layer_index - 1];
+ const Mat &src2 = gpyr[layer_index];
+ Mat &dst = dogpyr[octave * dogpyr_layers_per_octave + (layer - 1)];
+ if (use_fast_subtract_dogpyr_) {
+ FastSubtract(src2, src1, &dst);
+ } else {
+ subtract(src2, src1, dst, noArray(), DataType<sift_wt>::type);
+ }
+ }
+ }
+ }
+ }
}
namespace {
@@ -1073,7 +1150,7 @@
std::vector<KeyPoint> &keypoints,
OutputArray _descriptors,
bool useProvidedKeypoints) {
- int firstOctave = -1, actualNOctaves = 0, actualNLayers = 0;
+ int firstOctave = 0, actualNOctaves = 0, actualNLayers = 0;
Mat image = _image.getMat(), mask = _mask.getMat();
if (image.empty() || image.depth() != CV_8U)
@@ -1084,6 +1161,7 @@
CV_Error(Error::StsBadArg, "mask has incorrect type (!=CV_8UC1)");
if (useProvidedKeypoints) {
+ LOG_IF(INFO, kLogTiming);
firstOctave = 0;
int maxOctave = INT_MIN;
for (size_t i = 0; i < keypoints.size(); i++) {
@@ -1100,35 +1178,39 @@
actualNOctaves = maxOctave - firstOctave + 1;
}
- Mat base = createInitialImage(image, firstOctave < 0, (float)sigma);
+ LOG_IF(INFO, kLogTiming);
+ Mat base = createInitialImage(image, firstOctave < 0);
+ LOG_IF(INFO, kLogTiming);
std::vector<Mat> gpyr;
- int nOctaves =
- actualNOctaves > 0
- ? actualNOctaves
- : cvRound(std::log((double)std::min(base.cols, base.rows)) /
- std::log(2.) -
- 2) -
- firstOctave;
-
- // double t, tf = getTickFrequency();
- // t = (double)getTickCount();
- buildGaussianPyramid(base, gpyr, nOctaves);
-
- // t = (double)getTickCount() - t;
- // printf("pyramid construction time: %g\n", t*1000./tf);
+ int nOctaves;
+ if (actualNOctaves > 0) {
+ nOctaves = actualNOctaves;
+ } else {
+ nOctaves = cvRound(std::log((double)std::min(base.cols, base.rows)) /
+ std::log(2.) -
+ 2) -
+ firstOctave;
+ }
if (!useProvidedKeypoints) {
std::vector<Mat> dogpyr;
- buildDoGPyramid(gpyr, dogpyr);
- // t = (double)getTickCount();
+ if (use_fused_pyramid_difference_) {
+ buildGaussianAndDifferencePyramid(base, gpyr, dogpyr, nOctaves);
+ LOG_IF(INFO, kLogTiming);
+ } else {
+ buildGaussianPyramid(base, gpyr, nOctaves);
+ LOG_IF(INFO, kLogTiming);
+
+ buildDoGPyramid(gpyr, dogpyr);
+ LOG_IF(INFO, kLogTiming);
+ }
+
findScaleSpaceExtrema(gpyr, dogpyr, keypoints);
// TODO(Brian): I think it can go faster by knowing they're sorted?
// KeyPointsFilter::removeDuplicatedSorted( keypoints );
KeyPointsFilter::removeDuplicated(keypoints);
if (nfeatures > 0) KeyPointsFilter::retainBest(keypoints, nfeatures);
- // t = (double)getTickCount() - t;
- // printf("keypoint detection time: %g\n", t*1000./tf);
if (firstOctave < 0)
for (size_t i = 0; i < keypoints.size(); i++) {
@@ -1140,20 +1222,54 @@
}
if (!mask.empty()) KeyPointsFilter::runByPixelsMask(keypoints, mask);
+ LOG_IF(INFO, kLogTiming);
} else {
+ buildGaussianPyramid(base, gpyr, nOctaves);
+ LOG_IF(INFO, kLogTiming);
// filter keypoints by mask
// KeyPointsFilter::runByPixelsMask( keypoints, mask );
}
if (_descriptors.needed()) {
- // t = (double)getTickCount();
int dsize = descriptorSize();
_descriptors.create((int)keypoints.size(), dsize, CV_32F);
Mat descriptors = _descriptors.getMat();
calcDescriptors(gpyr, keypoints, descriptors, nOctaveLayers, firstOctave);
- // t = (double)getTickCount() - t;
- // printf("descriptor extraction time: %g\n", t*1000./tf);
+ LOG_IF(INFO, kLogTiming);
+ }
+}
+
+Mat SIFT971_Impl::createInitialImage(const Mat &img,
+ bool doubleImageSize) const {
+ Mat gray, gray_fpt;
+ if (img.channels() == 3 || img.channels() == 4) {
+ cvtColor(img, gray, COLOR_BGR2GRAY);
+ gray.convertTo(gray_fpt, DataType<sift_wt>::type, SIFT_FIXPT_SCALE, 0);
+ } else {
+ img.convertTo(gray_fpt, DataType<sift_wt>::type, SIFT_FIXPT_SCALE, 0);
+ }
+
+ float sig_diff;
+
+ Mat maybe_doubled;
+ if (doubleImageSize) {
+ sig_diff = std::sqrt(
+ std::max(sigma * sigma - SIFT_INIT_SIGMA * SIFT_INIT_SIGMA * 4, 0.01));
+ resize(gray_fpt, maybe_doubled, Size(gray_fpt.cols * 2, gray_fpt.rows * 2),
+ 0, 0, INTER_LINEAR);
+ } else {
+ sig_diff = std::sqrt(
+ std::max(sigma * sigma - SIFT_INIT_SIGMA * SIFT_INIT_SIGMA, 0.01));
+ maybe_doubled = gray_fpt;
+ }
+ if (use_fast_gaussian_initial_) {
+ Mat temp;
+ FastGaussian(maybe_doubled, &temp, sig_diff);
+ return temp;
+ } else {
+ GaussianBlur(maybe_doubled, maybe_doubled, Size(), sig_diff, sig_diff);
+ return maybe_doubled;
}
}
diff --git a/y2020/vision/sift/sift971.h b/y2020/vision/sift/sift971.h
index d58dec8..b351d70 100644
--- a/y2020/vision/sift/sift971.h
+++ b/y2020/vision/sift/sift971.h
@@ -41,6 +41,10 @@
int nOctaves) const;
void buildDoGPyramid(const std::vector<cv::Mat> &pyr,
std::vector<cv::Mat> &dogpyr) const;
+ void buildGaussianAndDifferencePyramid(const cv::Mat &base,
+ std::vector<cv::Mat> &pyr,
+ std::vector<cv::Mat> &dogpyr,
+ int nOctaves) const;
void findScaleSpaceExtrema(const std::vector<cv::Mat> &gauss_pyr,
const std::vector<cv::Mat> &dog_pyr,
std::vector<cv::KeyPoint> &keypoints) const;
@@ -51,6 +55,15 @@
CV_PROP_RW double contrastThreshold;
CV_PROP_RW double edgeThreshold;
CV_PROP_RW double sigma;
+
+ private:
+ cv::Mat createInitialImage(const cv::Mat &img, bool doubleImageSize) const;
+
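+ // Toggles for the various fast (Halide) paths. Turning one off falls back to
+ // the original code path for that step, which is useful for verifying that the
+ // fast versions produce equivalent results.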
+ bool use_fast_gaussian_pyramid_ = true;
+ bool use_fast_subtract_dogpyr_ = true;
+ bool use_fast_gaussian_initial_ = true;
+ bool use_fused_pyramid_difference_ = true;
+ bool use_fast_pyramid_difference_ = true;
};
} // namespace vision
diff --git a/y2020/vision/sift/testing_sift.cc b/y2020/vision/sift/testing_sift.cc
new file mode 100644
index 0000000..d4b1306
--- /dev/null
+++ b/y2020/vision/sift/testing_sift.cc
@@ -0,0 +1,87 @@
+#include <memory>
+
+#include <opencv2/highgui/highgui.hpp>
+#include <opencv2/imgcodecs.hpp>
+#include <opencv2/imgproc.hpp>
+
+#include "aos/init.h"
+#include "aos/time/time.h"
+#include "y2020/vision/sift/fast_gaussian.h"
+#include "glog/logging.h"
+#include "y2020/vision/sift/sift971.h"
+
+DEFINE_string(image, "", "Image to test with");
+
+int main(int argc, char **argv) {
+ aos::InitGoogle(&argc, &argv);
+
+ cv::setNumThreads(4);
+
+ const cv::Mat raw_image = cv::imread(FLAGS_image);
+ CHECK(!raw_image.empty()) << ": Failed to read: " << FLAGS_image;
+ CHECK_EQ(CV_8UC3, raw_image.type());
+#if 0
+ cv::Mat color_image;
+ raw_image.convertTo(color_image, CV_32F, 1.0/255.0);
+ cv::Mat image;
+ cv::cvtColor(color_image, image, cv::COLOR_BGR2GRAY);
+#else
+ cv::Mat gray_image;
+ cv::cvtColor(raw_image, gray_image, cv::COLOR_BGR2GRAY);
+ cv::Mat float_image;
+#if 0
+ gray_image.convertTo(float_image, CV_32F, 0.00390625);
+#else
+ float_image = gray_image;
+#endif
+ cv::Mat image;
+ cv::resize(float_image, image, cv::Size(1280, 720), 0, 0, cv::INTER_AREA);
+#endif
+#if 0
+#if 0
+ cv::namedWindow("source", cv::WINDOW_AUTOSIZE);
+ cv::imshow("source", raw_image);
+ cv::namedWindow("converted", cv::WINDOW_AUTOSIZE);
+ cv::imshow("converted", image);
+#endif
+
+ cv::Mat slow_blurred, fast_blurred;
+ const double sigma = 3.0900155872895909;
+ cv::GaussianBlur(image, slow_blurred, cv::Size(9, 9), sigma, sigma);
+ frc971::vision::FastGaussian(image, &fast_blurred, sigma);
+ cv::namedWindow("slow", cv::WINDOW_AUTOSIZE);
+ cv::imshow("slow", slow_blurred);
+ cv::namedWindow("fast", cv::WINDOW_AUTOSIZE);
+ cv::imshow("fast", fast_blurred);
+ cv::waitKey(0);
+ return 0;
+#endif
+
+ LOG(INFO);
+ std::unique_ptr<frc971::vision::SIFT971_Impl> sift(new frc971::vision::SIFT971_Impl());
+ std::vector<cv::KeyPoint> keypoints;
+ cv::Mat descriptors;
+ LOG(INFO) << "detectAndCompute on " << image.rows << "x" << image.cols;
+ sift->detectAndCompute(image, cv::noArray(), keypoints, descriptors);
+ LOG(INFO);
+
+#if 0
+ return 0;
+#endif
+
+ static constexpr int kIterations = 40;
+ const auto start = aos::monotonic_clock::now();
+ for (int i = 0; i < kIterations; ++i) {
+ keypoints.clear();
+ descriptors.release();
+ sift->detectAndCompute(image, cv::noArray(), keypoints, descriptors);
+ }
+ const auto end = aos::monotonic_clock::now();
+ LOG(INFO)
+ << "Took: "
+ << (std::chrono::duration<double>(end - start) / kIterations).count();
+ // keypoints.size() should be ~352 for FRC-Image4-cleaned.png downscaled to 640x360.
+ // 376 in DoG_TYPE_SHORT mode.
+ // 344 now with 1280x720 non-upscaled.
+ LOG(INFO) << "found " << keypoints.size() << " and " << descriptors.size();
+}