Make SIFT faster
This uses various Halide-optimized functions to do the actual image
processing. It still finds around the same number of features, but runs
much faster.
Change-Id: I9d7f7093b0ec41acf7ed16b2c91cdadada2f9a22
diff --git a/y2020/vision/sift/fast_gaussian_runner.py b/y2020/vision/sift/fast_gaussian_runner.py
new file mode 100755
index 0000000..9699fef
--- /dev/null
+++ b/y2020/vision/sift/fast_gaussian_runner.py
@@ -0,0 +1,216 @@
+from __future__ import print_function
+
+import json
+import sys
+import subprocess
+import os
+import threading
+
+from bazel_tools.tools.python.runfiles import runfiles
+
+
+def _generator_command(generator, ruledir, target, generator_name, name,
+                       cols, rows, extra_args=()):
+    """Builds one generator command line for a no_runtime function.
+
+    The command passes '-g generator_name', '-o ruledir', '-f name' and
+    '-e o,h,html', followed by the target, cols and rows parameters and then
+    any extra_args.
+    """
+    return [
+        generator,
+        '-g', generator_name,
+        '-o', ruledir,
+        '-f', name,
+        '-e', 'o,h,html',
+        'target=%s-no_runtime' % target,
+        'cols=%s' % cols,
+        'rows=%s' % rows,
+    ] + list(extra_args)
+
+
+def _run_commands(commands, env, num_threads=4):
+    """Runs every command in `commands` using a few worker threads.
+
+    Commands are popped off the list as they are started. Once any command
+    fails, no further commands are started.
+
+    Returns:
+      True if no command failed, False otherwise.
+    """
+    commands_lock = threading.Lock()
+    # A one-element list so the worker closure can update the flag in place.
+    success = [True]
+
+    def worker():
+        while True:
+            with commands_lock:
+                if not commands or not success[0]:
+                    return
+                command = commands.pop()
+            try:
+                subprocess.check_call(command, env=env)
+            except BaseException:
+                # Tell the other workers to stop, then re-raise so the error
+                # is not swallowed.
+                with commands_lock:
+                    success[0] = False
+                raise
+
+    threads = [threading.Thread(target=worker) for _ in range(num_threads)]
+    for thread in threads:
+        thread.start()
+    for thread in threads:
+        thread.join()
+    return success[0]
+
+
+def main(params):
+    """Writes fast_gaussian_all.h and runs the generator commands.
+
+    Reads the output directory from sys.argv[2] and the target CPU name
+    ('armhf-debian' or 'k8') from sys.argv[3]. Exits with status 1 if any
+    generator command fails.
+
+    Args:
+      params: dict with a 'sizes' list of (cols, rows) pairs and a 'sigmas'
+        list of (sigma, sigma_name, filter_width) triples.
+    """
+    r = runfiles.Create()
+    generator = r.Rlocation(
+        'org_frc971/y2020/vision/sift/fast_gaussian_generator')
+
+    ruledir = sys.argv[2]
+    target_cpu = sys.argv[3]
+
+    # Translate the CPU name into the target string handed to the generator.
+    target = {
+        'armhf-debian': 'arm-32-linux-no_asserts',
+        'k8': 'x86-64-linux-no_asserts',
+    }[target_cpu]
+
+    # Run the generator with the amd64 sysroot directories on LD_LIBRARY_PATH.
+    env = os.environ.copy()
+    env['LD_LIBRARY_PATH'] = ':'.join([
+        'debian_amd64_sysroot/lib/x86_64-linux-gnu',
+        'debian_amd64_sysroot/lib',
+        'debian_amd64_sysroot/usr/lib/x86_64-linux-gnu',
+        'debian_amd64_sysroot/usr/lib',
+    ])
+
+    commands = []
+    all_header = [
+        '#ifndef Y2020_VISION_SIFT_FAST_GAUSSIAN_ALL_H_',
+        '#define Y2020_VISION_SIFT_FAST_GAUSSIAN_ALL_H_',
+        '#include "HalideBuffer.h"',
+    ]
+
+    # Queue one generator command, and one #include, per generated function.
+    for cols, rows in params['sizes']:
+        for sigma, sigma_name, filter_width in params['sigmas']:
+            sigma_args = [
+                'sigma=%s' % sigma,
+                'filter_width=%s' % filter_width,
+            ]
+            for prefix, generator_name in (
+                    ('fast_gaussian', 'gaussian_generator'),
+                    ('fast_gaussian_subtract',
+                     'gaussian_and_subtract_generator')):
+                name = '%s_%dx%d_%s' % (prefix, cols, rows, sigma_name)
+                commands.append(_generator_command(
+                    generator, ruledir, target, generator_name, name,
+                    cols, rows, sigma_args))
+                all_header.append('#include "y2020/vision/sift/%s.h"' % name)
+
+        name = 'fast_subtract_%dx%d' % (cols, rows)
+        commands.append(_generator_command(
+            generator, ruledir, target, 'subtract_generator', name,
+            cols, rows))
+        all_header.append('#include "y2020/vision/sift/%s.h"' % name)
+
+    # One extra command requests the 'fast_gaussian_runtime' output; every
+    # function above is built with a -no_runtime target.
+    commands.append([
+        generator,
+        '-r', 'fast_gaussian_runtime',
+        '-o', ruledir,
+        '-e', 'o',
+        'target=%s' % target,
+    ])
+
+    all_header += [
+        'namespace frc971 {',
+        'namespace vision {',
+        '// 0 is success. 1 is non-implemented size. Negative is a Halide error.',
+        'inline int DoGeneratedFastGaussian(',
+        ' Halide::Runtime::Buffer<const int16_t, 2> input,',
+        ' Halide::Runtime::Buffer<int16_t, 2> output,',
+        ' double sigma) {',
+    ]
+    for sigma, sigma_name, _ in params['sigmas']:
+        for cols, rows in params['sizes']:
+            name = 'fast_gaussian_%dx%d_%s' % (cols, rows, sigma_name)
+            all_header += [
+                ' if (input.dim(0).extent() == %s' % cols,
+                ' && input.dim(1).extent() == %s' % rows,
+                ' && sigma == %s) {' % sigma,
+                ' return %s(input, output);' % name,
+                ' }',
+            ]
+
+    all_header += [
+        ' return 1;',
+        '}',
+        'inline int DoGeneratedFastGaussianAndSubtract(',
+        ' Halide::Runtime::Buffer<const int16_t, 2> input,',
+        ' Halide::Runtime::Buffer<int16_t, 2> blurred,',
+        ' Halide::Runtime::Buffer<int16_t, 2> difference,',
+        ' double sigma) {',
+    ]
+    for sigma, sigma_name, _ in params['sigmas']:
+        for cols, rows in params['sizes']:
+            name = 'fast_gaussian_subtract_%dx%d_%s' % (
+                cols, rows, sigma_name)
+            all_header += [
+                ' if (input.dim(0).extent() == %s' % cols,
+                ' && input.dim(1).extent() == %s' % rows,
+                ' && sigma == %s) {' % sigma,
+                ' return %s(input, blurred, difference);' % name,
+                ' }',
+            ]
+
+    all_header += [
+        ' return 1;',
+        '}',
+        # Every entry needs its own trailing comma; without one, adjacent
+        # string literals are silently joined into a single header line.
+        'inline int DoGeneratedFastSubtract(',
+        ' Halide::Runtime::Buffer<const int16_t, 2> input_a,',
+        ' Halide::Runtime::Buffer<const int16_t, 2> input_b,',
+        ' Halide::Runtime::Buffer<int16_t, 2> output) {',
+    ]
+    for cols, rows in params['sizes']:
+        name = 'fast_subtract_%dx%d' % (cols, rows)
+        all_header += [
+            ' if (input_a.dim(0).extent() == %s' % cols,
+            ' && input_a.dim(1).extent() == %s) {' % rows,
+            ' return %s(input_a, input_b, output);' % name,
+            ' }',
+        ]
+    all_header += [
+        ' return 1;',
+        '}',
+        '} // namespace vision',
+        '} // namespace frc971',
+        '#endif // Y2020_VISION_SIFT_FAST_GAUSSIAN_ALL_H_',
+    ]
+
+    with open(os.path.join(ruledir, 'fast_gaussian_all.h'), 'w') as f:
+        f.writelines([line + '\n' for line in all_header])
+
+    if not _run_commands(commands, env):
+        sys.exit(1)
+
+
+if __name__ == '__main__':
+    main(json.loads(sys.argv[1]))