Add rule to build halide generators and add a CrCv generator

This lets us pack an image that arrives as two planes (Y and CbCr) on the Orin into a single interleaved YCbCr plane.

Change-Id: Id6fe6a9a0a267e167f8433c2f7cb26b39c330f60
Signed-off-by: Austin Schuh <austin.linux@gmail.com>
diff --git a/frc971/BUILD b/frc971/BUILD
index 5613737..4287462 100644
--- a/frc971/BUILD
+++ b/frc971/BUILD
@@ -2,6 +2,8 @@
 
 package(default_visibility = ["//visibility:public"])
 
+exports_files(["halide_generator.sh"])  # Referenced by label from halide_generator_compile_script.
+
 cc_library(
     name = "shifter_hall_effect",
     hdrs = [
@@ -32,3 +34,19 @@
     gen_reflections = 1,
     visibility = ["//visibility:public"],
 )
+
+sh_binary(  # Wraps halide_generator.sh so genrules can run it as a tool.
+    name = "halide_generator_compile_script",
+    srcs = [
+        "//frc971:halide_generator.sh",
+    ],
+    data = [  # Runtime inputs; the script locates what it needs with rlocation.
+        "@amd64_debian_sysroot//:sysroot_files",
+        "@deb_zlib1g_dev_1_2_11_dfsg_2_amd64_deb_repo//file",
+        "@halide_k8//:build_files",
+        "@llvm_toolchain//:all-components-x86_64-linux",
+    ],
+    deps = [  # Provides runfiles.bash, which the script sources.
+        "@bazel_tools//tools/bash/runfiles",
+    ],
+)
diff --git a/frc971/halide.bzl b/frc971/halide.bzl
new file mode 100644
index 0000000..452716b
--- /dev/null
+++ b/frc971/halide.bzl
@@ -0,0 +1,77 @@
+def halide_library(name, src, function, args, visibility = None):
+    """Compiles a Halide generator and wraps its ahead-of-time output in a cc_library.
+
+    Three targets are created:
+      * <name>_build_generator: genrule that builds `src` into the generator
+        executable <name>_generator using
+        //frc971:halide_generator_compile_script.
+      * generate_<name>: genrule that runs the generator to emit <name>.h,
+        <name>.o and <name>.stmt.html.
+      * <name>: cc_library exposing the generated header and object file.
+
+    Args:
+      name: Name of the cc_library; also the base name of the generated files
+        (passed to the generator as -f).
+      src: Label of the C++ source file containing the generator.
+      function: Generator name passed to the generator binary as -g.
+      args: String appended verbatim to the generator command line, after the
+        platform-specific target= argument.
+      visibility: Visibility of the resulting cc_library.
+    """
+
+    # Shared by the code-generating genrule and the cc_library: Linux only, on
+    # x86_64 or aarch64. @platforms//cpu:arm64 is an alias of
+    # @platforms//cpu:aarch64; the canonical name is used here so this select
+    # keys on the same label as the target= select below.
+    target_compat = select({
+        "@platforms//cpu:x86_64": [],
+        "@platforms//cpu:aarch64": [],
+        "//conditions:default": ["@platforms//:incompatible"],
+    }) + ["@platforms//os:linux"]
+
+    # Step 1: compile the generator source into an executable.
+    native.genrule(
+        name = name + "_build_generator",
+        outs = [
+            name + "_generator",
+        ],
+        srcs = [
+            src,
+        ],
+        cmd = "$(location //frc971:halide_generator_compile_script) $(OUTS) $(location " + src + ")",
+        tools = [
+            "//frc971:halide_generator_compile_script",
+        ],
+    )
+
+    # Step 2: run the generator to produce the header, object and HTML statement dump.
+    native.genrule(
+        name = "generate_" + name,
+        srcs = [
+            ":" + name + "_generator",
+        ],
+        outs = [
+            name + ".h",
+            name + ".o",
+            name + ".stmt.html",
+        ],
+        # TODO(austin): Upgrade halide...
+        cmd = "$(location :" + name + "_generator) -g '" + function + "' -o $(RULEDIR) -f " + name + " -e 'o,h,html' " + select({
+            "@platforms//cpu:x86_64": "target=host ",
+            "@platforms//cpu:aarch64": "target=arm-64-linux-sve2-arm_dot_prod-arm_fp16-armv81a ",
+            "//conditions:default": "",
+        }) + args,
+        target_compatible_with = target_compat,
+    )
+
+    # Step 3: expose the generated code to C++ callers.
+    native.cc_library(
+        name = name,
+        srcs = [name + ".o"],
+        hdrs = [name + ".h"],
+        visibility = visibility,
+        target_compatible_with = target_compat,
+        deps = [
+            "//third_party:halide_runtime",
+        ],
+    )
diff --git a/frc971/halide_generator.sh b/frc971/halide_generator.sh
new file mode 100755
index 0000000..01cb29c
--- /dev/null
+++ b/frc971/halide_generator.sh
@@ -0,0 +1,57 @@
+#!/bin/bash
+# We need to build code linked against Halide. This means we need to use a
+# compatible ABI. This means we need to use libstdc++, not libc++ like our main
+# toolchains are set up for.
+#
+# Rebuilding Halide itself is only moderately annoying. However, it needs to
+# link against LLVM, which is a much bigger pain to rebuild with libc++.
+#
+# To deal with this problem, this script runs clang hermetically on the
+# appropriate sources.
+# --- begin runfiles.bash initialization v2 ---
+# Copy-pasted from the Bazel Bash runfiles library v2.
+set -uo pipefail; f=bazel_tools/tools/bash/runfiles/runfiles.bash
+source "${RUNFILES_DIR:-/dev/null}/$f" 2>/dev/null || \
+  source "$(grep -sm1 "^$f " "${RUNFILES_MANIFEST_FILE:-/dev/null}" | cut -f2- -d' ')" 2>/dev/null || \
+  source "$0.runfiles/$f" 2>/dev/null || \
+  source "$(grep -sm1 "^$f " "$0.runfiles_manifest" | cut -f2- -d' ')" 2>/dev/null || \
+  source "$(grep -sm1 "^$f " "$0.exe.runfiles_manifest" | cut -f2- -d' ')" 2>/dev/null || \
+  { echo>&2 "ERROR: cannot find $f"; exit 1; }; f=; set -e
+# --- end runfiles.bash initialization v2 ---
+BINARY="$1"
+SOURCE="$2"
+HALIDE="$(rlocation halide_k8)"
+SYSROOT="$(rlocation amd64_debian_sysroot)"
+ZLIB1G_DEV_AMD64_DEB="$(rlocation deb_zlib1g_dev_1_2_11_dfsg_2_amd64_deb_repo/file/zlib1g-dev_1.2.11.dfsg-2_amd64.deb)"
+ZLIB1G_DEV="$(mktemp -d)"
+LLVM_TOOLCHAIN="$(dirname "$(dirname "$(rlocation llvm_k8/bin/clang)")")"
+dpkg-deb -x "${ZLIB1G_DEV_AMD64_DEB}" "${ZLIB1G_DEV}"
+TARGET=x86_64-unknown-linux-gnu
+MULTIARCH=x86_64-linux-gnu
+export LD_LIBRARY_PATH="${SYSROOT}/usr/lib:${SYSROOT}/lib:${ZLIB1G_DEV}/usr/lib/${MULTIARCH}"
+"${LLVM_TOOLCHAIN}/bin/clang++" \
+  -fcolor-diagnostics \
+  -I"${HALIDE}/include" \
+  -nostdinc \
+  -isystem"${SYSROOT}/usr/include/c++/10" \
+  -isystem"${SYSROOT}/usr/include/${MULTIARCH}/c++/10" \
+  -isystem"${SYSROOT}/usr/include/c++/7/backward" \
+  -isystem"${LLVM_TOOLCHAIN}/lib/clang/17/include" \
+  -isystem"${SYSROOT}/usr/include/${MULTIARCH}" \
+  -isystem"${SYSROOT}/usr/include" \
+  -isystem"${SYSROOT}/include" \
+  "--sysroot=${SYSROOT}" \
+  -resource-dir "${LLVM_TOOLCHAIN}/lib/clang/17" \
+  -target "${TARGET}" \
+  -fuse-ld=lld \
+  -L"${LLVM_TOOLCHAIN}/lib" \
+  -L"${SYSROOT}/usr/lib" \
+  -L"${SYSROOT}/usr/lib/gcc/${MULTIARCH}/7" \
+  -L"${ZLIB1G_DEV}/usr/lib/${MULTIARCH}" \
+  "${HALIDE}/lib/libHalide.a" \
+  -lstdc++ -lpthread -ldl -lm -lz \
+  -std=gnu++20 \
+  "${SOURCE}" \
+  "${HALIDE}/share/Halide/tools/GenGen.cpp" \
+  -ggdb3 \
+  -o "${BINARY}"
diff --git a/frc971/orin/BUILD b/frc971/orin/BUILD
new file mode 100644
index 0000000..5675ab4
--- /dev/null
+++ b/frc971/orin/BUILD
@@ -0,0 +1,9 @@
+load("//frc971:halide.bzl", "halide_library")
+
+halide_library(  # Builds the generator registered as "ycbcr" in crcv_generator.cc.
+    name = "ycbcr",
+    src = "crcv_generator.cc",
+    args = "rows=1088 cols=1456 ystride=2048 cbcrstride=3840",  # Values for the generator's GeneratorParams.
+    function = "ycbcr",
+    visibility = ["//visibility:public"],
+)
diff --git a/frc971/orin/crcv_generator.cc b/frc971/orin/crcv_generator.cc
new file mode 100644
index 0000000..99318e6
--- /dev/null
+++ b/frc971/orin/crcv_generator.cc
@@ -0,0 +1,121 @@
+#include <cstdio>
+#include <cstdlib>
+#include <iostream>
+
+#include "Halide.h"
+
+// Aborts the process with "assertion failed: <message>: <condition>" on stderr
+// when `x` is false. `message` must be a string literal; it may contain
+// printf-style conversions that consume the extra arguments.
+#define CHECK(x, message, ...)                                              \
+  do {                                                                      \
+    if (!(x)) {                                                             \
+      fprintf(stderr, "assertion failed: " message ": %s\n", ##__VA_ARGS__, \
+              #x);                                                          \
+      abort();                                                              \
+    }                                                                       \
+  } while (0)
+
+// This is a Halide "generator". This means it is a binary which generates
+// ahead-of-time optimized functions as directed by command-line arguments.
+// https://halide-lang.org/tutorials/tutorial_lesson_15_generators.html has an
+// introduction to much of the magic in this file.
+namespace frc971 {
+namespace orin {
+namespace {
+
+// Constrains a 3-dimensional buffer parameter, indexed as (col, row, channel),
+// to a packed 3-channel interleaved layout starting at the origin: channels
+// are 1 element apart, columns 3 elements apart, and rows cols * 3 elements
+// apart.
+template <typename T>
+void SetRowMajor(T *buffer_parameter, int cols, int rows) {
+  buffer_parameter->dim(0).set_stride(3);
+  buffer_parameter->dim(0).set_extent(cols);
+  buffer_parameter->dim(0).set_min(0);
+
+  buffer_parameter->dim(1).set_stride(cols * 3);
+  buffer_parameter->dim(1).set_extent(rows);
+  buffer_parameter->dim(1).set_min(0);
+
+  buffer_parameter->dim(2).set_stride(1);
+  buffer_parameter->dim(2).set_extent(3);
+  buffer_parameter->dim(2).set_min(0);
+}
+}  // namespace
+
+// Takes an image with y in one plane with a provided stride, and cbcr in
+// another with a provided stride and makes a ycbcr output image.
+class YCbCr : public Halide::Generator<YCbCr> {
+ public:
+  // Image size and the row strides of the two input planes. All four default
+  // to 0 and must be given positive values when the generator is run.
+  GeneratorParam<int> cols{"cols", 0};
+  GeneratorParam<int> rows{"rows", 0};
+  GeneratorParam<int> ystride{"ystride", 0};
+  GeneratorParam<int> cbcrstride{"cbcrstride", 0};
+
+  // First input plane, indexed as (col, row).
+  Input<Buffer<uint8_t, 2>> input_y{"y"};
+  // Second input plane, indexed as (col, row, channel) with 2 channels.
+  Input<Buffer<uint8_t, 3>> input_cbcr{"cbcr"};
+  // Packed result, indexed as (col, row, channel) with 3 channels.
+  Output<Buffer<uint8_t, 3>> output{"output"};
+
+  Var col{"col"}, row{"row"}, channel{"channel"};
+
+  // Everything is indexed as col, row, channel.
+  void generate() {
+    CHECK(cols > 0, "Must specify a cols");
+    CHECK(rows > 0, "Must specify a rows");
+    // The strides also default to 0, which cannot describe a real plane, so
+    // reject unset strides the same way as unset dimensions.
+    CHECK(ystride > 0, "Must specify a ystride");
+    CHECK(cbcrstride > 0, "Must specify a cbcrstride");
+
+    // Y plane: adjacent columns, rows `ystride` elements apart.
+    input_y.dim(0).set_stride(1);
+    input_y.dim(0).set_extent(cols);
+    input_y.dim(0).set_min(0);
+
+    input_y.dim(1).set_stride(ystride);
+    input_y.dim(1).set_extent(rows);
+    input_y.dim(1).set_min(0);
+
+    // CbCr plane: two interleaved channels per column, rows `cbcrstride`
+    // elements apart.
+    input_cbcr.dim(0).set_stride(2);
+    input_cbcr.dim(0).set_extent(cols);
+    input_cbcr.dim(0).set_min(0);
+
+    input_cbcr.dim(1).set_stride(cbcrstride);
+    input_cbcr.dim(1).set_extent(rows);
+    input_cbcr.dim(1).set_min(0);
+
+    input_cbcr.dim(2).set_stride(1);
+    input_cbcr.dim(2).set_extent(2);
+    input_cbcr.dim(2).set_min(0);
+
+    // Output channel 0 comes from the y input; channels 1 and 2 come from
+    // channels 0 and 1 of the cbcr input.
+    output(col, row, channel) =
+        Halide::select(channel == 0, input_y(col, row),
+                       Halide::select(channel == 1, input_cbcr(col, row, 0),
+                                      input_cbcr(col, row, 1)));
+
+    // Schedule: channel is the innermost loop and is unrolled; columns are
+    // split into 8-wide vectors, and groups of 4 of those are unrolled.
+    output.reorder(channel, col, row);
+    output.unroll(channel);
+
+    output.vectorize(col, 8);
+    output.unroll(col, 4);
+
+    SetRowMajor(&output, cols, rows);
+  }
+};
+
+}  // namespace orin
+}  // namespace frc971
+
+HALIDE_REGISTER_GENERATOR(frc971::orin::YCbCr, ycbcr)