Merge "Make rehost script more automated"
diff --git a/WORKSPACE b/WORKSPACE
index 19b9bfb..efeb2a4 100644
--- a/WORKSPACE
+++ b/WORKSPACE
@@ -324,13 +324,13 @@
         "linux-aarch64": llvm_opt_copts,
     },
     standard_libraries = {
-        "linux-x86_64": "libstdc++-10",
-        "linux-aarch64": "libstdc++-10",
+        "linux-x86_64": "libstdc++-12",
+        "linux-aarch64": "libstdc++-12",
     },
     static_libstdcxx = False,
     sysroot = {
         "linux-x86_64": "@amd64_debian_sysroot//:sysroot_files",
-        "linux-aarch64": "@arm64_debian_rootfs//:sysroot_files",
+        "linux-aarch64": "@arm64_debian_sysroot//:sysroot_files",
     },
     target_toolchain_roots = {
         "linux-x86_64": "@llvm_k8//",
@@ -451,28 +451,20 @@
     url = "https://software.frc971.org/Build-Dependencies/cortexa9_vfpv3-roborio-academic-2023-x86_64-linux-gnu-Toolchain-12.1.0.tgz",
 )
 
-# The main partition from https://downloads.raspberrypi.org/raspios_lite_armhf/images/raspios_lite_armhf-2021-11-08/2021-10-30-raspios-bullseye-armhf-lite.zip.sig
-# The following files and folders are removed to make bazel happy with it:
-#   usr/share/ca-certificates
-#   lib/systemd/system/system-systemd\\x2dcryptsetup.slice
+# The main partition built from //frc971/orin/build_rootfs.py.
 http_archive(
-    name = "arm64_debian_rootfs",
-    build_file = "@//:compilers/debian_rootfs.BUILD",
-    sha256 = "7e6ad432fec0a36f8b66c3fc2ab8795ea446e61f7dce7a206b55602677cf0904",
-    url = "https://software.frc971.org/Build-Dependencies/2021-10-30-raspios-bullseye-arm64-lite_rootfs.tar.bz2",
+    name = "arm64_debian_sysroot",
+    build_file = "@//:compilers/orin_debian_rootfs.BUILD",
+    sha256 = "a1d8297cebdf8dcc380afaa9703d56864f256775d4d239210d0883a9f2d009bc",
+    url = "https://software.frc971.org/Build-Dependencies/2023-11-18-bookworm-arm64-nvidia-rootfs.tar.xz",
 )
 
-# Created with:
-#   `debootstrap buster buster_sysroot`
-# and then chrooting in and running:
-#   apt install libc6-dev libstdc++-7-dev
-# removing the apt cache,
-# and then tarring up the result
+# Sysroot generated using //frc971/amd64/build_rootfs.py
 http_archive(
     name = "amd64_debian_sysroot",
-    build_file = "@//:compilers/debian_rootfs.BUILD",
-    sha256 = "5e10f4cac85a98a39da1716b218bc05fff4666c61cc471a7df27876710bc86d2",
-    url = "https://software.frc971.org/Build-Dependencies/2022-01-06-debian-bullseye_rootfs.tar.bz2",
+    build_file = "@//:compilers/amd64_debian_rootfs.BUILD",
+    sha256 = "3c098330f8bc57dccb4191167cfbba4c47f3bacf52926479c95ad2e50834b3c2",
+    url = "https://software.frc971.org/Build-Dependencies/2023-11-09-bookworm-amd64-nvidia-rootfs.tar.xz",
 )
 
 local_repository(
diff --git a/aos/ipc_lib/lockless_queue_test.cc b/aos/ipc_lib/lockless_queue_test.cc
index 14701e2..5b57aa9 100644
--- a/aos/ipc_lib/lockless_queue_test.cc
+++ b/aos/ipc_lib/lockless_queue_test.cc
@@ -461,7 +461,7 @@
     LocklessQueueReader::Result read_result = reader.Read(
         i, &monotonic_sent_time, &realtime_sent_time, &monotonic_remote_time,
         &realtime_remote_time, &remote_queue_index, &source_boot_uuid, &length,
-        &(read_data[0]), std::ref(should_read_callback));
+        &(read_data[0]), should_read_callback);
 
     if (read_result != LocklessQueueReader::Result::GOOD) {
       if (read_result == LocklessQueueReader::Result::TOO_OLD) {
diff --git a/aos/ipc_lib/queue_racer.cc b/aos/ipc_lib/queue_racer.cc
index 0b8f1a6..27f3835 100644
--- a/aos/ipc_lib/queue_racer.cc
+++ b/aos/ipc_lib/queue_racer.cc
@@ -320,11 +320,16 @@
     const uint32_t wrapped_i =
         i % static_cast<size_t>(QueueIndex::MaxIndex(
                 0xffffffffu, LocklessQueueSize(queue_.memory())));
-    LocklessQueueReader::Result read_result = reader.Read(
-        wrapped_i, &monotonic_sent_time, &realtime_sent_time,
-        &monotonic_remote_time, &realtime_remote_time, &remote_queue_index,
-        &source_boot_uuid, &length, &(read_data[0]),
-        set_should_read ? std::ref(should_read) : std::ref(nop));
+    LocklessQueueReader::Result read_result =
+        set_should_read
+            ? reader.Read(wrapped_i, &monotonic_sent_time, &realtime_sent_time,
+                          &monotonic_remote_time, &realtime_remote_time,
+                          &remote_queue_index, &source_boot_uuid, &length,
+                          &(read_data[0]), std::ref(should_read))
+            : reader.Read(wrapped_i, &monotonic_sent_time, &realtime_sent_time,
+                          &monotonic_remote_time, &realtime_remote_time,
+                          &remote_queue_index, &source_boot_uuid, &length,
+                          &(read_data[0]), nop);
 
     // The code in lockless_queue.cc reads everything but data, checks that the
     // header hasn't changed, then reads the data.  So, if we succeed and both
diff --git a/compilers/amd64_debian_rootfs.BUILD b/compilers/amd64_debian_rootfs.BUILD
new file mode 100644
index 0000000..8bec407
--- /dev/null
+++ b/compilers/amd64_debian_rootfs.BUILD
@@ -0,0 +1,48 @@
+filegroup(
+    name = "sysroot_files",
+    srcs = glob(
+        # TODO(austin): Only include the base files here.  Need to figure out what those are.
+        # TODO(austin): Generate that list when building the rootfs?
+        include = [
+            "include/**",
+            "lib/**",
+            "lib64/**",
+            "usr/include/**",
+            "usr/lib/**",
+            "usr/bin/**",
+            "usr/lib64/**",
+        ],
+        exclude = [
+            "usr/share/**",
+            "usr/include/thrust/**",
+            "usr/include/nv/**",
+            "usr/include/cuda/**",
+            "usr/include/cub/**",
+            "usr/bin/X11",
+        ],
+    ),
+    visibility = ["//visibility:public"],
+)
+
+cc_library(
+    name = "nppi",
+    srcs = [
+        "usr/lib/x86_64-linux-gnu/libnppc.so.11",
+        "usr/lib/x86_64-linux-gnu/libnppif.so.11",
+    ],
+    hdrs = glob(
+        include = ["usr/include/nppi*.h"],
+    ),
+    visibility = ["//visibility:public"],
+)
+
+cc_library(
+    name = "cudart",
+    srcs = [
+        "usr/lib/x86_64-linux-gnu/libcuda.so.1",
+        "usr/lib/x86_64-linux-gnu/libcudart.so.11.0",
+    ],
+    visibility = ["//visibility:public"],
+)
+
+# TODO(austin): lzma, gstreamer, opencv
diff --git a/compilers/debian_rootfs.BUILD b/compilers/debian_rootfs.BUILD
deleted file mode 100644
index 7eef9ec..0000000
--- a/compilers/debian_rootfs.BUILD
+++ /dev/null
@@ -1,17 +0,0 @@
-filegroup(
-    name = "sysroot_files",
-    srcs = glob(
-        include = [
-            "include/**",
-            "lib/**",
-            "lib64/**",
-            "usr/include/**",
-            "usr/lib/**",
-            "usr/lib64/**",
-        ],
-        exclude = [
-            "usr/share/**",
-        ],
-    ),
-    visibility = ["//visibility:public"],
-)
diff --git a/compilers/orin_debian_rootfs.BUILD b/compilers/orin_debian_rootfs.BUILD
new file mode 100644
index 0000000..d70d329
--- /dev/null
+++ b/compilers/orin_debian_rootfs.BUILD
@@ -0,0 +1,81 @@
+filegroup(
+    name = "sysroot_files",
+    srcs = glob(
+        include = [
+            "include/**",
+            "lib/**",
+            "lib64/**",
+            "usr/include/**",
+            "usr/local/**",
+            "usr/lib/**",
+            "usr/lib64/**",
+        ],
+        exclude = [
+            "usr/share/**",
+            "usr/local/cuda-11.8/include/thrust/**",
+            "usr/local/cuda-11.8/include/nv/**",
+            "usr/local/cuda-11.8/include/cuda/**",
+            "usr/local/cuda-11.8/include/cub/**",
+        ],
+    ),
+    visibility = ["//visibility:public"],
+)
+
+cc_library(
+    name = "argus",
+    srcs = [
+        "usr/lib/libnvargus_socketclient.so",
+    ],
+    hdrs = glob(
+        include = ["usr/include/Argus/**"],
+    ),
+    includes = ["usr/include/Argus/utils/"],
+    visibility = ["//visibility:public"],
+)
+
+cc_library(
+    name = "eglstream",
+    hdrs = glob(
+        include = ["usr/include/EGLStream/**"],
+    ),
+    includes = ["usr/include/EGLStream/"],
+    visibility = ["//visibility:public"],
+)
+
+cc_library(
+    name = "nvbufsurface",
+    srcs = [
+        "usr/lib/libnvbufsurface.so.1.0.0",
+    ],
+    visibility = ["//visibility:public"],
+)
+
+cc_library(
+    name = "egl",
+    srcs = [
+        "usr/lib/aarch64-linux-gnu/libEGL.so",
+    ],
+    visibility = ["//visibility:public"],
+)
+
+cc_library(
+    name = "nppi",
+    srcs = [
+        "usr/local/cuda-11.8/lib/libnppc.so.11",
+        "usr/local/cuda-11.8/lib/libnppif.so.11",
+    ],
+    hdrs = glob(
+        include = ["usr/local/cuda-11.8/include/npp*.h"],
+    ),
+    includes = ["usr/local/cuda-11.8/include"],
+    visibility = ["//visibility:public"],
+)
+
+cc_library(
+    name = "cudart",
+    srcs = [
+        "usr/lib/libcuda.so.1",
+        "usr/local/cuda-11.8/lib/libcudart.so.11.0",
+    ],
+    visibility = ["//visibility:public"],
+)
diff --git a/frc971/BUILD b/frc971/BUILD
index 5613737..4287462 100644
--- a/frc971/BUILD
+++ b/frc971/BUILD
@@ -2,6 +2,8 @@
 
 package(default_visibility = ["//visibility:public"])
 
+exports_files(["halide_generator.sh"])
+
 cc_library(
     name = "shifter_hall_effect",
     hdrs = [
@@ -32,3 +34,19 @@
     gen_reflections = 1,
     visibility = ["//visibility:public"],
 )
+
+sh_binary(
+    name = "halide_generator_compile_script",
+    srcs = [
+        "//frc971:halide_generator.sh",
+    ],
+    data = [
+        "@amd64_debian_sysroot//:sysroot_files",
+        "@deb_zlib1g_dev_1_2_11_dfsg_2_amd64_deb_repo//file",
+        "@halide_k8//:build_files",
+        "@llvm_toolchain//:all-components-x86_64-linux",
+    ],
+    deps = [
+        "@bazel_tools//tools/bash/runfiles",
+    ],
+)
diff --git a/frc971/amd64/build_rootfs.py b/frc971/amd64/build_rootfs.py
new file mode 100755
index 0000000..cc0138e
--- /dev/null
+++ b/frc971/amd64/build_rootfs.py
@@ -0,0 +1,181 @@
+#!/usr/bin/python3
+
+import contextlib
+import datetime
+import pathlib
+import subprocess
+import shlex
+import os
+import sys
+
+REQUIRED_DEPS = ["debootstrap"]
+
+ROOTFS_FOLDER = "/tmp/rootfs"
+
+
+@contextlib.contextmanager
+def scoped_bind_mount(partition):
+    """Bind mounts host path `partition` at the same path under ROOTFS_FOLDER; unmounts on exit."""
+    subprocess.run(
+        ["sudo", "mount", "--bind", partition, f"{ROOTFS_FOLDER}/{partition}"],
+        check=True)
+    # The finally clause unmounts even when the body of the with-block raises.
+    try:
+        yield partition
+    finally:
+        subprocess.run(["sudo", "umount", f"{ROOTFS_FOLDER}/{partition}"],
+                       check=True)
+
+
+def check_required_deps(deps):
+    """Returns True, after printing an apt-get hint, if any package in deps is not installed."""
+    missing_deps = []
+    for dep in deps:
+        result = subprocess.run(["dpkg-query", "-W", "-f='${Status}'", dep],
+                                check=False,
+                                stdout=subprocess.PIPE)
+        # dpkg-query exits non-zero for unknown packages; treat that as missing, not as a crash.
+        if "install ok installed" not in result.stdout.decode('utf-8'):
+            missing_deps.append(dep)
+
+    if missing_deps:
+        print("Missing dependencies, please install:")
+        print("sudo apt-get install", " ".join(missing_deps))
+        return True
+
+    return False
+
+
+def target_unescaped(cmd):
+    """Runs a command as root with bash -c cmd, ie without escaping."""
+    subprocess.run([
+        "sudo", "chroot", "--userspec=0:0", f"{ROOTFS_FOLDER}", "/bin/bash",
+        "-c", cmd
+    ],
+                   check=True)
+
+
+def target(cmd):
+    """Runs the argv list `cmd` as root in the chroot; shlex.join quotes each argument exactly once."""
+    target_unescaped(shlex.join(cmd))
+
+
+def copyfile(owner, permissions, file):
+    """Copies a file from contents/{file} with the provided owner and permissions."""
+    print("copyfile", owner, permissions, file)
+    subprocess.run(
+        ["sudo", "cp", f"contents/{file}", f"{ROOTFS_FOLDER}/{file}"],
+        check=True)
+    subprocess.run(["sudo", "chmod", permissions, f"{ROOTFS_FOLDER}/{file}"],
+                   check=True)
+    target(["chown", owner, f"/{file}"])
+
+
+def target_symlink(owner, permissions, link_target, linkname):
+    full_linkname = f"{ROOTFS_FOLDER}/{linkname}"
+    print(link_target)
+    print(full_linkname)
+    if not os.path.exists(full_linkname):
+        target(["ln", "-s", link_target, linkname])
+
+    assert (pathlib.Path(full_linkname).is_symlink())
+
+    target(["chown", owner, linkname])
+    target(["chmod", permissions, linkname])
+
+
+def target_mkdir(owner_group, permissions, folder):
+    """Creates a directory recursively with the provided permissions and ownership."""
+    print("target_mkdir", owner_group, permissions, folder)
+    owner, group = owner_group.split('.')
+    target(
+        ["install", "-d", "-m", permissions, "-o", owner, "-g", group, folder])
+
+
+def main():
+    if check_required_deps(REQUIRED_DEPS):
+        return 1
+
+    new_image = not os.path.exists(ROOTFS_FOLDER)
+    if new_image:
+        os.mkdir(ROOTFS_FOLDER)
+
+    if new_image:
+        subprocess.run([
+            "sudo", "debootstrap", "--no-check-gpg", "bookworm", ROOTFS_FOLDER,
+            "http://deb.debian.org/debian/"
+        ],
+                       check=True)
+
+    if not os.path.exists(
+            f"{ROOTFS_FOLDER}/etc/apt/sources.list.d/bullseye-backports.list"):
+        copyfile("root.root", "644",
+                 "etc/apt/sources.list.d/bullseye-backports.list")
+        target(["apt-get", "update"])
+
+    with scoped_bind_mount("/dev") as _:
+        with scoped_bind_mount("/proc") as _:
+            target([
+                "apt-get",
+                "-y",
+                "install",
+                "libopencv-calib3d406",
+                "libopencv-contrib406",
+                "libopencv-core406",
+                "libopencv-features2d406",
+                "libopencv-flann406",
+                "libopencv-highgui406",
+                "libopencv-imgcodecs406",
+                "libopencv-imgproc406",
+                "libopencv-ml406",
+                "libopencv-objdetect406",
+                "libopencv-photo406",
+                "libopencv-shape406",
+                "libopencv-stitching406",
+                "libopencv-superres406",
+                "libopencv-video406",
+                "libopencv-videoio406",
+                "libopencv-videostab406",
+                "libopencv-viz406",
+                "libv4l-dev",
+                "libc6-dev",
+                "libstdc++-12-dev",
+                "nvidia-cuda-dev",
+                "nvidia-cuda-toolkit",
+            ])
+
+    target_mkdir("root.root", "755", "usr/lib/cuda/bin")
+    target_symlink("root.root", "555", "../../../bin/fatbinary",
+                   "usr/lib/cuda/bin/x86_64-unknown-linux-gnu-fatbinary")
+
+    target(["apt-get", "clean"])
+
+    target(["ldconfig"])
+
+    tarball = datetime.date.today().strftime(
+        f"{os.getcwd()}/%Y-%m-%d-bookworm-amd64-nvidia-rootfs.tar")
+    print(tarball)
+
+    subprocess.run([
+        "sudo",
+        "tar",
+        "--exclude=./usr/share/ca-certificates",
+        "--exclude=./usr/src",
+        "--exclude=./usr/lib/mesa-diverted",
+        "--exclude=./usr/bin/X11",
+        "--exclude=./usr/lib/systemd/system/system-systemd*cryptsetup.slice",
+        "--exclude=./dev",
+        "-cf",
+        tarball,
+        ".",
+    ],
+                   cwd=ROOTFS_FOLDER,
+                   check=True)
+
+    subprocess.run(["sha256sum", tarball], check=True)
+
+    return 0
+
+
+if __name__ == '__main__':
+    sys.exit(main())
diff --git a/frc971/amd64/contents/etc/apt/sources.list.d/bullseye-backports.list b/frc971/amd64/contents/etc/apt/sources.list.d/bullseye-backports.list
new file mode 100644
index 0000000..219e4bc
--- /dev/null
+++ b/frc971/amd64/contents/etc/apt/sources.list.d/bullseye-backports.list
@@ -0,0 +1,12 @@
+deb http://deb.debian.org/debian/ bookworm main contrib non-free non-free-firmware
+deb-src http://deb.debian.org/debian/ bookworm main contrib non-free non-free-firmware
+
+deb https://security.debian.org/debian-security bookworm-security main contrib non-free non-free-firmware
+deb-src https://security.debian.org/debian-security bookworm-security main contrib non-free non-free-firmware
+
+# bookworm-updates, previously known as 'volatile'
+deb http://deb.debian.org/debian/ bookworm-updates main contrib non-free non-free-firmware
+deb-src http://deb.debian.org/debian/ bookworm-updates main contrib non-free non-free-firmware
+
+deb http://deb.debian.org/debian bullseye-backports main
+deb http://deb.debian.org/debian bullseye main
diff --git a/frc971/halide.bzl b/frc971/halide.bzl
new file mode 100644
index 0000000..452716b
--- /dev/null
+++ b/frc971/halide.bzl
@@ -0,0 +1,51 @@
+def halide_library(name, src, function, args, visibility = None):
+    native.genrule(
+        name = name + "_build_generator",
+        outs = [
+            name + "_generator",
+        ],
+        srcs = [
+            src,
+        ],
+        cmd = "$(location //frc971:halide_generator_compile_script) $(OUTS) $(location " + src + ")",
+        tools = [
+            "//frc971:halide_generator_compile_script",
+        ],
+    )
+    native.genrule(
+        name = "generate_" + name,
+        srcs = [
+            ":" + name + "_generator",
+        ],
+        outs = [
+            name + ".h",
+            name + ".o",
+            name + ".stmt.html",
+        ],
+        # TODO(austin): Upgrade halide...
+        cmd = "$(location :" + name + "_generator) -g '" + function + "' -o $(RULEDIR) -f " + name + " -e 'o,h,html' " + select({
+            "@platforms//cpu:x86_64": "target=host ",
+            "@platforms//cpu:aarch64": "target=arm-64-linux-sve2-arm_dot_prod-arm_fp16-armv81a ",
+            "//conditions:default": "",
+        }) + args,
+        target_compatible_with = select({
+            "@platforms//cpu:x86_64": [],
+            "@platforms//cpu:arm64": [],
+            "//conditions:default": ["@platforms//:incompatible"],
+        }) + ["@platforms//os:linux"],
+    )
+
+    native.cc_library(
+        name = name,
+        srcs = [name + ".o"],
+        hdrs = [name + ".h"],
+        visibility = visibility,
+        target_compatible_with = select({
+            "@platforms//cpu:x86_64": [],
+            "@platforms//cpu:arm64": [],
+            "//conditions:default": ["@platforms//:incompatible"],
+        }) + ["@platforms//os:linux"],
+        deps = [
+            "//third_party:halide_runtime",
+        ],
+    )
diff --git a/frc971/halide_generator.sh b/frc971/halide_generator.sh
new file mode 100755
index 0000000..d6e5d41
--- /dev/null
+++ b/frc971/halide_generator.sh
@@ -0,0 +1,57 @@
+#!/bin/bash
+# We need to build code linked against Halide. This means we need to use a
+# compatible ABI. This means we need to use libstdc++, not libc++ like our main
+# toolchains are set up for.
+#
+# Rebuilding Halide itself is only moderately annoying. However, it needs to
+# link against LLVM, which is a much bigger pain to rebuild with libc++.
+#
+# To deal with this problem, this script runs clang hermetically on the
+# appropriate sources.
+# --- begin runfiles.bash initialization v2 ---
+# Copy-pasted from the Bazel Bash runfiles library v2.
+set -uo pipefail; f=bazel_tools/tools/bash/runfiles/runfiles.bash
+source "${RUNFILES_DIR:-/dev/null}/$f" 2>/dev/null || \
+  source "$(grep -sm1 "^$f " "${RUNFILES_MANIFEST_FILE:-/dev/null}" | cut -f2- -d' ')" 2>/dev/null || \
+  source "$0.runfiles/$f" 2>/dev/null || \
+  source "$(grep -sm1 "^$f " "$0.runfiles_manifest" | cut -f2- -d' ')" 2>/dev/null || \
+  source "$(grep -sm1 "^$f " "$0.exe.runfiles_manifest" | cut -f2- -d' ')" 2>/dev/null || \
+  { echo>&2 "ERROR: cannot find $f"; exit 1; }; f=; set -e
+# --- end runfiles.bash initialization v2 ---
+BINARY="$1"
+SOURCE="$2"
+HALIDE="$(rlocation halide_k8)"
+SYSROOT="$(rlocation amd64_debian_sysroot)"
+ZLIB1G_DEV_AMD64_DEB="$(rlocation deb_zlib1g_dev_1_2_11_dfsg_2_amd64_deb_repo/file/zlib1g-dev_1.2.11.dfsg-2_amd64.deb)"
+ZLIB1G_DEV="$(mktemp -d)"
+LLVM_TOOLCHAIN="$(dirname "$(dirname "$(rlocation llvm_k8/bin/clang)")")"
+dpkg-deb -x "${ZLIB1G_DEV_AMD64_DEB}" "${ZLIB1G_DEV}"
+TARGET=x86_64-unknown-linux-gnu
+MULTIARCH=x86_64-linux-gnu
+export LD_LIBRARY_PATH="${SYSROOT}/usr/lib:${SYSROOT}/lib:${ZLIB1G_DEV}/usr/lib/${MULTIARCH}"
+"${LLVM_TOOLCHAIN}/bin/clang++" \
+  -fcolor-diagnostics \
+  -I"${HALIDE}/include" \
+  -nostdinc \
+  -isystem"${SYSROOT}/usr/include/c++/12" \
+  -isystem"${SYSROOT}/usr/include/${MULTIARCH}/c++/12" \
+  -isystem"${SYSROOT}/usr/include/c++/12/backward" \
+  -isystem"${LLVM_TOOLCHAIN}/lib/clang/17/include" \
+  -isystem"${SYSROOT}/usr/include/${MULTIARCH}" \
+  -isystem"${SYSROOT}/usr/include" \
+  -isystem"${SYSROOT}/include" \
+  "--sysroot=${SYSROOT}" \
+  -resource-dir "${LLVM_TOOLCHAIN}/lib/clang/17" \
+  -target "${TARGET}" \
+  -fuse-ld=lld \
+  -L"${LLVM_TOOLCHAIN}/lib" \
+  -L"${SYSROOT}/usr/lib" \
+  -L"${SYSROOT}/usr/lib/gcc/${MULTIARCH}/12" \
+  -L"${ZLIB1G_DEV}/usr/lib/${MULTIARCH}" \
+  "${HALIDE}/lib/libHalide.a" \
+  -lstdc++ -lpthread -ldl -lm -lz \
+  -std=gnu++20 \
+  "${SOURCE}" \
+  "${HALIDE}/share/Halide/tools/GenGen.cpp" \
+  -ggdb3 \
+  -o "${BINARY}"
diff --git a/frc971/orin/BUILD b/frc971/orin/BUILD
new file mode 100644
index 0000000..5675ab4
--- /dev/null
+++ b/frc971/orin/BUILD
@@ -0,0 +1,9 @@
+load("//frc971:halide.bzl", "halide_library")
+
+halide_library(
+    name = "ycbcr",
+    src = "crcv_generator.cc",
+    args = "rows=1088 cols=1456 ystride=2048 cbcrstride=3840",
+    function = "ycbcr",
+    visibility = ["//visibility:public"],
+)
diff --git a/frc971/orin/build_rootfs.py b/frc971/orin/build_rootfs.py
index f82bed7..ddbb853 100755
--- a/frc971/orin/build_rootfs.py
+++ b/frc971/orin/build_rootfs.py
@@ -5,7 +5,9 @@
 import collections
 import subprocess
 import shlex
+import datetime
 import os
+import shutil
 
 IMAGE = "arm64_bookworm_debian_yocto.img"
 YOCTO = "/home/austin/local/jetpack/robot-yocto/build"
@@ -295,7 +297,8 @@
             "gstreamer1.0-plugins-ugly", "gstreamer1.0-nice", "usbutils",
             "locales", "trace-cmd", "clinfo", "jq", "strace", "sysstat",
             "lm-sensors", "can-utils", "xfsprogs", "gstreamer1.0-tools",
-            "bridge-utils", "net-tools", "apt-file", "parted", "xxd"
+            "bridge-utils", "net-tools", "apt-file", "parted", "xxd",
+            "libv4l-dev"
         ])
         target(["apt-get", "clean"])
 
@@ -311,16 +314,43 @@
         install_virtual_packages(virtual_packages)
 
         yocto_package_names = [
-            'tegra-argus-daemon', 'tegra-firmware', 'tegra-firmware-tegra234',
-            'tegra-firmware-vic', 'tegra-firmware-xusb',
-            'tegra-libraries-argus-daemon-base', 'tegra-libraries-camera',
-            'tegra-libraries-core', 'tegra-libraries-cuda',
-            'tegra-libraries-eglcore', 'tegra-libraries-glescore',
-            'tegra-libraries-glxcore', 'tegra-libraries-multimedia',
+            'tegra-argus-daemon',
+            'tegra-firmware',
+            'tegra-firmware-tegra234',
+            'tegra-firmware-vic',
+            'tegra-firmware-xusb',
+            'tegra-libraries-argus-daemon-base',
+            'tegra-libraries-camera',
+            'tegra-libraries-core',
+            'tegra-libraries-cuda',
+            'tegra-libraries-eglcore',
+            'tegra-libraries-glescore',
+            'tegra-libraries-glxcore',
+            'tegra-libraries-multimedia',
             'tegra-libraries-multimedia-utils',
-            'tegra-libraries-multimedia-v4l', 'tegra-libraries-nvsci',
-            'tegra-libraries-vulkan', 'tegra-nvphs', 'tegra-nvphs-base',
-            'libnvidia-egl-wayland1'
+            'tegra-libraries-multimedia-v4l',
+            'tegra-libraries-nvsci',
+            'tegra-libraries-vulkan',
+            'tegra-nvphs',
+            'tegra-nvphs-base',
+            'libnvidia-egl-wayland1',
+            'tegra-mmapi',
+            'tegra-mmapi-dev',
+            'cuda-cudart-11-8',
+            'cuda-cudart-11-8-dev',
+            'cuda-cudart-11-8-stubs',
+            'libcurand-11-8',
+            'libcurand-11-8-dev',
+            'libcurand-11-8-stubs',
+            'cuda-nvcc-11-8',
+            'tegra-cmake-overrides',
+            'cuda-target-environment',
+            'libnpp-11-8',
+            'libnpp-11-8-stubs',
+            'libnpp-11-8-dev',
+            'cuda-cccl-11-8',
+            'cuda-nvcc-11-8',
+            'cuda-nvcc-headers-11-8',
         ]
         yocto_packages = list_yocto_packages()
         packages = list_packages()
@@ -407,6 +437,56 @@
             )
             target(["vim", "-c", "\":qa!\""])
 
+        tarball = datetime.date.today().strftime(
+            f"{os.getcwd()}/%Y-%m-%d-bookworm-arm64-nvidia-rootfs.tar")
+        print(tarball)
+
+        subprocess.run([
+            "sudo",
+            "tar",
+            "--exclude=./usr/share/ca-certificates",
+            "--exclude=./home",
+            "--exclude=./root",
+            "--exclude=./usr/src",
+            "--exclude=./usr/lib/mesa-diverted",
+            "--exclude=./usr/bin/X11",
+            "--exclude=./usr/lib/systemd/system/system-systemd*cryptsetup.slice",
+            "--exclude=./dev",
+            "--exclude=./usr/local/cuda-11.8/bin/fatbinary",
+            "--exclude=./usr/local/cuda-11.8/bin/ptxas",
+            "-cf",
+            tarball,
+            ".",
+        ],
+                       cwd=partition,
+                       check=True)
+
+        # Pack ptxas and fatbinary into the spots where clang expects them, to make compiling easy.
+        nvidia_cuda_toolkit_path = 'nvidia-cuda-toolkit'
+        if not os.path.exists(nvidia_cuda_toolkit_path):
+            os.mkdir(nvidia_cuda_toolkit_path)
+
+            subprocess.run(['apt-get', 'download', 'nvidia-cuda-toolkit'],
+                           cwd=nvidia_cuda_toolkit_path,
+                           check=True)
+
+            subprocess.run(
+                ['dpkg', '-x',
+                 os.listdir(nvidia_cuda_toolkit_path)[0], '.'],
+                cwd=nvidia_cuda_toolkit_path,
+                check=True)
+
+        subprocess.run([
+            "sudo", "tar",
+            '--transform=s|usr/bin/ptxas|usr/local/cuda-11.8/bin/ptxas|',
+            '--transform=s|usr/bin/fatbinary|usr/local/cuda-11.8/bin/aarch64-unknown-linux-gnu-fatbinary|',
+            "--append", "-f", tarball, "usr/bin/fatbinary", "usr/bin/ptxas"
+        ],
+                       cwd=nvidia_cuda_toolkit_path,
+                       check=True)
+
+        subprocess.run(["sha256sum", tarball], check=True)
+
 
 if __name__ == '__main__':
     main()
diff --git a/frc971/orin/crcv_generator.cc b/frc971/orin/crcv_generator.cc
new file mode 100644
index 0000000..99318e6
--- /dev/null
+++ b/frc971/orin/crcv_generator.cc
@@ -0,0 +1,96 @@
+#include <iostream>
+
+#include "Halide.h"
+
+#define CHECK(x, message, ...)                                              \
+  do {                                                                      \
+    if (!(x)) {                                                             \
+      fprintf(stderr, "assertion failed: " message ": %s\n", ##__VA_ARGS__, \
+              #x);                                                          \
+      abort();                                                              \
+    }                                                                       \
+  } while (0)
+
+// This is a Halide "generator". This means it is a binary which generates
+// ahead-of-time optimized functions as directed by command-line arguments.
+// https://halide-lang.org/tutorials/tutorial_lesson_15_generators.html has an
+// introduction to much of the magic in this file.
+namespace frc971 {
+namespace orin {
+namespace {
+
+template <typename T>
+void SetRowMajor(T *buffer_parameter, int cols, int rows) {
+  buffer_parameter->dim(0).set_stride(3);
+  buffer_parameter->dim(0).set_extent(cols);
+  buffer_parameter->dim(0).set_min(0);
+
+  buffer_parameter->dim(1).set_stride(cols * 3);
+  buffer_parameter->dim(1).set_extent(rows);
+  buffer_parameter->dim(1).set_min(0);
+
+  buffer_parameter->dim(2).set_stride(1);
+  buffer_parameter->dim(2).set_extent(3);
+  buffer_parameter->dim(2).set_min(0);
+}
+}  // namespace
+
+// Takes an image with y in one plane with a provided stride, and cbcr in
+// another with a provided stride and makes a ycbcr output image.
+class YCbCr : public Halide::Generator<YCbCr> {
+ public:
+  GeneratorParam<int> cols{"cols", 0};
+  GeneratorParam<int> rows{"rows", 0};
+  GeneratorParam<int> ystride{"ystride", 0};
+  GeneratorParam<int> cbcrstride{"cbcrstride", 0};
+
+  Input<Buffer<uint8_t, 2>> input_y{"y"};
+  Input<Buffer<uint8_t, 3>> input_cbcr{"cbcr"};
+  Output<Buffer<uint8_t, 3>> output{"output"};
+
+  Var col{"col"}, row{"row"}, channel{"channel"};
+
+  // Everything is indexed as col, row, channel.
+  void generate() {
+    CHECK(cols > 0, "Must specify a cols");
+    CHECK(rows > 0, "Must specify a rows");
+
+    input_y.dim(0).set_stride(1);
+    input_y.dim(0).set_extent(cols);
+    input_y.dim(0).set_min(0);
+
+    input_y.dim(1).set_stride(ystride);
+    input_y.dim(1).set_extent(rows);
+    input_y.dim(1).set_min(0);
+
+    input_cbcr.dim(0).set_stride(2);
+    input_cbcr.dim(0).set_extent(cols);
+    input_cbcr.dim(0).set_min(0);
+
+    input_cbcr.dim(1).set_stride(cbcrstride);
+    input_cbcr.dim(1).set_extent(rows);
+    input_cbcr.dim(1).set_min(0);
+
+    input_cbcr.dim(2).set_stride(1);
+    input_cbcr.dim(2).set_extent(2);
+    input_cbcr.dim(2).set_min(0);
+
+    output(col, row, channel) =
+        Halide::select(channel == 0, input_y(col, row),
+                       Halide::select(channel == 1, input_cbcr(col, row, 0),
+                                      input_cbcr(col, row, 1)));
+
+    output.reorder(channel, col, row);
+    output.unroll(channel);
+
+    output.vectorize(col, 8);
+    output.unroll(col, 4);
+
+    SetRowMajor(&output, cols, rows);
+  }
+};
+
+}  // namespace orin
+}  // namespace frc971
+
+HALIDE_REGISTER_GENERATOR(frc971::orin::YCbCr, ycbcr)
diff --git a/frc971/vision/geometry.h b/frc971/vision/geometry.h
index 7858418..3285446 100644
--- a/frc971/vision/geometry.h
+++ b/frc971/vision/geometry.h
@@ -1,6 +1,8 @@
 #ifndef FRC971_VISION_GEOMETRY_H_
 #define FRC971_VISION_GEOMETRY_H_
 
+#include <optional>
+
 #include "glog/logging.h"
 #include "opencv2/core/types.hpp"
 
diff --git a/y2020/vision/sift/fast_gaussian_halide_generator.sh b/y2020/vision/sift/fast_gaussian_halide_generator.sh
index 560e54f..87fb20d 100755
--- a/y2020/vision/sift/fast_gaussian_halide_generator.sh
+++ b/y2020/vision/sift/fast_gaussian_halide_generator.sh
@@ -40,8 +40,8 @@
   -fcolor-diagnostics \
   -I"${HALIDE}/include" \
   -nostdinc \
-  -isystem"${SYSROOT}/usr/include/c++/10" \
-  -isystem"${SYSROOT}/usr/include/${MULTIARCH}/c++/10" \
+  -isystem"${SYSROOT}/usr/include/c++/12" \
+  -isystem"${SYSROOT}/usr/include/${MULTIARCH}/c++/12" \
   -isystem"${SYSROOT}/usr/include/c++/7/backward" \
   -isystem"${LLVM_TOOLCHAIN}/lib/clang/17/include" \
   -isystem"${SYSROOT}/usr/include/${MULTIARCH}" \
diff --git a/y2023/vision/BUILD b/y2023/vision/BUILD
index 8e72263..6a67316 100644
--- a/y2023/vision/BUILD
+++ b/y2023/vision/BUILD
@@ -1,6 +1,7 @@
+load("//frc971:halide.bzl", "halide_library")
+load("//tools/build_rules:select.bzl", "cpu_select")
 load("@com_github_google_flatbuffers//:build_defs.bzl", "flatbuffer_cc_library")
 load("@com_github_google_flatbuffers//:typescript.bzl", "flatbuffer_ts_library")
-load("//tools/build_rules:select.bzl", "cpu_select")
 
 cc_binary(
     name = "camera_reader",
@@ -101,6 +102,22 @@
     ],
 )
 
+halide_library(
+    name = "ToGreyscaleAndDecimateHalide",
+    src = "april_generator.cc",
+    args = "rows=720 cols=1280",
+    function = "decimate_generator",  # Matches the name registered in april_generator.cc.
+    visibility = ["//visibility:public"],
+)
+
+halide_library(
+    name = "ThresholdHalide",
+    src = "april_generator.cc",
+    args = "rows=360 cols=640",  # Half of rows=720 cols=1280 above.
+    function = "threshold_generator",  # Matches the name registered in april_generator.cc.
+    visibility = ["//visibility:public"],
+)
+
 cc_library(
     name = "aprilrobotics_lib",
     srcs = [
diff --git a/y2023/vision/april_generator.cc b/y2023/vision/april_generator.cc
new file mode 100644
index 0000000..4ac389a
--- /dev/null
+++ b/y2023/vision/april_generator.cc
@@ -0,0 +1,157 @@
+#include "Halide.h"
+
+#define CHECK(x, message, ...)                                              \
+  do { /* If x is false: print message (printf-style) and x, then abort. */ \
+    if (!(x)) {                                                             \
+      fprintf(stderr, "assertion failed: " message ": %s\n", ##__VA_ARGS__, \
+              #x);                                                          \
+      abort();                                                              \
+    }                                                                       \
+  } while (0)  // do/while (0) lets a call site end with a plain semicolon.
+
+// This is a Halide "generator". This means it is a binary which generates
+// ahead-of-time optimized functions as directed by command-line arguments.
+// https://halide-lang.org/tutorials/tutorial_lesson_15_generators.html has an
+// introduction to much of the magic in this file.
+
+namespace frc971 {
+namespace vision {
+namespace {
+
+// Returns a function implementing a 1-dimensional gaussian blur convolution.
+Halide::Func GenerateBlur(std::string name, Halide::Func in, int col_step,
+                          int row_step, int radius, std::vector<float> kernel,
+                          Halide::Var col, Halide::Var row) {
+  Halide::Expr expr = kernel[0] * in(col, row);  // Center tap.
+  for (int i = 1; i <= radius; ++i) {  // Requires kernel.size() > radius.
+    expr += kernel[i] * (in(col - i * col_step, row - i * row_step) +
+                         in(col + i * col_step, row + i * row_step));
+  }  // kernel[i] weighted both taps i steps either side of the center.
+  Halide::Func func(name);
+  func(col, row) = expr;
+  return func;
+}
+
+template <typename T>  // Pins a 2-D buffer to a dense cols x rows layout.
+void SetRowMajor(T *buffer_parameter, int cols, int rows) {
+  buffer_parameter->dim(0).set_stride(1);  // Columns are 1 element apart.
+  buffer_parameter->dim(0).set_extent(cols);
+  buffer_parameter->dim(0).set_min(0);
+  buffer_parameter->dim(1).set_stride(cols);  // Rows are cols elements apart.
+  buffer_parameter->dim(1).set_extent(rows);
+  buffer_parameter->dim(1).set_min(0);
+}
+
+}  // namespace
+
+class DecimateGenerator : public Halide::Generator<DecimateGenerator> {
+ public:
+  GeneratorParam<int> cols{"cols", 0};  // Must be set > 0; see generate().
+  GeneratorParam<int> rows{"rows", 0};  // Must be set > 0; see generate().
+
+  Input<Buffer<uint8_t>> input{"input", 3};
+  Output<Buffer<uint8_t>> output{"output", 2};
+  Output<Buffer<uint8_t>> decimated_output{"decimated_output", 2};
+
+  Var col{"col"}, row{"row"};
+  // Emits channel 0 as `output` plus a 2x-decimated copy, decimated_output.
+  void generate() {
+    CHECK(cols > 0, "Must specify a cols");
+    CHECK(rows > 0, "Must specify a rows");
+    // Input layout: 2 interleaved bytes per pixel, rows packed back to back.
+    input.dim(0).set_stride(2);
+    input.dim(0).set_extent(cols);
+    input.dim(0).set_min(0);
+
+    input.dim(1).set_stride(cols * 2);
+    input.dim(1).set_extent(rows);
+    input.dim(1).set_min(0);
+
+    input.dim(2).set_stride(1);
+    input.dim(2).set_extent(2);
+    input.dim(2).set_min(0);
+    // Keep only channel 0 (presumably the luma byte - TODO confirm).
+    output(col, row) = input(col, row, 0);
+    decimated_output(col, row) = output(col * 2, row * 2);
+
+    decimated_output.compute_at(output, col);
+
+    decimated_output.vectorize(col, 16);
+
+    SetRowMajor(&output, cols, rows);
+    // The decimated output is half the size in each dimension.
+    SetRowMajor(&decimated_output, cols / 2, rows / 2);
+  }
+};
+
+class ThresholdGenerator : public Halide::Generator<ThresholdGenerator> {
+ public:
+  GeneratorParam<int> rows{"rows", 0};  // Must be set > 0; see generate().
+  GeneratorParam<int> cols{"cols", 0};  // Must be set > 0; see generate().
+
+  Input<Buffer<uint8_t>> input{"input", 2};
+  Output<Buffer<uint8_t>> output{"output", 2};
+
+  Var x{"x"}, y{"y"};
+
+  Func threshold{"threshold"}, threshold_max{"threshold_max"},
+      threshold_min{"threshold_min"},
+      convoluted_threshold_max{"convoluted_threshold_max"},
+      convoluted_threshold_min{"convoluted_threshold_min"};
+  // Maps each input pixel to 0, 127 or 255 using per-tile min/max statistics.
+  void generate() {
+    CHECK(cols > 0, "Columns must be more than 0");
+    CHECK(rows > 0, "Rows must be more than 0");
+
+    const int tile_size = 4;
+    // Brightest and darkest pixel within each tile_size x tile_size tile.
+    RDom r(0, tile_size, 0, tile_size);
+
+    threshold_max(x, y) =
+        maximum(input(r.x + x * tile_size, r.y + y * tile_size));
+    threshold_min(x, y) =
+        minimum(input(r.x + x * tile_size, r.y + y * tile_size));
+    // Extrema over the 3x3 block of neighbouring tiles, clamped at the edges.
+    RDom r_conv(-1, 3, -1, 3);
+
+    convoluted_threshold_max(x, y) =
+        maximum(threshold_max(clamp(x + r_conv.x, 0, cols / tile_size - 1),
+                              clamp(y + r_conv.y, 0, rows / tile_size - 1)));
+
+    convoluted_threshold_min(x, y) =
+        minimum(threshold_min(clamp(x + r_conv.x, 0, cols / tile_size - 1),
+                              clamp(y + r_conv.y, 0, rows / tile_size - 1)));
+    // Per-tile threshold: halfway between the neighbourhood min and max.
+    threshold(x, y) =
+        convoluted_threshold_min(x, y) +
+        (convoluted_threshold_max(x, y) - convoluted_threshold_min(x, y)) / 2;
+    // Low-contrast tiles (max - min < 5) give 127; the rest binarize to 255/0.
+    output(x, y) =
+        select(convoluted_threshold_max(x / tile_size, y / tile_size) -
+                       convoluted_threshold_min(x / tile_size, y / tile_size) <
+                   5,
+               Expr((uint8_t)(127)),
+               select(input(x, y) > threshold(x / tile_size, y / tile_size),
+                      Expr((uint8_t)(255)), Expr((uint8_t)(0))));
+
+    SetRowMajor(&output, cols, rows);
+
+    Var xi, yi;
+    // Schedule: tile the output; compute every intermediate stage at root.
+    output.compute_root().tile(x, y, xi, yi, tile_size, tile_size);
+    threshold.compute_root();
+    convoluted_threshold_min.compute_root();
+    convoluted_threshold_max.compute_root();
+    threshold_min.compute_root();
+    threshold_max.compute_root();
+  }
+};
+
+}  // namespace vision
+}  // namespace frc971
+
+// TODO(austin): Combine the functions and optimize for device/host and all that
+// jazz.
+HALIDE_REGISTER_GENERATOR(frc971::vision::DecimateGenerator, decimate_generator)
+HALIDE_REGISTER_GENERATOR(frc971::vision::ThresholdGenerator,
+                          threshold_generator)