Sandbox libxml2 and switch clang to zstd compression

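When executing clang on a freshly installed Debian image, it fails to
start because it can't find libxml2.  Sandbox libxml2 (along with the
ICU libraries bundled with it) the same way libtinfo5 already is.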

While we are here, switch the clang and sysroot tarballs from xz to
zstd, since zstd archives extract faster than xz.

Change-Id: Ia196ae49223b488f5eabe28b67e6f274b3072795
Signed-off-by: Austin Schuh <austin.linux@gmail.com>
diff --git a/WORKSPACE b/WORKSPACE
index 719cf93..42756e2 100644
--- a/WORKSPACE
+++ b/WORKSPACE
@@ -188,6 +188,10 @@
     "//debian:xvfb_amd64.bzl",
     xvfb_amd64_debs = "files",
 )
+load(
+    "//debian:clang_amd64.bzl",
+    clang_amd64_debs = "files",
+)
 load("//debian:packages.bzl", "generate_repositories_for_debs")
 
 generate_repositories_for_debs(rsync_debs)
@@ -231,6 +235,8 @@
 
 generate_repositories_for_debs(xvfb_amd64_debs)
 
+generate_repositories_for_debs(clang_amd64_debs)
+
 local_repository(
     name = "com_grail_bazel_toolchain",
     path = "third_party/bazel-toolchain",
@@ -249,7 +255,7 @@
 
 llvm(
     name = "llvm_k8",
-    distribution = "clang+llvm-%s-x86_64-linux-gnu-ubuntu-22.04.tar.xz" % llvm_version,
+    distribution = "clang+llvm-%s-x86_64-linux-gnu-ubuntu-22.04.tar.zst" % llvm_version,
     llvm_version = llvm_version,
 )
 
@@ -458,8 +464,8 @@
 http_archive(
     name = "amd64_debian_sysroot",
     build_file = "@//:compilers/amd64_debian_rootfs.BUILD",
-    sha256 = "5d9d4131f3997d8543d45e673bfb15e21d7ca4c64923da91ee8d06f801dddb59",
-    url = "https://software.frc971.org/Build-Dependencies/2023-12-10-bookworm-amd64-nvidia-rootfs.tar.xz",
+    sha256 = "ceaf7e3fd4af04aca2ff0d55c94ce30c2b45d1136b0e81e9be5ebc1003f96052",
+    url = "https://software.frc971.org/Build-Dependencies/2023-12-10-bookworm-amd64-nvidia-rootfs.tar.zst",
 )
 
 local_repository(
@@ -1386,19 +1392,28 @@
 )
 
 http_archive(
-    name = "libtinfo5_amd64",
+    name = "clang_amd64_deps",
     build_file_content = """
-exports_files(
-    [
-        'lib/x86_64-linux-gnu/libtinfo.so.5',
-        'lib/x86_64-linux-gnu/libtinfo.so.5.9',
-    ],
-    ["//visibility:public"],
+libs = [
+    'lib/x86_64-linux-gnu/libtinfo.so.5',
+    'lib/x86_64-linux-gnu/libtinfo.so.5.9',
+    'usr/lib/x86_64-linux-gnu/libxml2.so.2',
+    'usr/lib/x86_64-linux-gnu/libxml2.so.2.9.14',
+    'usr/lib/x86_64-linux-gnu/libicuuc.so.72',
+    'usr/lib/x86_64-linux-gnu/libicuuc.so.72.1',
+    'usr/lib/x86_64-linux-gnu/libicudata.so.72',
+    'usr/lib/x86_64-linux-gnu/libicudata.so.72.1',
+]
+exports_files(libs, ["//visibility:public"])
+
+filegroup(
+    name = "all",
+    srcs = libs,
+    visibility = ["//visibility:public"],
 )
 """,
-    patch_cmds = ["touch lib/x86_64-linux-gnu/BUILD"],
-    sha256 = "059e14f77dce365c57b96284aae98c892f61e269b3fbb7d07714b7135c2e5617",
-    urls = ["https://software.frc971.org/Build-Dependencies/libtinfo5_amd64.tar.gz"],
+    sha256 = "6ae7cbedd9b1d54da095d460e2832c2a3e2917fbfa2ed22c6787d4b527a5677d",
+    urls = ["https://software.frc971.org/Build-Dependencies/clang_amd64.tar.gz"],
 )
 
 http_archive(
diff --git a/debian/BUILD b/debian/BUILD
index 72ce799..a7b3b97 100644
--- a/debian/BUILD
+++ b/debian/BUILD
@@ -70,6 +70,10 @@
     ":xvfb_amd64.bzl",
     xvfb_amd64_debs = "files",
 )
+load(
+    ":clang_amd64.bzl",
+    clang_amd64_debs = "files",
+)
 load(":packages.bzl", "download_packages", "generate_deb_tarball")
 
 package(default_visibility = ["//visibility:public"])
@@ -154,23 +158,19 @@
 
 download_packages(
     name = "download_clang_deps",
-    excludes = [
-        "lib32stdc++6",
-        "libstdc++6",
-    ],
-    force_includes = [
-        "libc6",
-        "libc6-dev",
-    ],
     packages = [
-        "clang-6.0",
-        "clang-format-6.0",
-        "gcc",
-        "gfortran",
+        "libtinfo5",
+        "libxml2",
     ],
     target_compatible_with = ["@platforms//os:linux"],
 )
 
+generate_deb_tarball(
+    name = "clang_amd64",
+    files = clang_amd64_debs,
+    target_compatible_with = ["@platforms//os:linux"],
+)
+
 download_packages(
     name = "download_postgresql_deps",
     excludes = [
diff --git a/debian/clang_amd64.bzl b/debian/clang_amd64.bzl
new file mode 100644
index 0000000..dfa7612
--- /dev/null
+++ b/debian/clang_amd64.bzl
@@ -0,0 +1,6 @@
+files = {
+    "libicu72_72.1-3_amd64.deb": "e239c1c9f52bee0ff627f291552d63691b765ec7c5cdf6de7c7ae4dec0275857",
+    "libstdc++6_12.2.0-14_amd64.deb": "9b1b269020cec6aced3b39f096f7b67edd1f0d4ab24f412cb6506d0800e19cbf",
+    "libtinfo5_6.4-4_amd64.deb": "dd347f794e651039e7b4c391f86c674fed7f415b3dca6b0937beb0d470f09c1a",
+    "libxml2_2.9.14+dfsg-1.3~deb12u1_amd64.deb": "35b76cb7038fc1c940204a4f05f33ffb79d027353ce469397d9adcf8f9b3e1a7",
+}
diff --git a/debian/packages.bzl b/debian/packages.bzl
index d39f613..97d6355 100644
--- a/debian/packages.bzl
+++ b/debian/packages.bzl
@@ -32,7 +32,7 @@
 #    output from the previous step.
 # 4. Follow steps 2., 5., and 6. from "adding new packages".
 
-def download_packages(name, packages, excludes = [], force_includes = [], force_excludes = [], target_compatible_with = None, release = "bullseye"):
+def download_packages(name, packages, excludes = [], force_includes = [], force_excludes = [], target_compatible_with = None, release = "bookworm"):
     """Downloads a set of packages as well as their dependencies.
 
     You can also specify excludes in case some of the dependencies are meta
diff --git a/frc971/BUILD b/frc971/BUILD
index 5a357ab..8f749f3 100644
--- a/frc971/BUILD
+++ b/frc971/BUILD
@@ -46,6 +46,7 @@
     ],
     data = [
         "@amd64_debian_sysroot//:sysroot_files",
+        "@clang_amd64_deps//:all",
         "@deb_zlib1g_dev_1_2_11_dfsg_2_amd64_deb_repo//file",
         "@halide_k8//:build_files",
         "@llvm_toolchain//:all-components-x86_64-linux",
diff --git a/frc971/halide_generator.sh b/frc971/halide_generator.sh
index d6e5d41..b0d2698 100755
--- a/frc971/halide_generator.sh
+++ b/frc971/halide_generator.sh
@@ -25,10 +25,11 @@
 ZLIB1G_DEV_AMD64_DEB="$(rlocation deb_zlib1g_dev_1_2_11_dfsg_2_amd64_deb_repo/file/zlib1g-dev_1.2.11.dfsg-2_amd64.deb)"
 ZLIB1G_DEV="$(mktemp -d)"
 LLVM_TOOLCHAIN="$(dirname "$(dirname "$(rlocation llvm_k8/bin/clang)")")"
+LLVM_LIBS="$(rlocation clang_amd64_deps)/usr/lib/x86_64-linux-gnu/"
 dpkg-deb -x "${ZLIB1G_DEV_AMD64_DEB}" "${ZLIB1G_DEV}"
 TARGET=x86_64-unknown-linux-gnu
 MULTIARCH=x86_64-linux-gnu
-export LD_LIBRARY_PATH="${SYSROOT}/usr/lib:${SYSROOT}/lib:${ZLIB1G_DEV}/usr/lib/${MULTIARCH}"
+export LD_LIBRARY_PATH="${SYSROOT}/usr/lib:${SYSROOT}/lib:${ZLIB1G_DEV}/usr/lib/${MULTIARCH}:${LLVM_LIBS}"
 "${LLVM_TOOLCHAIN}/bin/clang++" \
   -fcolor-diagnostics \
   -I"${HALIDE}/include" \
diff --git a/third_party/bazel-toolchain/toolchain/cc_wrapper.sh.tpl b/third_party/bazel-toolchain/toolchain/cc_wrapper.sh.tpl
index 6be2fbb..e575191 100644
--- a/third_party/bazel-toolchain/toolchain/cc_wrapper.sh.tpl
+++ b/third_party/bazel-toolchain/toolchain/cc_wrapper.sh.tpl
@@ -32,6 +32,7 @@
 
 # Call the C++ compiler.
 if [[ -f %{toolchain_path_prefix}bin/clang ]]; then
+  export LD_LIBRARY_PATH=external/llvm_toolchain/llvm/lib/
   exec %{toolchain_path_prefix}bin/clang "$@"
 elif [[ "${BASH_SOURCE[0]}" == "/"* ]]; then
   # Some consumers of `CcToolchainConfigInfo` (e.g. `cmake` from rules_foreign_cc)
@@ -41,6 +42,7 @@
   # This script is at _execroot_/external/_repo_name_/bin/clang_wrapper.sh
   execroot_path="${BASH_SOURCE[0]%/*/*/*/*}"
   clang="${execroot_path}/%{toolchain_path_prefix}bin/clang"
+  export LD_LIBRARY_PATH="${execroot_path}/external/llvm_toolchain/llvm/lib/"
   exec "${clang}" "${@}"
 else
   >&2 echo "ERROR: could not find clang; PWD=\"$(pwd)\"; PATH=\"${PATH}\"."
diff --git a/third_party/bazel-toolchain/toolchain/internal/llvm_distributions.bzl b/third_party/bazel-toolchain/toolchain/internal/llvm_distributions.bzl
index a6b6f5c..57f8bf6 100644
--- a/third_party/bazel-toolchain/toolchain/internal/llvm_distributions.bzl
+++ b/third_party/bazel-toolchain/toolchain/internal/llvm_distributions.bzl
@@ -309,6 +309,7 @@
     "clang+llvm-17.0.2-sparc64-unknown-linux-gnu.tar.xz": "950d1ef440f17e29c4201450ad619d3b4a37a0bbf15f19ce03195e0b4da7d73f",
     "clang+llvm-17.0.2-sparcv9-sun-solaris2.11.tar.xz": "3702914668b5758817374271fa8a41fe67c77b2e86f17706c9d6906f250de6ae",
     "clang+llvm-17.0.2-x86_64-linux-gnu-ubuntu-22.04.tar.xz": "df297df804766f8fb18f10a188af78e55d82bb8881751408c2fa694ca19163a8",
+    "clang+llvm-17.0.2-x86_64-linux-gnu-ubuntu-22.04.tar.zst": "f2eec88faa0036ae2af03af26bd7cb5c6cb0223659b1d7d33739aa9008af5964",
 }
 
 # Note: Unlike the user-specified llvm_mirror attribute, the URL prefixes in
@@ -396,9 +397,14 @@
             urls.append(pattern.format(llvm_version = llvm_version, basename = basename))
     urls.append("{0}{1}".format(_llvm_distributions_base_url[llvm_version], url_suffix))
 
+    if basename.endswith(".tar.zst"):
+        stripPrefix = basename[:(len(basename) - len(".tar.zst"))]
+    else:
+        stripPrefix = basename[:(len(basename) - len(".tar.xz"))]
+
     rctx.download_and_extract(
         urls,
         sha256 = _llvm_distributions[basename],
-        stripPrefix = basename[:(len(basename) - len(".tar.xz"))],
+        stripPrefix = stripPrefix,
         auth = _get_auth(rctx, urls),
     )
diff --git a/third_party/bazel-toolchain/toolchain/internal/repo.bzl b/third_party/bazel-toolchain/toolchain/internal/repo.bzl
index bf23558..9a84922 100644
--- a/third_party/bazel-toolchain/toolchain/internal/repo.bzl
+++ b/third_party/bazel-toolchain/toolchain/internal/repo.bzl
@@ -41,9 +41,21 @@
     if os == "linux":
         if arch == "x86_64":
             rctx.symlink(
-                Label("@libtinfo5_amd64//lib/x86_64-linux-gnu:libtinfo.so.5.9"),
+              Label("@clang_amd64_deps//:lib/x86_64-linux-gnu/libtinfo.so.5.9"),
                 "lib/libtinfo.so.5.9",
             )
+            rctx.symlink(
+              Label("@clang_amd64_deps//:usr/lib/x86_64-linux-gnu/libxml2.so.2.9.14"),
+                "lib/libxml2.so.2.9.14",
+            )
+            rctx.symlink("lib/libxml2.so.2.9.14", "lib/libxml2.so.2")
+
+            for lib in ["libicudata", "libicuuc"]:
+                rctx.symlink(
+                  Label("@clang_amd64_deps//:usr/lib/x86_64-linux-gnu/" + lib + ".so.72.1"),
+                    "lib/" + lib + ".so.72.1",
+                )
+                rctx.symlink("lib/" + lib + ".so.72.1", "lib/" + lib + ".so.72")
         elif arch == "aarch64":
             rctx.symlink(
                 Label("@libtinfo5_arm64//lib/aarch64-linux-gnu:libtinfo.so.5.9"),
diff --git a/y2020/vision/sift/BUILD b/y2020/vision/sift/BUILD
index 1d4083e..8b420ea 100644
--- a/y2020/vision/sift/BUILD
+++ b/y2020/vision/sift/BUILD
@@ -34,6 +34,7 @@
         "fast_gaussian_generator.cc",
         "get_gaussian_kernel.h",
         "@amd64_debian_sysroot//:sysroot_files",
+        "@clang_amd64_deps//:all",
         "@deb_zlib1g_dev_1_2_11_dfsg_2_amd64_deb_repo//file",
         "@halide_k8//:build_files",
         "@llvm_toolchain//:all-components-x86_64-linux",
diff --git a/y2020/vision/sift/fast_gaussian_halide_generator.sh b/y2020/vision/sift/fast_gaussian_halide_generator.sh
index 87fb20d..b33e919 100755
--- a/y2020/vision/sift/fast_gaussian_halide_generator.sh
+++ b/y2020/vision/sift/fast_gaussian_halide_generator.sh
@@ -30,11 +30,12 @@
 ZLIB1G_DEV="$(mktemp -d)"
 
 LLVM_TOOLCHAIN="$(dirname "$(dirname "$(rlocation llvm_k8/bin/clang)")")"
+LLVM_LIBS="$(rlocation clang_amd64_deps)/usr/lib/x86_64-linux-gnu/"
 dpkg-deb -x "${ZLIB1G_DEV_AMD64_DEB}" "${ZLIB1G_DEV}"
 TARGET=x86_64-unknown-linux-gnu
 MULTIARCH=x86_64-linux-gnu
 
-export LD_LIBRARY_PATH="${SYSROOT}/usr/lib:${SYSROOT}/lib:${ZLIB1G_DEV}/usr/lib/${MULTIARCH}"
+export LD_LIBRARY_PATH="${SYSROOT}/usr/lib:${SYSROOT}/lib:${ZLIB1G_DEV}/usr/lib/${MULTIARCH}:${LLVM_LIBS}"
 
 "${LLVM_TOOLCHAIN}/bin/clang++" \
   -fcolor-diagnostics \