"""This script mirrors our pip package dependencies.

This script looks at the requirements.lock.txt file and generates a wheel for
each entry. Those wheels are then mirrored.

See tools/python/README.md for some more information.
"""
| 8 | |
| 9 | import argparse |
| 10 | import hashlib |
| 11 | import json |
| 12 | import os |
| 13 | import pwd |
| 14 | import subprocess |
| 15 | import sys |
| 16 | import tarfile |
| 17 | from pathlib import Path |
| 18 | from typing import List, Optional, Tuple |
| 19 | |
| 20 | import requests |
| 21 | from pkginfo import Wheel |
| 22 | |
# Passed (as a string) to generate_pip_packages_in_docker.sh.
# NOTE(review): presumably 39 means CPython 3.9 -- confirm against the script.
PYTHON_VERSION = 39
# Platform tag and CPU architecture. Together they select the
# quay.io/pypa/{PLAT}_{ARCH} docker image and are also passed to the wheel
# generation script.
PLAT = "manylinux_2_28"
ARCH = "x86_64"
# Base URL under which a mirrored wheel is expected to be downloadable as
# {WHEELHOUSE_MIRROR_URL}/{wheel file name}.
WHEELHOUSE_MIRROR_URL = "https://software.frc971.org/Build-Dependencies/wheelhouse"
# Directory on the SSH host into which the tarball of wheels is unpacked.
PY_DEPS_WWWW_DIR = "/var/www/html/files/frc971/Build-Dependencies/wheelhouse"
| 28 | |
| 29 | |
def compute_sha256(data: bytes) -> str:
    """Returns the sha256 digest of an in-memory byte string.

    Args:
        data: The bytes to checksum.

    Returns:
        The checksum as a lower-case hexadecimal string.
    """
    return hashlib.sha256(data).hexdigest()
| 42 | |
| 43 | |
def compute_file_sha256(filename: Path) -> str:
    """Computes the sha256 checksum of the content of a file.

    The file is hashed in fixed-size chunks so that large wheels do not have
    to be held in memory all at once.

    Args:
        filename: The file to checksum.

    Returns:
        The hex representation of the checksum.
    """
    hasher = hashlib.sha256()
    with open(filename, "rb") as f:
        # 1 MiB per read keeps memory use bounded regardless of file size.
        while True:
            chunk = f.read(1024 * 1024)
            if not chunk:
                break
            hasher.update(chunk)
    return hasher.hexdigest()
| 54 | |
| 55 | |
def search_for_uploaded_wheel(wheel: Path, wheel_url: str) -> Tuple[bool, str]:
    """Searches for this wheel on our internal mirror.

    Since we can't build wheels reproducibly, our best option is to check
    whether this wheel already exists on the mirror. If it does, we can skip
    uploading it.

    Args:
        wheel: The wheel to search for on the mirror. Currently unused; the
            lookup is driven entirely by wheel_url.
        wheel_url: The URL where the wheel is expected if it exists on the mirror.

    Returns:
        A two-tuple. The first value is a boolean that signifies whether the
        wheel was found on the mirror. The second value is a string. If the
        wheel was not found on the mirror, this is an empty string. Otherwise,
        this string contains the sha256 checksum of the wheel found on the
        mirror.

    Raises:
        RuntimeError: If the mirror answers with a status code other than 200
            or 404.
    """
    # TODO(phil): A better way to do this would be to SSH into the host and
    # look for files on the filesystem.
    response = requests.get(wheel_url)

    if response.status_code == 200:
        return True, compute_sha256(response.content)
    if response.status_code == 404:
        return False, ""

    raise RuntimeError(
        f"Don't know what to do with status code {response.status_code} "
        f"when trying to get {wheel_url}")
| 86 | |
| 87 | |
def copy_to_host_and_unpack(filename: Path, ssh_host: str) -> None:
    """Copies the tarball of wheels to the server and unpacks the tarball.

    Args:
        filename: The path to the tarball to be uploaded. A plain string path
            is accepted as well and converted to a Path.
        ssh_host: The server that will be passed to ssh(1) for uploading and
            unpacking the tarball.

    Raises:
        subprocess.CalledProcessError: If the scp or ssh invocation exits with
            a non-zero status.
    """
    # TODO(phil): De-duplicate with tools/go/mirror_go_repos.py

    # Normalize so that .name below works for both str and Path callers.
    tarball = Path(filename)

    # scp with an empty remote path; the tar command below then refers to the
    # uploaded file by its bare name.
    subprocess.run(["scp", str(tarball), f"{ssh_host}:"], check=True)

    # Be careful not to use single quotes in these commands to avoid breaking
    # the subprocess.run() invocation below.
    command = " && ".join([
        f"mkdir -p {PY_DEPS_WWWW_DIR}",
        f"tar -C {PY_DEPS_WWWW_DIR} --no-same-owner -xvaf {tarball.name}",
        # Change the permissions so other users can read them (and checksum
        # them).
        f"find {PY_DEPS_WWWW_DIR}/ -type f -exec chmod 644 {{}} +",
    ])

    print("You might be asked for your sudo password shortly.")
    # -t allocates a terminal so that sudo can prompt for the password.
    subprocess.run(
        ["ssh", "-t", ssh_host, f"sudo -u www-data bash -c '{command}'"],
        check=True)
| 115 | |
def main(argv: List[str]) -> Optional[int]:
    """Builds the wheels, mirrors the new ones, and writes whl_overrides.json.

    The steps are:
      1. Run generate_pip_packages_in_docker.sh inside the manylinux docker
         image to populate tools/python/wheelhouse.
      2. For every wheel, check whether the mirror already serves a file of
         that name and decide whether it has to be uploaded.
      3. Pack the wheels to upload into py_deps.tar and, if --ssh_host was
         given, copy and unpack it on the server.
      4. Write the URL and sha256 of every wheel to whl_overrides.json.

    Args:
        argv: The full command line, including the program name at index 0.

    Returns:
        None, which sys.exit() treats as a successful exit status.

    Raises:
        KeyError: If BUILD_WORKSPACE_DIRECTORY is not set, or if neither
            SUDO_USER nor USER is set in the environment.
        subprocess.CalledProcessError: If docker, scp, or ssh fails.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-f",
        "--force",
        action="store_true",
        help=("If set, ignores packages we have already uploaded and "
              "possibly overwrite them with the just-built ones. Use with "
              "extreme caution! This may easily cause issues with building "
              "older commits. Use this only if you know what you're doing."))
    parser.add_argument(
        "--ssh_host",
        type=str,
        help=("The SSH host to copy the built wheels to. This "
              "should be software.971spartans.net where all the "
              "Build-Dependencies files live. Only specify this if you have "
              "access to the server."))
    args = parser.parse_args(argv[1:])

    root_dir = Path(os.environ["BUILD_WORKSPACE_DIRECTORY"])
    # Prefer the invoking user when running under sudo. The uid is handed to
    # the script that runs inside the container.
    caller = os.getenv("SUDO_USER") or os.environ["USER"]
    caller_id = pwd.getpwnam(caller).pw_uid

    python_dir = root_dir / "tools" / "python"

    # Run the wheel generation script inside the docker container provided by
    # the pypa/manylinux project.
    # https://github.com/pypa/manylinux/
    subprocess.run([
        "docker",
        "run",
        "-it",
        "-v",
        f"{python_dir}:/opt/971_build/",
        f"quay.io/pypa/{PLAT}_{ARCH}",
        "/opt/971_build/generate_pip_packages_in_docker.sh",
        PLAT,
        ARCH,
        str(PYTHON_VERSION),
        str(caller_id),
    ],
                   check=True)

    # Get the list of wheels we downloaded from pypi.org or built ourselves.
    wheelhouse = python_dir / "wheelhouse"
    wheels = wheelhouse.glob("*.whl")

    # Assemble the override list. This list will tell rules_python to download
    # from our mirror instead of pypi.org.
    wheels_to_be_uploaded = []
    override_information = {}
    for wheel in sorted(wheels):
        wheel_url = f"{WHEELHOUSE_MIRROR_URL}/{wheel.name}"
        sha256 = compute_file_sha256(wheel)

        # Check if we already have the wheel uploaded. If so, record the
        # checksum of the mirrored copy instead of the locally built one. This
        # lets us avoid non-reproducibility with pip and native extensions.
        # https://github.com/pypa/pip/issues/9604
        wheel_found, sha256_on_mirror = search_for_uploaded_wheel(
            wheel, wheel_url)

        if args.force:
            # Forced mode always uploads and keeps the local checksum, so an
            # existing wheel on the mirror may get replaced.
            if wheel_found and sha256 != sha256_on_mirror:
                print(
                    f"WARNING: The next upload will change sha256 for {wheel}!"
                )
            wheels_to_be_uploaded.append(wheel)
        else:
            if wheel_found:
                sha256 = sha256_on_mirror
            else:
                wheels_to_be_uploaded.append(wheel)

        # Update the override information for this wheel.
        # We use lower-case for the package names here because that's what the
        # requirements.lock.txt file uses.
        info = Wheel(wheel)
        override_information[f"{info.name.lower()}=={info.version}"] = {
            "url": wheel_url,
            "sha256": sha256,
        }

    print(f"We need to upload {len(wheels_to_be_uploaded)} wheels:")
    for wheel in wheels_to_be_uploaded:
        print(wheel)

    # Create a tarball of all the wheels that need to be mirrored. The tarball
    # is written even when there is nothing to upload.
    py_deps_tar = root_dir / "py_deps.tar"
    with tarfile.open(py_deps_tar, "w") as tar:
        for wheel in wheels_to_be_uploaded:
            tar.add(wheel, arcname=wheel.name)

    # Upload the wheels if requested.
    if wheels_to_be_uploaded and args.ssh_host:
        copy_to_host_and_unpack(py_deps_tar, args.ssh_host)
    else:
        print("Skipping mirroring because of lack of --ssh_host or there's "
              "nothing to actually mirror.")

    # Write out the overrides file.
    override_file = python_dir / "whl_overrides.json"
    override_file.write_text(
        json.dumps(override_information, indent=4, sort_keys=True) + "\n")
| 220 | |
| 221 | |
# Script entry point: forward the full argv to main() and use its return value
# as the process exit status.
if __name__ == "__main__":
    sys.exit(main(sys.argv))