"""This script mirrors our pip package dependencies.

This script looks at the requirements.lock.txt file and generates a wheel for
each entry. Those wheels are then mirrored.

See tools/python/README.md for some more information.
"""
| 8 | |
| 9 | import argparse |
| 10 | import hashlib |
| 11 | import json |
| 12 | import os |
| 13 | import pwd |
| 14 | import subprocess |
| 15 | import sys |
| 16 | import tarfile |
| 17 | from pathlib import Path |
| 18 | from typing import List, Optional, Tuple |
| 19 | |
| 20 | import requests |
| 21 | from pkginfo import Wheel |
| 22 | |
# Platform tag and CPU architecture that main() passes as arguments to
# generate_pip_packages_in_docker.sh.
PLAT = "manylinux_2_31"
ARCH = "x86_64"
# Base URL under which a mirrored wheel is looked up (URL + "/" + wheel name).
WHEELHOUSE_MIRROR_URL = "https://software.frc971.org/Build-Dependencies/wheelhouse"
# Directory on the SSH host into which the uploaded wheel tarball is unpacked.
PY_DEPS_WWWW_DIR = "/var/www/html/files/frc971/Build-Dependencies/wheelhouse"
| 27 | |
| 28 | |
def compute_sha256(data: bytes) -> str:
    """Returns the hex-encoded sha256 digest of a byte string.

    Args:
        data: The raw bytes to hash.

    Returns:
        The checksum as a lower-case hexadecimal string.
    """
    return hashlib.sha256(data).hexdigest()
| 41 | |
| 42 | |
def compute_file_sha256(filename: Path) -> str:
    """Computes the sha256 checksum of the content of a file.

    The file is hashed in fixed-size chunks so that large wheels do not have
    to be loaded into memory in one piece.

    Args:
        filename: The file to checksum. A plain string path works as well.

    Returns:
        The hex representation of the checksum.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    chunk_size = 1024 * 1024
    hasher = hashlib.sha256()
    with open(filename, "rb") as file:
        # iter() with a sentinel keeps calling read() until it returns b"",
        # i.e. until end-of-file.
        for chunk in iter(lambda: file.read(chunk_size), b""):
            hasher.update(chunk)
    return hasher.hexdigest()
| 53 | |
| 54 | |
def search_for_uploaded_wheel(wheel: Path, wheel_url: str) -> Tuple[bool, str]:
    """Searches for this wheel on our internal mirror.

    Since we can't build wheels reproducibly, our best option is to check
    whether this wheel already exists on the mirror. If it does, we can skip
    uploading it.

    Args:
        wheel: The wheel to search for on the mirror. Currently unused; the
            lookup relies on wheel_url alone.
        wheel_url: The URL where the wheel is expected if it exists on the
            mirror.

    Returns:
        A two-tuple. The first value is a boolean that signifies whether the
        wheel was found on the mirror. The second value is a string. If the
        wheel was not found on the mirror, this is an empty string. Otherwise,
        this string contains the sha256 checksum of the wheel found on the
        mirror.

    Raises:
        RuntimeError: If the mirror answers with a status code other than 200
            or 404.
    """
    # TODO(phil): A better way to do this would be to SSH into the host and
    # look for files on the filesystem.
    response = requests.get(wheel_url)

    if response.status_code == 200:
        # The whole wheel is downloaded so that we can checksum exactly what
        # the mirror serves.
        return True, compute_sha256(response.content)
    if response.status_code == 404:
        return False, ""

    raise RuntimeError(
        f"Don't know what to do with status code {response.status_code} "
        f"when trying to get {wheel_url}")
| 85 | |
| 86 | |
def copy_to_host_and_unpack(filename: Path, ssh_host: str) -> None:
    """Copies the tarball of wheels to the server and unpacks the tarball.

    Args:
        filename: The path to the tarball to be uploaded. A plain string path
            is accepted as well.
        ssh_host: The server that will be passed to ssh(1) for uploading and
            unpacking the tarball.

    Raises:
        subprocess.CalledProcessError: If scp or the remote command exits
            with a non-zero status.
    """
    # Imported here so that this function carries its own dependency.
    import shlex

    # TODO(phil): De-duplicate with tools/go/mirror_go_repos.py

    # Normalize so that both str and Path callers work; the remote side only
    # needs the base name of the tarball.
    tarball = Path(filename)

    # An empty remote path makes scp place the file under its base name in
    # the default remote directory.
    subprocess.run(["scp", str(tarball), f"{ssh_host}:"], check=True)

    wheelhouse_dir = shlex.quote(PY_DEPS_WWWW_DIR)
    # NOTE(review): the tarball is referenced by its bare name, so this
    # assumes the ssh session starts in the directory scp copied into.
    command = " && ".join([
        f"mkdir -p {wheelhouse_dir}",
        f"tar -C {wheelhouse_dir} --no-same-owner -xvaf "
        f"{shlex.quote(tarball.name)}",
        # Change the permissions so other users can read them (and checksum
        # them).
        f"find {wheelhouse_dir}/ -type f -exec chmod 644 {{}} +",
    ])

    print("You might be asked for your sudo password shortly.")
    # shlex.quote() wraps the whole command for the remote shell, so quotes
    # inside the individual commands can no longer break the invocation.
    subprocess.run(
        [
            "ssh", "-t", ssh_host,
            f"sudo -u www-data bash -c {shlex.quote(command)}"
        ],
        check=True)
| 113 | |
| 114 | |
def main(argv: List[str]) -> Optional[int]:
    """Builds all wheels, mirrors the new ones, and writes the overrides file.

    The steps are:
      1. Build the docker image and run the wheel generation script in it.
      2. For every wheel in the wheelhouse, decide whether it needs to be
         uploaded and record its URL and sha256 checksum.
      3. Pack the wheels to be uploaded into py_deps.tar and, if --ssh_host
         was given, upload and unpack them on the server.
      4. Write tools/python/whl_overrides.json.

    Args:
        argv: The full command line, including the program name at index 0.

    Returns:
        None, which sys.exit() treats as success.

    Raises:
        KeyError: If BUILD_WORKSPACE_DIRECTORY (or USER, when SUDO_USER is
            unset) is missing from the environment.
        subprocess.CalledProcessError: If a docker, scp, or ssh invocation
            fails.
        RuntimeError: If the mirror answers with an unexpected status code.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-f",
        "--force",
        action="store_true",
        help=("If set, ignores packages we have already uploaded and "
              "possibly overwrite them with the just-built ones. Use with "
              "extreme caution! This may easily cause issues with building "
              "older commits. Use this only if you know what you're doing."))
    parser.add_argument(
        "-l",
        "--local_test",
        action="store_true",
        help=("If set, generate the URL overrides pointing at the generated "
              "local files. Incompatible with --ssh_host. This is useful for "
              "iterating on generated wheel files."))
    parser.add_argument(
        "--ssh_host",
        type=str,
        help=("The SSH host to copy the generated wheels to. This "
              "should be software.971spartans.net where all the "
              "Build-Dependencies files live. Only specify this if you have "
              "access to the server."))
    args = parser.parse_args(argv[1:])

    # In local test mode every wheel is treated as "not on the mirror", so
    # combining it with --ssh_host would upload (and possibly overwrite)
    # everything. Enforce the documented incompatibility.
    if args.local_test and args.ssh_host:
        parser.error("--local_test is incompatible with --ssh_host.")

    root_dir = Path(os.environ["BUILD_WORKSPACE_DIRECTORY"])
    # Prefer the user that invoked sudo, if any, over the current user.
    caller = os.getenv("SUDO_USER") or os.environ["USER"]
    caller_id = pwd.getpwnam(caller).pw_uid

    python_dir = root_dir / "tools" / "python"

    # Tag the image per user.
    container_tag = f"pip-compile:{caller}"

    subprocess.run([
        "docker",
        "build",
        "--file=generate_pip_packages.Dockerfile",
        f"--tag={container_tag}",
        ".",
    ],
                   cwd=python_dir,
                   check=True)

    # Run the wheel generation script inside the docker container provided by
    # the pypa/manylinux project.
    # https://github.com/pypa/manylinux/
    subprocess.run([
        "docker",
        "run",
        "-it",
        "-v",
        f"{python_dir}:/opt/971_build/",
        container_tag,
        "/opt/971_build/generate_pip_packages_in_docker.sh",
        PLAT,
        ARCH,
        str(caller_id),
    ],
                   check=True)

    # Get the list of wheels we downloaded from pypi.org or built ourselves.
    wheelhouse = python_dir / "wheelhouse"
    wheels = wheelhouse.glob("*.whl")

    # Assemble the override list. This list will tell rules_python to download
    # from our mirror instead of pypi.org.
    wheels_to_be_uploaded = []
    override_information = {}
    for wheel in sorted(wheels):
        wheel_url = f"{WHEELHOUSE_MIRROR_URL}/{wheel.name}"
        sha256 = compute_file_sha256(wheel)

        if args.local_test:
            # Point at the local file and pretend the mirror has nothing.
            # There is no need to query (and download from) the mirror.
            override_url = f"file://{wheel.resolve()}"
            wheel_found, sha256_on_mirror = False, ""
        else:
            override_url = wheel_url
            # Check if we already have the wheel uploaded. If so, use the
            # checksum of that one. This lets us avoid non-reproducibility
            # with pip and native extensions.
            # https://github.com/pypa/pip/issues/9604
            wheel_found, sha256_on_mirror = search_for_uploaded_wheel(
                wheel, wheel_url)

        if args.force:
            if wheel_found and sha256 != sha256_on_mirror:
                print(
                    f"WARNING: The next upload will change sha256 for {wheel}!"
                )
            wheels_to_be_uploaded.append(wheel)
        else:
            if wheel_found:
                # Keep whatever is on the mirror and record its checksum.
                sha256 = sha256_on_mirror
            else:
                wheels_to_be_uploaded.append(wheel)

        # Update the override information for this wheel.
        # We use lower-case for the package names here because that's what the
        # requirements.lock.txt file uses.
        info = Wheel(wheel)
        override_information[f"{info.name.lower()}=={info.version}"] = {
            "url": override_url,
            "sha256": sha256,
        }

    print(f"We need to upload {len(wheels_to_be_uploaded)} wheels:")
    for wheel in wheels_to_be_uploaded:
        print(wheel)

    # Create a tarball of all the wheels that need to be mirrored.
    py_deps_tar = root_dir / "py_deps.tar"
    with tarfile.open(py_deps_tar, "w") as tar:
        for wheel in wheels_to_be_uploaded:
            tar.add(wheel, arcname=wheel.name)

    # Upload the wheels if requested.
    if wheels_to_be_uploaded and args.ssh_host:
        copy_to_host_and_unpack(py_deps_tar, args.ssh_host)
    else:
        print("Skipping mirroring because of lack of --ssh_host or there's "
              "nothing to actually mirror.")

    # Write out the overrides file.
    override_file = python_dir / "whl_overrides.json"
    override_file.write_text(
        json.dumps(override_information, indent=4, sort_keys=True) + "\n")
| 245 | |
if __name__ == "__main__":
    # Use main()'s return value as the process exit status.
    raise SystemExit(main(sys.argv))