Philipp Schrader | 868070a | 2022-09-06 22:51:13 -0700 | [diff] [blame] | 1 | """This script mirrors our pip package dependencies. |
| 2 | |
This script looks at the requirements.lock.txt file and generates a wheel for
| 4 | each entry. Those wheels are then mirrored. |
| 5 | |
| 6 | See tools/python/README.md for some more information. |
| 7 | """ |
| 8 | |
| 9 | import argparse |
| 10 | import hashlib |
| 11 | import json |
| 12 | import os |
| 13 | import pwd |
| 14 | import subprocess |
| 15 | import sys |
| 16 | import tarfile |
| 17 | from pathlib import Path |
| 18 | from typing import List, Optional, Tuple |
| 19 | |
| 20 | import requests |
| 21 | from pkginfo import Wheel |
| 22 | |
# Platform tag and CPU architecture that get passed to
# generate_pip_packages_in_docker.sh inside the build container.
PLAT = "manylinux_2_31"
ARCH = "x86_64"
# Base URL under which mirrored wheels are looked up and which is written
# into the generated overrides file.
WHEELHOUSE_MIRROR_URL = "https://software.frc971.org/Build-Dependencies/wheelhouse"
# Directory on the SSH host into which the uploaded wheel tarball is unpacked.
PY_DEPS_WWWW_DIR = "/var/www/html/files/frc971/Build-Dependencies/wheelhouse"
| 27 | |
| 28 | |
def sanitize_name(name: str) -> str:
    """Normalizes a package name into one canonical spelling.

    pip is not consistent about reporting the real package name vs. whatever
    was typed into the requirements file, so every name is funneled through
    this helper before being used as a key. The name is lower-cased, and both
    dashes and dots are turned into underscores.

    Args:
        name: The package name to normalize.

    Returns:
        The normalized package name.
    """
    separators_to_underscore = str.maketrans({"-": "_", ".": "_"})
    lowered = name.lower()
    return lowered.translate(separators_to_underscore)
| 43 | |
| 44 | |
def compute_sha256(data: bytes) -> str:
    """Returns the sha256 digest of an in-memory byte string.

    Args:
        data: The bytes to checksum.

    Returns:
        The checksum as a lower-case hexadecimal string.
    """
    return hashlib.sha256(data).hexdigest()
| 57 | |
| 58 | |
def compute_file_sha256(filename: Path) -> str:
    """Computes the sha256 checksum of the content of a file.

    The file is hashed in fixed-size chunks so that large wheels never have
    to be held in memory in their entirety.

    Args:
        filename: The file to checksum.

    Returns:
        The hex representation of the checksum.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    chunk_size = 1024 * 1024
    hasher = hashlib.sha256()
    with open(filename, "rb") as stream:
        # iter() with a sentinel keeps calling read() until it returns b"",
        # i.e. until end-of-file.
        for chunk in iter(lambda: stream.read(chunk_size), b""):
            hasher.update(chunk)
    return hasher.hexdigest()
| 69 | |
| 70 | |
def search_for_uploaded_wheel(wheel: Path, wheel_url: str) -> Tuple[bool, str]:
    """Searches for this wheel on our internal mirror.

    Since we can't build wheels reproducibly, our best option is to check
    whether this wheel already exists on the mirror. If it does, we can skip
    uploading it.

    Args:
        wheel: The wheel to search for on the mirror. Currently unused; the
            lookup is driven entirely by wheel_url.
        wheel_url: The URL where the wheel is expected if it exists on the
            mirror.

    Returns:
        A two-tuple. The first value is a boolean that signifies whether the
        wheel was found on the mirror. The second value is a string. If the
        wheel was not found on the mirror, this is an empty string. Otherwise,
        this string contains the sha256 checksum of the wheel found on the
        mirror.

    Raises:
        RuntimeError: If the mirror answers with a status code other than 200
            or 404.
    """
    # TODO(phil): A better way to do this would be to SSH into the host and
    # look for files on the filesystem.
    response = requests.get(wheel_url)

    if response.status_code == 200:
        # The whole wheel is downloaded so that its checksum can be computed.
        return True, compute_sha256(response.content)
    if response.status_code == 404:
        return False, ""

    raise RuntimeError(
        f"Don't know what to do with status code {response.status_code} "
        f"when trying to get {wheel_url}")
| 101 | |
| 102 | |
def copy_to_host_and_unpack(filename: Path, ssh_host: str) -> None:
    """Copies the tarball of wheels to the server and unpacks the tarball.

    Args:
        filename: The path to the tarball to be uploaded. Plain strings are
            accepted as well and are converted to a Path.
        ssh_host: The server that will be passed to ssh(1) for uploading and
            unpacking the tarball.

    Raises:
        subprocess.CalledProcessError: If scp or the remote command fails.
    """
    # Imported locally because only this function needs it.
    import shlex

    # TODO(phil): De-duplicate with tools/go/mirror_go_repos.py

    # Callers may hand in either a str or a Path; normalize so that .name
    # below is always available.
    tarball = Path(filename)

    # scp to "<host>:" places the tarball in the remote login directory under
    # its base name, which is where the tar command below looks for it.
    subprocess.run(["scp", str(tarball), f"{ssh_host}:"], check=True)

    command = " && ".join([
        f"mkdir -p {PY_DEPS_WWWW_DIR}",
        f"tar -C {PY_DEPS_WWWW_DIR} --no-same-owner -xvaf {shlex.quote(tarball.name)}",
        # Change the permissions so other users can read them (and checksum
        # them).
        f"find {PY_DEPS_WWWW_DIR}/ -type f -exec chmod 644 {{}} +",
    ])

    print("You might be asked for your sudo password shortly.")
    # shlex.quote() wraps the whole command in single quotes and escapes any
    # single quotes it may contain, so it arrives at bash as one argument.
    subprocess.run(
        ["ssh", "-t", ssh_host, f"sudo -u www-data bash -c {shlex.quote(command)}"],
        check=True)
| 129 | |
| 130 | |
def main(argv: List[str]) -> Optional[int]:
    """Builds all wheels, mirrors the new ones, and writes the overrides file.

    The steps are:
      1. Build the docker image and run the wheel generation script in it.
      2. For every wheel in the wheelhouse, decide whether it has to be
         uploaded and which URL/sha256 go into the overrides file.
      3. Pack the wheels to be uploaded into py_deps.tar and, if --ssh_host
         was given, copy and unpack it on the server.
      4. Write tools/python/whl_overrides.json.

    Args:
        argv: The full command line, including the program name at index 0.

    Returns:
        None, which sys.exit() treats as a successful exit.

    Raises:
        KeyError: If BUILD_WORKSPACE_DIRECTORY is not set in the environment,
            or if neither SUDO_USER nor USER is set.
        subprocess.CalledProcessError: If one of the docker, scp, or ssh
            invocations fails.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-f",
        "--force",
        action="store_true",
        help=("If set, ignores packages we have already uploaded and "
              "possibly overwrite them with the just-built ones. Use with "
              "extreme caution! This may easily cause issues with building "
              "older commits. Use this only if you know what you're doing."))
    # --local_test and --ssh_host are documented as incompatible, so let
    # argparse reject the combination instead of silently uploading.
    destination = parser.add_mutually_exclusive_group()
    destination.add_argument(
        "-l",
        "--local_test",
        action="store_true",
        help=("If set, generate the URL overrides pointing at the generated "
              "local files. Incompatible with --ssh_host. This is useful for "
              "iterating on generated wheel files."))
    destination.add_argument(
        "--ssh_host",
        type=str,
        help=("The SSH host to copy the generated wheels to. This "
              "should be software.971spartans.net where all the "
              "Build-Dependencies files live. Only specify this if you have "
              "access to the server."))
    args = parser.parse_args(argv[1:])

    root_dir = Path(os.environ["BUILD_WORKSPACE_DIRECTORY"])
    # Prefer SUDO_USER so that the invoking user (not root) is used when the
    # script is run through sudo.
    caller = os.getenv("SUDO_USER") or os.environ["USER"]
    caller_id = pwd.getpwnam(caller).pw_uid

    python_dir = root_dir / "tools" / "python"

    # Tag the image per user.
    container_tag = f"pip-compile:{caller}"

    subprocess.run([
        "docker",
        "build",
        "--file=generate_pip_packages.Dockerfile",
        f"--tag={container_tag}",
        ".",
    ],
                   cwd=python_dir,
                   check=True)

    # Run the wheel generation script inside the docker container provided by
    # the pypa/manylinux project.
    # https://github.com/pypa/manylinux/
    subprocess.run([
        "docker",
        "run",
        "-it",
        "-v",
        f"{python_dir}:/opt/971_build/",
        container_tag,
        "/opt/971_build/generate_pip_packages_in_docker.sh",
        PLAT,
        ARCH,
        str(caller_id),
    ],
                   check=True)

    # Get the list of wheels we downloaded from pypi.org or built ourselves.
    wheelhouse = python_dir / "wheelhouse"
    wheels = wheelhouse.glob("*.whl")

    # Assemble the override list. This list will tell rules_python to download
    # from our mirror instead of pypi.org.
    wheels_to_be_uploaded = []
    override_information = {}
    for wheel in sorted(wheels):
        wheel_url = f"{WHEELHOUSE_MIRROR_URL}/{wheel.name}"
        sha256 = compute_file_sha256(wheel)

        if args.local_test:
            # Local testing always uses the just-built files, so there is no
            # point in downloading each wheel from the mirror first.
            override_url = f"file://{wheel.resolve()}"
            wheel_found, sha256_on_mirror = False, ""
        else:
            override_url = wheel_url
            # Check if we already have the wheel uploaded. If so, use the
            # checksum of that one. This lets us avoid non-reproducibility
            # with pip and native extensions.
            # https://github.com/pypa/pip/issues/9604
            wheel_found, sha256_on_mirror = search_for_uploaded_wheel(
                wheel, wheel_url)

        if args.force:
            # With --force every wheel is uploaded again, even when it is
            # already on the mirror.
            if wheel_found and sha256 != sha256_on_mirror:
                print(
                    f"WARNING: The next upload will change sha256 for {wheel}!"
                )
            wheels_to_be_uploaded.append(wheel)
        else:
            if wheel_found:
                # Keep pointing at the already mirrored file.
                sha256 = sha256_on_mirror
            else:
                wheels_to_be_uploaded.append(wheel)

        # Update the override information for this wheel.
        # We use lower-case for the package names here because that's what the
        # requirements.lock.txt file uses.
        info = Wheel(wheel)
        override_information[f"{sanitize_name(info.name)}=={info.version}"] = {
            "url": override_url,
            "sha256": sha256,
        }

    print(f"We need to upload {len(wheels_to_be_uploaded)} wheels:")
    for wheel in wheels_to_be_uploaded:
        print(wheel)

    # Create a tarball of all the wheels that need to be mirrored.
    py_deps_tar = root_dir / "py_deps.tar"
    with tarfile.open(py_deps_tar, "w") as tar:
        for wheel in wheels_to_be_uploaded:
            tar.add(wheel, arcname=wheel.name)

    # Upload the wheels if requested.
    if wheels_to_be_uploaded and args.ssh_host:
        copy_to_host_and_unpack(py_deps_tar, args.ssh_host)
    else:
        print("Skipping mirroring because of lack of --ssh_host or there's "
              "nothing to actually mirror.")

    # Write out the overrides file.
    override_file = python_dir / "whl_overrides.json"
    override_file.write_text(
        json.dumps(override_information, indent=4, sort_keys=True) + "\n")
| 260 | |
| 261 | |
# Script entry point: run main() with the full command line and use its
# return value as the process exit status.
if __name__ == "__main__":
    sys.exit(main(sys.argv))