blob: aa8ed886ddfb89798b63c2497b1a0a9b4d25e2d8 [file] [log] [blame]
Philipp Schrader868070a2022-09-06 22:51:13 -07001"""This script mirrors our pip package dependencies.
2
This script looks at the requirements.lock.txt file and generates a wheel for
each entry. Those wheels are then mirrored.
5
6See tools/python/README.md for some more information.
7"""
8
9import argparse
10import hashlib
11import json
12import os
13import pwd
14import subprocess
15import sys
16import tarfile
17from pathlib import Path
18from typing import List, Optional, Tuple
19
20import requests
21from pkginfo import Wheel
22
# Python version that is handed (as a string) to the wheel generation script.
PYTHON_VERSION = 39
# Platform tag and CPU architecture. Together they select the
# quay.io/pypa/<PLAT>_<ARCH> docker image and are also passed to the wheel
# generation script.
PLAT = "manylinux_2_28"
ARCH = "x86_64"
# Base URL under which wheels are looked up on the mirror and referenced in
# the generated overrides.
WHEELHOUSE_MIRROR_URL = "https://software.frc971.org/Build-Dependencies/wheelhouse"
# Directory on the SSH host into which the tarball of wheels gets unpacked.
PY_DEPS_WWWW_DIR = "/var/www/html/files/frc971/Build-Dependencies/wheelhouse"
28
29
def compute_sha256(data: bytes) -> str:
    """Returns the sha256 digest of an in-memory byte string.

    Args:
        data: The raw bytes to hash.

    Returns:
        The digest as a hexadecimal string.
    """
    return hashlib.sha256(data).hexdigest()
42
43
def compute_file_sha256(filename: Path) -> str:
    """Returns the sha256 digest of a file's content.

    The whole file is read into memory before it is hashed.

    Args:
        filename: Path of the file whose content gets hashed.

    Returns:
        The digest as a hexadecimal string.
    """
    file_content = filename.read_bytes()
    return compute_sha256(file_content)
54
55
def search_for_uploaded_wheel(wheel: Path, wheel_url: str) -> Tuple[bool, str]:
    """Searches for this wheel on our internal mirror.

    Since we can't build wheels reproducibly, our best option is to check
    whether this wheel already exists on the mirror. If it does, we can skip
    uploading it.

    Args:
        wheel: The wheel to search for on the mirror. Currently unused; the
            lookup is done purely via wheel_url.
        wheel_url: The URL where the wheel is expected if it exists on the
            mirror.

    Returns:
        A two-tuple. The first value is a boolean that signifies whether the
        wheel was found on the mirror. The second value is a string. If the
        wheel was not found on the mirror, this is an empty string. Otherwise,
        this string contains the sha256 checksum of the wheel found on the
        mirror.

    Raises:
        RuntimeError: If the mirror responds with a status code other than
            200 or 404.
    """
    # TODO(phil): A better way to do this would be to SSH into the host and
    # look for files on the filesystem.
    response = requests.get(wheel_url)

    if response.status_code == 200:
        # The wheel exists. Checksum the mirrored copy so the caller can pin
        # exactly what the mirror serves.
        return True, compute_sha256(response.content)
    if response.status_code == 404:
        return False, ""

    # Anything else (redirects, auth failures, server errors) is unexpected.
    # Fail loudly rather than guessing whether the wheel exists.
    raise RuntimeError(
        f"Don't know what to do with status code {response.status_code} "
        f"when trying to get {wheel_url}")
86
87
def copy_to_host_and_unpack(filename: Path, ssh_host: str) -> None:
    """Copies the tarball of wheels to the server and unpacks the tarball.

    Args:
        filename: The path to the tarball to be uploaded. Plain strings are
            accepted as well and get converted to a Path.
        ssh_host: The server that will be passed to ssh(1) for uploading and
            unpacking the tarball.

    Raises:
        subprocess.CalledProcessError: If scp or the remote command fails.
    """
    # TODO(phil): De-duplicate with tools/go/mirror_go_repos.py

    # Normalize the argument so that .name below works for both str and Path
    # inputs.
    tarball = Path(filename)

    # scp places the tarball in the remote login directory, which is why the
    # remote tar invocation below refers to it by its bare file name.
    subprocess.run(["scp", str(tarball), f"{ssh_host}:"], check=True)

    # Be careful not to use single quotes in these commands to avoid breaking
    # the subprocess.run() invocation below.
    command = " && ".join([
        f"mkdir -p {PY_DEPS_WWWW_DIR}",
        f"tar -C {PY_DEPS_WWWW_DIR} --no-same-owner -xvaf {tarball.name}",
        # Change the permissions so other users can read them (and checksum
        # them).
        f"find {PY_DEPS_WWWW_DIR}/ -type f -exec chmod 644 {{}} +",
    ])

    print("You might be asked for your sudo password shortly.")
    # -t allocates a terminal so that sudo can prompt for the password.
    subprocess.run(
        ["ssh", "-t", ssh_host, f"sudo -u www-data bash -c '{command}'"],
        check=True)
114
115
def main(argv: List[str]) -> Optional[int]:
    """Builds the wheels, optionally mirrors them, and writes the overrides.

    The steps are:
      1. Run the wheel generation script inside a manylinux docker container.
      2. For each resulting wheel, decide whether it needs to be uploaded and
         which URL/sha256 pair goes into the overrides file.
      3. Pack the wheels that need uploading into py_deps.tar.
      4. Upload and unpack that tarball if --ssh_host was given.
      5. Write tools/python/whl_overrides.json.

    Args:
        argv: The full command line, including the program name at index 0.

    Returns:
        None, which sys.exit() treats as a successful exit.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-f",
        "--force",
        action="store_true",
        help=("If set, ignores packages we have already uploaded and "
              "possibly overwrite them with the just-built ones. Use with "
              "extreme caution! This may easily cause issues with building "
              "older commits. Use this only if you know what you're doing."))
    parser.add_argument(
        "-l",
        "--local_test",
        action="store_true",
        help=("If set, generate the URL overrides pointing at the generated "
              "local files. Incompatible with --ssh_host. This is useful for "
              "iterating on generated wheel files."))
    parser.add_argument(
        "--ssh_host",
        type=str,
        help=("The SSH host to copy the generated wheels to. This "
              "should be software.971spartans.net where all the "
              "Build-Dependencies files live. Only specify this if you have "
              "access to the server."))
    args = parser.parse_args(argv[1:])

    # In local test mode every wheel is treated as "not on the mirror". If we
    # also uploaded, we would overwrite mirrored wheels without --force, so
    # reject the combination before doing any work.
    if args.local_test and args.ssh_host:
        parser.error("--local_test is incompatible with --ssh_host.")

    root_dir = Path(os.environ["BUILD_WORKSPACE_DIRECTORY"])
    # Prefer the user who invoked sudo (if any) over the effective user. The
    # resulting uid is handed to the generation script in the container.
    caller = os.getenv("SUDO_USER") or os.environ["USER"]
    caller_id = pwd.getpwnam(caller).pw_uid

    python_dir = root_dir / "tools" / "python"

    # Run the wheel generation script inside the docker container provided by
    # the pypa/manylinux project.
    # https://github.com/pypa/manylinux/
    subprocess.run([
        "docker",
        "run",
        "-it",
        "-v",
        f"{python_dir}:/opt/971_build/",
        f"quay.io/pypa/{PLAT}_{ARCH}",
        "/opt/971_build/generate_pip_packages_in_docker.sh",
        PLAT,
        ARCH,
        str(PYTHON_VERSION),
        str(caller_id),
    ],
                   check=True)

    # Get the list of wheels we downloaded from pypi.org or built ourselves.
    wheelhouse = python_dir / "wheelhouse"
    wheels = wheelhouse.glob("*.whl")

    # Assemble the override list. This list will tell rules_python to download
    # from our mirror instead of pypi.org.
    wheels_to_be_uploaded = []
    override_information = {}
    for wheel in sorted(wheels):
        wheel_url = f"{WHEELHOUSE_MIRROR_URL}/{wheel.name}"
        if args.local_test:
            override_url = f"file://{wheel.resolve()}"
        else:
            override_url = wheel_url
        sha256 = compute_file_sha256(wheel)

        if args.local_test:
            # Local testing always uses the just-built wheel, so there is no
            # point in downloading the mirrored copy only to ignore it.
            wheel_found, sha256_on_mirror = False, ""
        else:
            # Check if we already have the wheel uploaded. If so, use the
            # checksum of that one. This lets us avoid non-reproducibility
            # with pip and native extensions.
            # https://github.com/pypa/pip/issues/9604
            wheel_found, sha256_on_mirror = search_for_uploaded_wheel(
                wheel, wheel_url)

        if args.force:
            # Forced mode: always upload, but warn when that changes the
            # content behind an already-published URL.
            if wheel_found and sha256 != sha256_on_mirror:
                print(
                    f"WARNING: The next upload will change sha256 for {wheel}!"
                )
            wheels_to_be_uploaded.append(wheel)
        else:
            if wheel_found:
                # Keep pointing at the mirrored wheel and its checksum.
                sha256 = sha256_on_mirror
            else:
                wheels_to_be_uploaded.append(wheel)

        # Update the override information for this wheel.
        # We use lower-case for the package names here because that's what the
        # requirements.lock.txt file uses.
        info = Wheel(wheel)
        override_information[f"{info.name.lower()}=={info.version}"] = {
            "url": override_url,
            "sha256": sha256,
        }

    print(f"We need to upload {len(wheels_to_be_uploaded)} wheels:")
    for wheel in wheels_to_be_uploaded:
        print(wheel)

    # Create a tarball of all the wheels that need to be mirrored.
    py_deps_tar = root_dir / "py_deps.tar"
    with tarfile.open(py_deps_tar, "w") as tar:
        for wheel in wheels_to_be_uploaded:
            # Store the wheels flat, without their directory components.
            tar.add(wheel, arcname=wheel.name)

    # Upload the wheels if requested.
    if wheels_to_be_uploaded and args.ssh_host:
        copy_to_host_and_unpack(py_deps_tar, args.ssh_host)
    else:
        print("Skipping mirroring because of lack of --ssh_host or there's "
              "nothing to actually mirror.")

    # Write out the overrides file.
    override_file = python_dir / "whl_overrides.json"
    override_file.write_text(
        json.dumps(override_information, indent=4, sort_keys=True) + "\n")
234
235
if __name__ == "__main__":
    # Run the script and hand main()'s return value to the interpreter as the
    # process exit status.
    exit_status = main(sys.argv)
    sys.exit(exit_status)