blob: 0581054960f586de438637e8f7bee03bbb78a1a9 [file] [log] [blame]
"""This script mirrors our pip package dependencies.

This script looks at the requirements.lock.txt file and generates a wheel for
each entry. Those wheels are then mirrored.

See tools/python/README.md for some more information.
"""
8
9import argparse
10import hashlib
11import json
12import os
13import pwd
14import subprocess
15import sys
16import tarfile
17from pathlib import Path
18from typing import List, Optional, Tuple
19
20import requests
21from pkginfo import Wheel
22
# Platform tag that is passed to the wheel generation script run inside the
# docker container.
PLAT = "manylinux_2_34"
# CPU architecture that is passed to the wheel generation script run inside
# the docker container.
ARCH = "x86_64"
# Base URL under which mirrored wheels are looked up and which is written into
# the override file (unless --local_test is used).
WHEELHOUSE_MIRROR_URL = "https://software.frc971.org/Build-Dependencies/wheelhouse"
# Directory on the SSH host into which the uploaded tarball of wheels is
# unpacked.
PY_DEPS_WWWW_DIR = "/var/www/html/files/frc971/Build-Dependencies/wheelhouse"
27
28
def sanitize_name(name: str) -> str:
    """Normalizes a package name into a single canonical spelling.

    pip does not consistently report the real package name; sometimes it
    echoes whatever the user typed into the requirements file. To make names
    comparable, the name is lower-cased and every dash or dot is turned into
    an underscore.

    Args:
        name: The package name to normalize.

    Returns:
        The normalized package name.
    """
    separators_to_underscore = str.maketrans("-.", "__")
    lowered = name.lower()
    return lowered.translate(separators_to_underscore)
43
44
def compute_sha256(data: bytes) -> str:
    """Returns the sha256 digest of an in-memory byte string.

    Args:
        data: The bytes to checksum.

    Returns:
        The checksum as a hexadecimal string.
    """
    return hashlib.sha256(data).hexdigest()
57
58
def compute_file_sha256(filename: Path) -> str:
    """Computes the sha256 checksum of the content of a file.

    The file is hashed in fixed-size chunks so that large wheels do not have
    to be loaded into memory in one piece.

    Args:
        filename: The file to checksum.

    Returns:
        The hex representation of the checksum.
    """
    chunk_size = 1024 * 1024
    hasher = hashlib.sha256()
    with filename.open("rb") as stream:
        # iter() with a sentinel keeps calling read() until it returns b"",
        # i.e. until the end of the file is reached.
        for chunk in iter(lambda: stream.read(chunk_size), b""):
            hasher.update(chunk)
    return hasher.hexdigest()
69
70
def search_for_uploaded_wheel(wheel: Path, wheel_url: str) -> Tuple[bool, str]:
    """Searches for this wheel on our internal mirror.

    Since we can't build wheels reproducibly, our best option is to check
    whether this wheel already exists on the mirror. If it does, we can skip
    uploading it.

    Args:
        wheel: The wheel to search for on the mirror. Currently unused; the
            lookup is done purely via wheel_url.
        wheel_url: The URL where the wheel is expected if it exists on the
            mirror.

    Returns:
        A two-tuple. The first value is a boolean that signifies whether the
        wheel was found on the mirror. The second value is a string. If the
        wheel was not found on the mirror, this is an empty string. Otherwise,
        this string contains the sha256 checksum of the wheel found on the
        mirror.

    Raises:
        RuntimeError: The mirror answered with a status code other than 200
            or 404.
    """
    # TODO(phil): A better way to do this would be to SSH into the host and
    # look for files on the filesystem.
    response = requests.get(wheel_url)

    if response.status_code == 200:
        # The wheel exists. Checksum what the mirror actually serves.
        return True, compute_sha256(response.content)
    if response.status_code == 404:
        return False, ""

    raise RuntimeError(
        f"Don't know what to do with status code {response.status_code} "
        f"when trying to get {wheel_url}")
101
102
def copy_to_host_and_unpack(filename: Path, ssh_host: str) -> None:
    """Copies the tarball of wheels to the server and unpacks the tarball.

    Args:
        filename: The path to the tarball to be uploaded. A plain string path
            is accepted as well.
        ssh_host: The server that will be passed to ssh(1) for uploading and
            unpacking the tarball.

    Raises:
        subprocess.CalledProcessError: scp or ssh exited with a non-zero
            status.
    """
    # TODO(phil): De-duplicate with tools/go/mirror_go_repos.py

    # The remote commands below need the file's base name, so normalize the
    # argument to a Path even if a string was passed in.
    tarball = Path(filename)

    # An empty remote path after the colon makes scp use its default
    # destination directory on the host.
    subprocess.run(["scp", str(tarball), f"{ssh_host}:"], check=True)

    # Be careful not to use single quotes in these commands to avoid breaking
    # the subprocess.run() invocation below.
    command = " && ".join([
        f"mkdir -p {PY_DEPS_WWWW_DIR}",
        f"tar -C {PY_DEPS_WWWW_DIR} --no-same-owner -xvaf {tarball.name}",
        # Change the permissions so other users can read them (and checksum
        # them).
        f"find {PY_DEPS_WWWW_DIR}/ -type f -exec chmod 644 {{}} +",
    ])

    print("You might be asked for your sudo password shortly.")
    # -t allocates a terminal so that sudo can prompt for the password.
    subprocess.run(
        ["ssh", "-t", ssh_host, f"sudo -u www-data bash -c '{command}'"],
        check=True)
129
130
def main(argv: List[str]) -> Optional[int]:
    """Builds the wheels, mirrors them if requested, and writes the overrides.

    The steps are:
      1. Build the docker image and run the wheel generation script in it.
      2. For every wheel in the wheelhouse, decide whether it needs to be
         uploaded and which URL/sha256 pair goes into the override file.
      3. Pack the wheels that need uploading into a tarball and, if
         --ssh_host was given, copy it to the server and unpack it there.
      4. Write tools/python/whl_overrides.json.

    Args:
        argv: The full command line, including the program name at index 0.

    Returns:
        None, which makes sys.exit() report success.

    Raises:
        KeyError: BUILD_WORKSPACE_DIRECTORY is not set, or neither SUDO_USER
            nor USER is set in the environment.
        subprocess.CalledProcessError: One of the docker, scp, or ssh
            invocations failed.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-f",
        "--force",
        action="store_true",
        help=("If set, ignores packages we have already uploaded and "
              "possibly overwrite them with the just-built ones. Use with "
              "extreme caution! This may easily cause issues with building "
              "older commits. Use this only if you know what you're doing."))
    parser.add_argument(
        "-l",
        "--local_test",
        action="store_true",
        help=("If set, generate the URL overrides pointing at the generated "
              "local files. Incompatible with --ssh_host. This is useful for "
              "iterating on generated wheel files."))
    parser.add_argument(
        "--ssh_host",
        type=str,
        help=("The SSH host to copy the generated wheels to. This "
              "should be software.971spartans.net where all the "
              "Build-Dependencies files live. Only specify this if you have "
              "access to the server."))
    args = parser.parse_args(argv[1:])

    # Uploading wheels while the overrides point at local files makes no
    # sense, so reject the combination up front.
    if args.local_test and args.ssh_host:
        parser.error("--local_test is incompatible with --ssh_host.")

    root_dir = Path(os.environ["BUILD_WORKSPACE_DIRECTORY"])
    # Prefer the user that invoked sudo (if any) over the effective user.
    caller = os.getenv("SUDO_USER") or os.environ["USER"]
    caller_id = pwd.getpwnam(caller).pw_uid

    python_dir = root_dir / "tools" / "python"

    # Tag the image per user so different users don't clobber each other.
    container_tag = f"pip-compile:{caller}"

    subprocess.run([
        "docker",
        "build",
        "--file=generate_pip_packages.Dockerfile",
        f"--tag={container_tag}",
        ".",
    ],
                   cwd=python_dir,
                   check=True)

    # Run the wheel generation script inside the docker container provided by
    # the pypa/manylinux project.
    # https://github.com/pypa/manylinux/
    subprocess.run([
        "docker",
        "run",
        "-it",
        "-v",
        f"{python_dir}:/opt/971_build/",
        container_tag,
        "/opt/971_build/generate_pip_packages_in_docker.sh",
        PLAT,
        ARCH,
        str(caller_id),
    ],
                   check=True)

    # Get the list of wheels we downloaded from pypi.org or built ourselves.
    wheelhouse = python_dir / "wheelhouse"
    wheels = wheelhouse.glob("*.whl")

    # Assemble the override list. This list will tell rules_python to download
    # from our mirror instead of pypi.org.
    wheels_to_be_uploaded = []
    override_information = {}
    for wheel in sorted(wheels):
        wheel_url = f"{WHEELHOUSE_MIRROR_URL}/{wheel.name}"
        if args.local_test:
            override_url = f"file://{wheel.resolve()}"
        else:
            override_url = wheel_url
        sha256 = compute_file_sha256(wheel)

        if args.local_test:
            # Local testing always uses the freshly generated wheels, so
            # there is no point in querying the mirror.
            wheel_found, sha256_on_mirror = False, ""
        else:
            # Check if we already have the wheel uploaded. If so, use the
            # checksum of that one. This lets us avoid non-reproducibility
            # with pip and native extensions.
            # https://github.com/pypa/pip/issues/9604
            wheel_found, sha256_on_mirror = search_for_uploaded_wheel(
                wheel, wheel_url)

        if args.force:
            if wheel_found and sha256 != sha256_on_mirror:
                print(
                    f"WARNING: The next upload will change sha256 for {wheel}!"
                )
            wheels_to_be_uploaded.append(wheel)
        else:
            if wheel_found:
                sha256 = sha256_on_mirror
            else:
                wheels_to_be_uploaded.append(wheel)

        # Update the override information for this wheel.
        # We sanitize the package names here so they match what the
        # requirements.lock.txt file uses.
        info = Wheel(wheel)
        override_information[f"{sanitize_name(info.name)}=={info.version}"] = {
            "url": override_url,
            "sha256": sha256,
        }

    print(f"We need to upload {len(wheels_to_be_uploaded)} wheels:")
    for wheel in wheels_to_be_uploaded:
        print(wheel)

    # Create a tarball of all the wheels that need to be mirrored.
    py_deps_tar = root_dir / "py_deps.tar"
    with tarfile.open(py_deps_tar, "w") as tar:
        for wheel in wheels_to_be_uploaded:
            # Store the wheels flat, without their directory prefix.
            tar.add(wheel, arcname=wheel.name)

    # Upload the wheels if requested.
    if wheels_to_be_uploaded and args.ssh_host:
        copy_to_host_and_unpack(py_deps_tar, args.ssh_host)
    else:
        print("Skipping mirroring because of lack of --ssh_host or there's "
              "nothing to actually mirror.")

    # Write out the overrides file.
    override_file = python_dir / "whl_overrides.json"
    override_file.write_text(
        json.dumps(override_information, indent=4, sort_keys=True) + "\n")
260
261
# Only run when executed as a script. The return value of main() becomes the
# process exit status.
if __name__ == "__main__":
    sys.exit(main(sys.argv))