blob: 19bb477739d3e4f64a936ce99e2c7e412bd7f2e1 [file] [log] [blame]
"""This script mirrors our pip package dependencies.

This script looks at the requirements.lock.txt file and generates a wheel for
each entry. Those wheels are then mirrored.

See tools/python/README.md for some more information.
"""
8
9import argparse
10import hashlib
11import json
12import os
13import pwd
14import subprocess
15import sys
16import tarfile
17from pathlib import Path
18from typing import List, Optional, Tuple
19
20import requests
21from pkginfo import Wheel
22
# Platform tag and CPU architecture handed to
# generate_pip_packages_in_docker.sh when the wheels are generated.
PLAT = "manylinux_2_31"
ARCH = "x86_64"
# Base URL under which mirrored wheels are looked up; also used as the base of
# the override URLs written to whl_overrides.json.
WHEELHOUSE_MIRROR_URL = "https://software.frc971.org/Build-Dependencies/wheelhouse"
# Directory on the SSH host into which the uploaded tarball of wheels is
# unpacked.
PY_DEPS_WWWW_DIR = "/var/www/html/files/frc971/Build-Dependencies/wheelhouse"
27
28
def compute_sha256(data: bytes) -> str:
    """Returns the sha256 digest of an in-memory byte string.

    Args:
        data: The bytes to checksum.

    Returns:
        The checksum as a lower-case hexadecimal string.
    """
    # Feeding the data to the constructor is equivalent to creating an empty
    # hasher and calling update() once.
    return hashlib.sha256(data).hexdigest()
41
42
def compute_file_sha256(filename: Path) -> str:
    """Returns the sha256 digest of a file's content.

    The whole file is read into memory before it is hashed.

    Args:
        filename: The file to checksum.

    Returns:
        The checksum as a hexadecimal string.
    """
    contents = filename.read_bytes()
    return compute_sha256(contents)
53
54
def search_for_uploaded_wheel(wheel: Path, wheel_url: str) -> Tuple[bool, str]:
    """Searches for this wheel on our internal mirror.

    Since we can't build wheels reproducibly, our best option is to check
    whether this wheel already exists on the mirror. If it does, we can skip
    uploading it.

    Args:
        wheel: The wheel to search for on the mirror. Currently unused; the
            lookup is done purely via wheel_url.
        wheel_url: The URL where the wheel is expected if it exists on the
            mirror.

    Returns:
        A two-tuple. The first value is a boolean that signifies whether the
        wheel was found on the mirror. The second value is a string. If the
        wheel was not found on the mirror, this is an empty string. Otherwise,
        this string contains the sha256 checksum of the wheel found on the
        mirror.

    Raises:
        RuntimeError: If the mirror answers with a status code other than 200
            or 404.
    """
    # TODO(phil): A better way to do this would be to SSH into the host and
    # look for files on the filesystem.
    response = requests.get(wheel_url)

    if response.status_code == 200:
        # The wheel exists. Checksum the mirrored copy so the caller can pin
        # exactly what is being served.
        return True, compute_sha256(response.content)
    if response.status_code == 404:
        return False, ""

    # Anything else (redirect, auth failure, server error, ...) is unexpected.
    # Fail loudly rather than guessing whether the wheel exists.
    raise RuntimeError(
        f"Don't know what to do with status code {response.status_code} when trying to get {wheel_url}"
    )
85
86
def copy_to_host_and_unpack(filename: Path, ssh_host: str) -> None:
    """Copies the tarball of wheels to the server and unpacks the tarball.

    The tarball is copied with scp(1) into the default remote directory of
    the SSH user and then extracted into PY_DEPS_WWWW_DIR as the www-data
    user.

    Args:
        filename: The path to the tarball to be uploaded. A plain string is
            accepted as well.
        ssh_host: The server that will be passed to ssh(1) for uploading and
            unpacking the tarball.

    Raises:
        subprocess.CalledProcessError: If scp or ssh exits with a non-zero
            status.
    """
    # TODO(phil): De-duplicate with tools/go/mirror_go_repos.py

    # Normalise to a Path so that .name below works regardless of whether the
    # caller handed us a str or a Path.
    tarball = Path(filename)

    subprocess.run(["scp", str(tarball), f"{ssh_host}:"], check=True)

    # Be careful not to use single quotes in these commands to avoid breaking
    # the subprocess.run() invocation below.
    command = " && ".join([
        f"mkdir -p {PY_DEPS_WWWW_DIR}",
        # Only the base name is used: scp placed the file in the remote
        # directory that the ssh session below starts in.
        f"tar -C {PY_DEPS_WWWW_DIR} --no-same-owner -xvaf {tarball.name}",
        # Change the permissions so other users can read them (and checksum
        # them).
        f"find {PY_DEPS_WWWW_DIR}/ -type f -exec chmod 644 {{}} +",
    ])

    print("You might be asked for your sudo password shortly.")
    # -t allocates a terminal so that sudo can prompt for the password.
    subprocess.run(
        ["ssh", "-t", ssh_host, f"sudo -u www-data bash -c '{command}'"],
        check=True)
113
114
def main(argv: List[str]) -> Optional[int]:
    """Generates the wheels, mirrors new ones, and writes the override file.

    The steps are:
      1. Build the pip-compile docker image and run the wheel generation
         script inside it.
      2. For every wheel in tools/python/wheelhouse, decide whether it needs
         to be uploaded and which URL/sha256 to record for it.
      3. Pack the wheels to be uploaded into py_deps.tar and, if --ssh_host
         was given, upload and unpack them on the mirror.
      4. Write tools/python/whl_overrides.json.

    Args:
        argv: The full command line, including the program name at index 0.

    Returns:
        None, which sys.exit() treats as success.

    Raises:
        KeyError: If BUILD_WORKSPACE_DIRECTORY is not set, if neither
            SUDO_USER nor USER is set, or if the calling user is unknown to
            the password database.
        subprocess.CalledProcessError: If docker, scp, or ssh fails.
        RuntimeError: If the mirror answers a wheel lookup with an unexpected
            status code.
        SystemExit: If the command line is invalid, including the combination
            of --local_test and --ssh_host.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-f",
        "--force",
        action="store_true",
        help=("If set, ignores packages we have already uploaded and "
              "possibly overwrite them with the just-built ones. Use with "
              "extreme caution! This may easily cause issues with building "
              "older commits. Use this only if you know what you're doing."))
    parser.add_argument(
        "-l",
        "--local_test",
        action="store_true",
        help=("If set, generate the URL overrides pointing at the generated "
              "local files. Incompatible with --ssh_host. This is useful for "
              "iterating on generated wheel files."))
    parser.add_argument(
        "--ssh_host",
        type=str,
        help=("The SSH host to copy the generated wheels to. This "
              "should be software.971spartans.net where all the "
              "Build-Dependencies files live. Only specify this if you have "
              "access to the server."))
    args = parser.parse_args(argv[1:])

    # In local test mode every wheel is treated as "not on the mirror", so
    # combining it with --ssh_host would upload (and possibly overwrite)
    # everything. Enforce the documented incompatibility up front.
    if args.local_test and args.ssh_host:
        parser.error("--local_test is incompatible with --ssh_host.")

    root_dir = Path(os.environ["BUILD_WORKSPACE_DIRECTORY"])
    # Prefer the invoking user when running under sudo. The uid is handed to
    # the script that runs inside the container.
    caller = os.getenv("SUDO_USER") or os.environ["USER"]
    caller_id = pwd.getpwnam(caller).pw_uid

    python_dir = root_dir / "tools" / "python"

    container_tag = f"pip-compile:{caller}"

    subprocess.run([
        "docker",
        "build",
        "--file=generate_pip_packages.Dockerfile",
        f"--tag={container_tag}",
        ".",
    ],
                   cwd=python_dir,
                   check=True)

    # Run the wheel generation script inside the docker container provided by
    # the pypa/manylinux project.
    # https://github.com/pypa/manylinux/
    subprocess.run([
        "docker",
        "run",
        "-it",
        "-v",
        f"{python_dir}:/opt/971_build/",
        container_tag,
        "/opt/971_build/generate_pip_packages_in_docker.sh",
        PLAT,
        ARCH,
        str(caller_id),
    ],
                   check=True)

    # Get the list of wheels we downloaded from pypi.org or built ourselves.
    wheelhouse = python_dir / "wheelhouse"
    wheels = wheelhouse.glob("*.whl")

    # Assemble the override list. This list will tell rules_python to download
    # from our mirror instead of pypi.org.
    wheels_to_be_uploaded = []
    override_information = {}
    for wheel in sorted(wheels):
        wheel_url = f"{WHEELHOUSE_MIRROR_URL}/{wheel.name}"
        sha256 = compute_file_sha256(wheel)

        if args.local_test:
            # Point at the freshly generated file and skip the mirror lookup
            # entirely: local iteration must not depend on (or be slowed
            # down by) the mirror.
            override_url = f"file://{wheel.resolve()}"
            wheel_found, sha256_on_mirror = False, ""
        else:
            override_url = wheel_url
            # Check if we already have the wheel uploaded. If so, use the
            # checksum of that one. This lets us avoid non-reproducibility
            # with pip and native extensions.
            # https://github.com/pypa/pip/issues/9604
            wheel_found, sha256_on_mirror = search_for_uploaded_wheel(
                wheel, wheel_url)

        if args.force:
            # Forced mode always uploads; warn when that changes what the
            # mirror currently serves.
            if wheel_found and sha256 != sha256_on_mirror:
                print(
                    f"WARNING: The next upload will change sha256 for {wheel}!"
                )
            wheels_to_be_uploaded.append(wheel)
        else:
            if wheel_found:
                # Keep the mirrored wheel authoritative.
                sha256 = sha256_on_mirror
            else:
                wheels_to_be_uploaded.append(wheel)

        # Update the override information for this wheel.
        # We use lower-case for the package names here because that's what the
        # requirements.lock.txt file uses.
        info = Wheel(wheel)
        override_information[f"{info.name.lower()}=={info.version}"] = {
            "url": override_url,
            "sha256": sha256,
        }

    print(f"We need to upload {len(wheels_to_be_uploaded)} wheels:")
    for wheel in wheels_to_be_uploaded:
        print(wheel)

    # Create a tarball of all the wheels that need to be mirrored.
    py_deps_tar = root_dir / "py_deps.tar"
    with tarfile.open(py_deps_tar, "w") as tar:
        for wheel in wheels_to_be_uploaded:
            # Store only the file name so the archive unpacks flat.
            tar.add(wheel, arcname=wheel.name)

    # Upload the wheels if requested.
    if wheels_to_be_uploaded and args.ssh_host:
        copy_to_host_and_unpack(py_deps_tar, args.ssh_host)
    else:
        print("Skipping mirroring because of lack of --ssh_host or there's "
              "nothing to actually mirror.")

    # Write out the overrides file.
    override_file = python_dir / "whl_overrides.json"
    override_file.write_text(
        json.dumps(override_information, indent=4, sort_keys=True) + "\n")
245
# Script entry point: run main() with the full command line and use its return
# value as the process exit status.
if __name__ == "__main__":
    raise SystemExit(main(sys.argv))