blob: 2b18406a8dd4850dcfbda0383f5d6c10e135b7cb [file] [log] [blame]
"""This script mirrors our pip package dependencies.

This script looks at the requirements.lock.txt file and generates a wheel for
each entry. Those wheels are then mirrored.

See tools/python/README.md for some more information.
"""
8
9import argparse
10import hashlib
11import json
12import os
13import pwd
14import subprocess
15import sys
16import tarfile
17from pathlib import Path
18from typing import List, Optional, Tuple
19
20import requests
21from pkginfo import Wheel
22
# Python version tag handed (as a string) to generate_pip_packages_in_docker.sh.
# Presumably 39 means CPython 3.9 -- confirm against that script.
PYTHON_VERSION = 39
# Platform tag and CPU architecture. Together they select the
# quay.io/pypa/{PLAT}_{ARCH} docker image and are also passed to the wheel
# generation script.
PLAT = "manylinux_2_28"
ARCH = "x86_64"
# Base URL under which mirrored wheels are looked up; it is also the URL prefix
# written into whl_overrides.json.
WHEELHOUSE_MIRROR_URL = "https://software.frc971.org/Build-Dependencies/wheelhouse"
# Directory on the SSH host into which the uploaded tarball of wheels is
# unpacked.
PY_DEPS_WWWW_DIR = "/var/www/html/files/frc971/Build-Dependencies/wheelhouse"
28
29
def compute_sha256(data: bytes) -> str:
    """Returns the sha256 digest of an in-memory byte string.

    Args:
        data: The raw bytes to hash.

    Returns:
        The digest rendered as a hexadecimal string.
    """
    # hashlib accepts the initial payload directly, so no separate update()
    # call is needed for a single buffer.
    return hashlib.sha256(data).hexdigest()
42
43
def compute_file_sha256(filename: Path) -> str:
    """Computes the sha256 checksum of the content of a file.

    The file is hashed in fixed-size chunks so that large wheels do not have
    to be loaded into memory in one piece.

    Args:
        filename: The file to checksum.

    Returns:
        The hex representation of the checksum.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    chunk_size = 1024 * 1024
    hasher = hashlib.sha256()
    with open(filename, "rb") as stream:
        # iter() with a sentinel keeps calling read() until it returns b"",
        # which signals end-of-file.
        for chunk in iter(lambda: stream.read(chunk_size), b""):
            hasher.update(chunk)
    return hasher.hexdigest()
54
55
def search_for_uploaded_wheel(wheel: Path, wheel_url: str) -> Tuple[bool, str]:
    """Searches for this wheel on our internal mirror.

    Since we can't build wheels reproducibly, our best option is to check
    whether this wheel already exists on the mirror. If it does, we can skip
    uploading it.

    Args:
        wheel: The wheel to search for on the mirror. Currently unused; the
            lookup is driven entirely by wheel_url.
        wheel_url: The URL where the wheel is expected if it exists on the mirror.

    Returns:
        A two-tuple. The first value is a boolean that signifies whether the
        wheel was found on the mirror. The second value is a string. If the
        wheel was not found on the mirror, this is an empty string. Otherwise,
        this string contains the sha256 checksum of the wheel found on the
        mirror.

    Raises:
        RuntimeError: If the mirror answers with a status code other than 200
            or 404.
    """
    # TODO(phil): A better way to do this would be to SSH into the host and
    # look for files on the filesystem.
    response = requests.get(wheel_url)

    if response.status_code == 200:
        # The whole wheel is downloaded so that its checksum can be compared
        # against (or substituted for) the locally built one.
        return True, compute_sha256(response.content)
    if response.status_code == 404:
        return False, ""

    raise RuntimeError(
        f"Don't know what to do with status code {response.status_code} "
        f"when trying to get {wheel_url}")
86
87
def copy_to_host_and_unpack(filename: Path, ssh_host: str) -> None:
    """Copies the tarball of wheels to the server and unpacks the tarball.

    The tarball is first copied with scp(1) to the default remote directory of
    the SSH user. It is then unpacked into PY_DEPS_WWWW_DIR by a shell running
    as www-data via sudo, and the files under that directory are made
    world-readable.

    Args:
        filename: The path to the tarball to be uploaded. A plain string path
            is accepted as well.
        ssh_host: The server that will be passed to ssh(1) for uploading and
            unpacking the tarball.

    Raises:
        subprocess.CalledProcessError: If scp or the remote command fails.
    """
    # TODO(phil): De-duplicate with tools/go/mirror_go_repos.py
    import shlex

    # Coerce so that both Path and str callers work; only the base name is
    # needed on the remote side because scp drops the file in the login
    # directory.
    tarball = Path(filename)

    subprocess.run(["scp", str(tarball), f"{ssh_host}:"], check=True)

    remote_dir = shlex.quote(PY_DEPS_WWWW_DIR)
    command = " && ".join([
        f"mkdir -p {remote_dir}",
        f"tar -C {remote_dir} --no-same-owner -xvaf {shlex.quote(tarball.name)}",
        # Change the permissions so other users can read them (and checksum
        # them).
        f"find {remote_dir}/ -type f -exec chmod 644 {{}} +",
    ])

    print("You might be asked for your sudo password shortly.")
    # shlex.quote() wraps the whole command for the remote shell, so the
    # individual commands above may safely contain any characters.
    subprocess.run(
        ["ssh", "-t", ssh_host, f"sudo -u www-data bash -c {shlex.quote(command)}"],
        check=True)
114
115
def main(argv: List[str]) -> Optional[int]:
    """Builds all wheels, mirrors the missing ones, and writes the overrides.

    The steps are:
      1. Run generate_pip_packages_in_docker.sh inside the manylinux docker
         image to populate tools/python/wheelhouse.
      2. For every wheel, check whether the mirror already has it and decide
         whether it needs to be uploaded.
      3. Pack the wheels to upload into py_deps.tar in the workspace root and,
         if --ssh_host was given, upload and unpack it on that host.
      4. Write tools/python/whl_overrides.json with the mirror URL and sha256
         of every wheel.

    Args:
        argv: The full command line, including the program name at index 0.

    Returns:
        None, which sys.exit() treats as a successful exit.

    Raises:
        KeyError: If BUILD_WORKSPACE_DIRECTORY is not set, if neither
            SUDO_USER nor USER is set, or if the calling user is unknown to
            the password database.
        subprocess.CalledProcessError: If docker, scp, or ssh fails.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-f",
        "--force",
        action="store_true",
        help=("If set, ignores packages we have already uploaded and "
              "possibly overwrite them with the just-built ones. Use with "
              "extreme caution! This may easily cause issues with building "
              "older commits. Use this only if you know what you're doing."))
    parser.add_argument(
        "--ssh_host",
        type=str,
        help=("The SSH host to copy the built wheels to. This "
              "should be software.971spartans.net where all the "
              "Build-Dependencies files live. Only specify this if you have "
              "access to the server."))
    args = parser.parse_args(argv[1:])

    root_dir = Path(os.environ["BUILD_WORKSPACE_DIRECTORY"])
    # Prefer the user who invoked sudo (if any) over the effective user. The
    # resulting uid is handed to the script running inside the container.
    caller = os.getenv("SUDO_USER") or os.environ["USER"]
    caller_id = pwd.getpwnam(caller).pw_uid

    python_dir = root_dir / "tools" / "python"

    # Run the wheel generation script inside the docker container provided by
    # the pypa/manylinux project.
    # https://github.com/pypa/manylinux/
    subprocess.run([
        "docker",
        "run",
        "-it",
        "-v",
        f"{python_dir}:/opt/971_build/",
        f"quay.io/pypa/{PLAT}_{ARCH}",
        "/opt/971_build/generate_pip_packages_in_docker.sh",
        PLAT,
        ARCH,
        str(PYTHON_VERSION),
        str(caller_id),
    ],
                   check=True)

    # Get the list of wheels we downloaded from pypi.org or built ourselves.
    wheelhouse = python_dir / "wheelhouse"
    wheels = wheelhouse.glob("*.whl")

    # Assemble the override list. This list will tell rules_python to download
    # from our mirror instead of pypi.org.
    wheels_to_be_uploaded = []
    override_information = {}
    for wheel in sorted(wheels):
        wheel_url = f"{WHEELHOUSE_MIRROR_URL}/{wheel.name}"
        sha256 = compute_file_sha256(wheel)

        # Check if we already have the wheel uploaded. If so, record the
        # checksum of the mirrored copy instead of the freshly built one. This
        # lets us avoid non-reproducibility with pip and native extensions.
        # https://github.com/pypa/pip/issues/9604
        wheel_found, sha256_on_mirror = search_for_uploaded_wheel(
            wheel, wheel_url)

        if args.force:
            # Forced mode always uploads; warn when that replaces a mirrored
            # wheel with different content.
            if wheel_found and sha256 != sha256_on_mirror:
                print(
                    f"WARNING: The next upload will change sha256 for {wheel}!"
                )
            wheels_to_be_uploaded.append(wheel)
        else:
            if wheel_found:
                sha256 = sha256_on_mirror
            else:
                wheels_to_be_uploaded.append(wheel)

        # Update the override information for this wheel.
        # We use lower-case for the package names here because that's what the
        # requirements.lock.txt file uses.
        info = Wheel(wheel)
        override_information[f"{info.name.lower()}=={info.version}"] = {
            "url": wheel_url,
            "sha256": sha256,
        }

    print(f"We need to upload {len(wheels_to_be_uploaded)} wheels:")
    for wheel in wheels_to_be_uploaded:
        print(wheel)

    # Create a tarball of all the wheels that need to be mirrored. The archive
    # is flat: every wheel is stored under its bare file name.
    py_deps_tar = root_dir / "py_deps.tar"
    with tarfile.open(py_deps_tar, "w") as tar:
        for wheel in wheels_to_be_uploaded:
            tar.add(wheel, arcname=wheel.name)

    # Upload the wheels if requested.
    if wheels_to_be_uploaded and args.ssh_host:
        copy_to_host_and_unpack(py_deps_tar, args.ssh_host)
    else:
        print("Skipping mirroring because of lack of --ssh_host or there's "
              "nothing to actually mirror.")

    # Write out the overrides file.
    override_file = python_dir / "whl_overrides.json"
    override_file.write_text(
        json.dumps(override_information, indent=4, sort_keys=True) + "\n")
220
221
# Script entry point: run main() with the full command line and use its return
# value as the process exit status.
if __name__ == "__main__":
    raise SystemExit(main(sys.argv))