James Kuszmaul | b109116 | 2023-12-27 16:16:45 -0800 | [diff] [blame] | 1 | from urllib.request import urlopen |
| 2 | from urllib.parse import urlparse |
| 3 | import validators |
| 4 | import shutil |
| 5 | import stat |
| 6 | from pathlib import Path |
| 7 | import os |
| 8 | import sys |
| 9 | |
# Root directory under which every downloaded file is stored; the destination
# path mirrors the URL's host and path below this directory.
BUILD_DEPENDENCIES_PATH = "/data/files/frc971/Build-Dependencies/"
# Group that downloaded files and their parent directories are chown'd to.
WWW_GROUP = "www-data"
| 12 | |
| 13 | |
def get_url() -> str:
    """Return the URL to download, taken from the first command-line argument.

    Returns:
        The raw, not yet validated, value of sys.argv[1].

    Raises:
        IndexError: If no argument was supplied on the command line.
    """
    if len(sys.argv) < 2:
        # Keep the exception type a bare sys.argv[1] would raise, but tell the
        # operator what is actually missing instead of "list index out of
        # range".
        raise IndexError(
            "Expected the URL to download as the first command-line argument.")
    return sys.argv[1]
| 16 | |
| 17 | |
def validate_url(url: str) -> str:
    """Return `url` unchanged if it passes validation, otherwise raise.

    Args:
        url: The candidate URL to check.

    Returns:
        The same `url` string that was passed in.

    Raises:
        ValueError: If validators.url() reports the URL as invalid.
    """
    # We have no reason to allow people to download things from IP addresses
    # directly, hence the skip_ipv4_addr/skip_ipv6_addr flags.
    is_acceptable = validators.url(url,
                                   simple_host=True,
                                   skip_ipv4_addr=True,
                                   skip_ipv6_addr=True)
    if is_acceptable:
        return url
    raise ValueError(f"Invalid URL {url}")
| 24 | |
| 25 | |
def url_to_path(url: str) -> Path:
    """Map a URL to its destination file under BUILD_DEPENDENCIES_PATH.

    The destination is BUILD_DEPENDENCIES_PATH joined with the URL's host
    (netloc) and path; the scheme, query and fragment are not used.

    Args:
        url: The URL whose local destination should be computed.

    Returns:
        The resolved destination path, which does not exist yet.

    Raises:
        ValueError: If the resolved destination is not located under
            BUILD_DEPENDENCIES_PATH.
        FileExistsError: If something already exists at the destination.
    """
    pieces = urlparse(url)
    # Keep only host + path; this drops the http:// and any other extraneous
    # junk.
    destination = (Path(BUILD_DEPENDENCIES_PATH) /
                   (pieces.netloc + pieces.path)).resolve()
    # relative_to() raises ValueError when the destination escapes the root,
    # which catches URLs that look like http://foo.bar/../../../.. or similar.
    destination.relative_to(BUILD_DEPENDENCIES_PATH)
    if not destination.exists():
        return destination
    raise FileExistsError(f"There is already a file uploaded for {url}.")
| 36 | |
| 37 | |
def download():
    """Download the URL given on the command line into the dependency mirror.

    The URL is validated, mapped to a destination under
    BUILD_DEPENDENCIES_PATH, and streamed to disk. The resulting file is made
    read-only for user, group and other, and then, on a best-effort basis, the
    file and each of its parent directories up to BUILD_DEPENDENCIES_PATH are
    assigned to WWW_GROUP.

    Raises:
        ValueError: If the URL fails validation or maps to a location outside
            of BUILD_DEPENDENCIES_PATH.
        FileExistsError: If the destination file already exists.

    Errors raised by urlopen() or while writing the file propagate to the
    caller; in that case the partially written file is removed first.
    """
    url = validate_url(get_url())
    path = url_to_path(url)
    path.parent.mkdir(mode=0o775, parents=True, exist_ok=True)

    # Open the URL first so that a failed request never creates a file.
    # Mode 'xb' creates the file exclusively: if something appeared at `path`
    # after the existence check in url_to_path(), this raises FileExistsError
    # instead of silently overwriting it.
    with urlopen(url) as downloaded, open(path, 'xb') as output:
        try:
            # Stream in chunks rather than holding the whole body in memory.
            shutil.copyfileobj(downloaded, output)
        except BaseException:
            # Do not leave a truncated file behind; it would make every retry
            # fail with FileExistsError.
            try:
                path.unlink()
            except OSError:
                pass
            raise

    relative_path = path.relative_to(BUILD_DEPENDENCIES_PATH)
    # Read-only for everyone (0o444).
    path.chmod(stat.S_IRUSR | stat.S_IRGRP | stat.S_IROTH)
    try:
        shutil.chown(path, group=WWW_GROUP)
        # relative_path.parents runs from the immediate parent directory up to
        # ".", i.e. BUILD_DEPENDENCIES_PATH itself.
        for parent in relative_path.parents:
            shutil.chown(Path(BUILD_DEPENDENCIES_PATH) / parent,
                         group=WWW_GROUP)
    except (OSError, LookupError):
        # The chown's sometimes fail if they get to a manually-created/touched
        # directory (OSError), or the group may not exist on this machine
        # (LookupError); don't worry about that if it happens.
        pass
| 58 | |
| 59 | |
# Only run the download when executed as a script, not when imported.
if __name__ == "__main__":
    download()