Add a script to allow more users to easily upload artifacts

This makes it so that anyone with access to buildkite can upload an
artifact to the build server. Existing protections entail:
1. Doesn't allow users to run this against anything other than the
   master branch.
2. Does not allow overwriting existing files.
3. Does not touch files outside of the
   /data/files/frc971/Build-Dependencies/ directory.

Certainly no riskier than our regular CI at this stage.

This does cause a surprising amount of disk usage on the software971
server, due to bazel extracting the amd64 toolchains.

Steps required to get this set up were:
1. Install the buildkite-agent apt package.
2. Configure the token in /buildkite/buildkite-agent.cfg, and set
   queue=deploy.
3. Start the buildkite-agent service.
4. Add the buildkite-agent user to the www-data group.
5. Add a new gerrit public key for the buildkite user corresponding to
   the new agent.
6. Set up the https://buildkite.com/spartan-robotics/deploy-artifact
   pipeline.

Signed-off-by: James Kuszmaul <jabukuszmaul+collab@gmail.com>
Change-Id: I89416d23e8d75102314c7aa1dbe8b9fd64f4f762
diff --git a/tools/rehosting/BUILD b/tools/rehosting/BUILD
new file mode 100644
index 0000000..0eb1a18
--- /dev/null
+++ b/tools/rehosting/BUILD
@@ -0,0 +1,11 @@
+py_test(
+    name = "rehost_test",
+    srcs = ["rehost_test.py"],
+    deps = [":rehost"],
+)
+
+py_binary(
+    name = "rehost",
+    srcs = ["rehost.py"],
+    deps = ["@pip//validators"],
+)
diff --git a/tools/rehosting/README b/tools/rehosting/README
new file mode 100644
index 0000000..42da3bf
--- /dev/null
+++ b/tools/rehosting/README
@@ -0,0 +1,6 @@
+Notes on setup:
+
+1. The buildkite pipeline is set up to enforce that we only run this against
+   the master branch, to prevent users from running un-checked-in scripts to
+   muck with the build dependency server.
+2. The pipeline runs against the queue=deploy buildkite queue.
diff --git a/tools/rehosting/rehost.py b/tools/rehosting/rehost.py
new file mode 100644
index 0000000..8e5cd1e
--- /dev/null
+++ b/tools/rehosting/rehost.py
@@ -0,0 +1,90 @@
+"""Download a file from a URL and rehost it on the build dependency server.
+
+The URL's host and path determine where the file is stored underneath
+BUILD_DEPENDENCIES_PATH; files that already exist are never overwritten.
+
+Usage: rehost.py <url>
+"""
+from urllib.request import urlopen
+from urllib.parse import urlparse
+import validators
+import shutil
+import stat
+from pathlib import Path
+import os
+import sys
+
+BUILD_DEPENDENCIES_PATH = "/data/files/frc971/Build-Dependencies/"
+WWW_GROUP = "www-data"
+
+
+def get_url() -> str:
+    """Returns the URL to rehost, i.e. the first command-line argument."""
+    return sys.argv[1]
+
+
+def validate_url(url: str) -> str:
+    """Returns url unchanged if it is acceptable; raises ValueError if not."""
+    # We have no reason to allow people to download things from IP addresses directly.
+    if not validators.url(
+            url, simple_host=True, skip_ipv4_addr=True, skip_ipv6_addr=True):
+        raise ValueError(f"Invalid URL {url}")
+    return url
+
+
+def url_to_path(url: str) -> Path:
+    """Maps a URL onto the path it should be stored at on the server.
+
+    Raises:
+        ValueError: The resolved path escapes BUILD_DEPENDENCIES_PATH.
+        FileExistsError: Something already exists at the resolved path.
+    """
+    parsed = urlparse(url)
+    # Strip out the http:// and any other extraneous junk:
+    path = (Path(BUILD_DEPENDENCIES_PATH) /
+            (parsed.netloc + parsed.path)).resolve()
+    # Confirm that someone didn't sneak in a URL that looks like http://foo.bar/../../../.. or something.
+    path.relative_to(BUILD_DEPENDENCIES_PATH)
+    if path.exists():
+        raise FileExistsError(f"There is already a file uploaded for {url}.")
+    return path
+
+
+def download():
+    """Downloads the URL from the command line onto the build server.
+
+    The new file is made read-only and, on a best-effort basis, it and its
+    parent directories are handed to the WWW_GROUP group.
+    """
+    url = validate_url(get_url())
+    path = url_to_path(url)
+    path.parent.mkdir(mode=0o775, parents=True, exist_ok=True)
+
+    # Mode 'x' fails with FileExistsError rather than truncating if the file
+    # appeared after url_to_path() checked for it.
+    with urlopen(url) as downloaded, open(path, 'xb') as output:
+        try:
+            # Stream to disk rather than buffering the whole artifact in memory.
+            shutil.copyfileobj(downloaded, output)
+        except BaseException:
+            # Don't leave a partial file behind: it would block any retry,
+            # since we refuse to overwrite existing files.
+            path.unlink()
+            raise
+
+    relative_path = path.relative_to(BUILD_DEPENDENCIES_PATH)
+    path.chmod(stat.S_IRUSR | stat.S_IRGRP | stat.S_IROTH)
+    try:
+        shutil.chown(path, group=WWW_GROUP)
+        for parent in relative_path.parents:
+            shutil.chown(Path(BUILD_DEPENDENCIES_PATH) / parent,
+                         group=WWW_GROUP)
+    except (OSError, LookupError) as e:
+        # The chown's sometimes fail if they get to a manually-created/touched
+        # directory; don't worry about that if it happens..
+        print(f"Warning: failed to set group to {WWW_GROUP}: {e}",
+              file=sys.stderr)
+
+
+if __name__ == "__main__":
+    download()
diff --git a/tools/rehosting/rehost.sh b/tools/rehosting/rehost.sh
new file mode 100755
index 0000000..445d05e
--- /dev/null
+++ b/tools/rehosting/rehost.sh
@@ -0,0 +1,10 @@
+#!/bin/bash
+# Rehosts the URL stored in the buildkite "dependency-url" meta-data field on
+# the build dependency server.
+set -euo pipefail
+
+# Fetch the URL in a separate assignment so that a failing buildkite-agent
+# call aborts the script instead of handing an empty URL to the rehost tool.
+dependency_url="$(buildkite-agent meta-data get dependency-url)"
+
+tools/bazel run //tools/rehosting:rehost -- "${dependency_url}"
diff --git a/tools/rehosting/rehost_test.py b/tools/rehosting/rehost_test.py
new file mode 100644
index 0000000..da316e2
--- /dev/null
+++ b/tools/rehosting/rehost_test.py
@@ -0,0 +1,49 @@
+import unittest
+import unittest.mock
+from tools.rehosting import rehost
+from pathlib import Path
+import os
+import re
+import tempfile
+
+
+class TestRehost(unittest.TestCase):
+    """Tests for URL validation and URL-to-path mapping in rehost."""
+
+    def test_url_validation(self):
+        """Checks which URLs validate_url() accepts and rejects."""
+        self.assertEqual("http://google.com",
+                         rehost.validate_url("http://google.com"))
+        self.assertRaisesRegex(Exception, "Invalid URL", rehost.validate_url,
+                               "file:///some/secret")
+        self.assertRaisesRegex(Exception, "Invalid URL", rehost.validate_url,
+                               "http://10.0.0.0/secret")
+
+    def test_url_to_path(self):
+        """Checks path mapping, escape detection and the no-overwrite rule."""
+        # Work in a fresh directory so that nothing is left behind in a shared
+        # /tmp when run outside of bazel. Resolve it so that it has no trailing
+        # slash or symlinks: the paths in url_to_path()'s return value and in
+        # pathlib's ValueError message are normalized.
+        with tempfile.TemporaryDirectory(
+                dir=os.getenv("TEST_TMPDIR")) as temp_dir:
+            test_dir = str(Path(temp_dir).resolve())
+            with unittest.mock.patch.object(rehost, "BUILD_DEPENDENCIES_PATH",
+                                            test_dir):
+                existing_file = Path(test_dir) / "exists.com"
+                existing_file.write_text('string')
+                self.assertEqual(
+                    Path(test_dir) / "example.com/foo/bar",
+                    rehost.url_to_path("https://example.com/foo/bar"))
+                self.assertRaisesRegex(
+                    ValueError,
+                    f"not in the subpath of '{re.escape(test_dir)}'",
+                    rehost.url_to_path, "https://example.com/../../bar")
+                self.assertRaisesRegex(FileExistsError,
+                                       "There is already a file",
+                                       rehost.url_to_path,
+                                       "https://exists.com")
+
+
+if __name__ == "__main__":
+    unittest.main()