Build emscripten caches in sandbox

This makes it so that the various standard-library-like artifacts are
actually built within the sandbox, rather than being some random files
that I generate ad-hoc and copy into the correct folder.

This requires patching emcc so that it adds the necessary -isystem flags
when building those libraries.

Change-Id: I7b71b36cbbafd511df5d7a8396794aeee2403a05
diff --git a/tools/cpp/emscripten/BUILD b/tools/cpp/emscripten/BUILD
index dd304e1..6a67e4f 100644
--- a/tools/cpp/emscripten/BUILD
+++ b/tools/cpp/emscripten/BUILD
@@ -1,26 +1,46 @@
 package(default_visibility = ["//visibility:public"])
 
+_minimum_fileset = [
+    "emar.sh",
+    "emcc.sh",
+    "@emscripten_clang//:all",
+    "@emscripten_toolchain//:all",
+    "@nodejs//:bin/node",
+]
+
+filegroup(
+    name = "minimum_files",
+    srcs = _minimum_fileset,
+)
+
 filegroup(
     name = "all",
-    srcs = [
-        "emar.sh",
-        "emcc.sh",
+    srcs = _minimum_fileset + [
         ":emscripten_cache_content",
-        "@emscripten_clang//:all",
-        "@emscripten_toolchain//:all",
-        "@nodejs//:bin/node",
     ],
 )
 
-# TODO(james): There is a set of static files that emscripten will always
-# attempt to either rebuild or source from a cache. Ideally, we would make
-# the build of these files part of the normal build process; however, the
-# emscripten compiler handles sandboxing poorly for these files and so it
-# is simplest to just manually run emscripten outside of the sandbox and
-# copy over the cache folder for now.
+# A list of all the cached libraries generated and used by emscripten.
+_libs = \
+    ["emscripten_cache/asmjs/" + lib for lib in [
+        "generated_struct_info.json",
+        "libc.bc",
+        "libcompiler_rt.a",
+        "libc-wasm.bc",
+        "libpthreads_stub.bc",
+        "libhtml5.bc",
+        "libdlmalloc.bc",
+        "libal.bc",
+        "libc++_noexcept.a",
+        "libc++abi.bc",
+        "libgl-webgl2.bc",
+        "libgl.bc",
+        "libc-extras.bc",
+    ]]
+
 filegroup(
     name = "emscripten_cache_content",
-    srcs = glob(["emscripten_cache/**/*"]),
+    srcs = glob(["emscripten_cache/**/*"]) + _libs,
 )
 
 cc_toolchain(
@@ -36,3 +56,22 @@
     strip_files = ":empty",
     supports_param_files = 0,
 )
+
+# TODO(james): Currently, this gets built with the host configuration.
+# For now, that doesn't actually impact the build since there's nothing that
+# affects how the genrule is run. However, that also means that changing
+# the configuration (and thus the flags that may be passed to the C++
+# compiler) will not change how these cache files are generated.
+genrule(
+    name = "gencache",
+    # Note that foo.o is just some arbitrary .o file. I had trouble getting
+    # emscripten to work properly when pointed at a literally empty file, but
+    # the exact contents of the .o aren't particularly important.
+    srcs = [":foo.o"],
+    outs = _libs,
+    cmd = "$(location gencache.sh) $(OUTS)",
+    tools = [
+        ":gencache.sh",
+        ":minimum_files",
+    ],
+)