Add memory limit enforcement to aos_starter

This gives us a hammer to figure out which application is running us out
of RAM.  That'll let us kill the culprit rather than letting the OOM
killer pick for us.

Change-Id: Id270b878e908f0bf296ed5fc176e327e9b6c2d5a
Signed-off-by: Austin Schuh <austin.schuh@bluerivertech.com>
diff --git a/aos/configuration.fbs b/aos/configuration.fbs
index 7595167..2b25a92 100644
--- a/aos/configuration.fbs
+++ b/aos/configuration.fbs
@@ -152,6 +152,10 @@
 
   // Indicates that application should automatically restart on failure.
   autorestart:bool = true (id: 7);
+
+  // If set, this is the memory limit to enforce in bytes for the application
+  // (and its children).  A value of 0 means no limit is enforced.
+  memory_limit:uint64 = 0 (id: 8);
 }
 
 // Per node data and connection information.
diff --git a/aos/events/logging/logger_test.cc b/aos/events/logging/logger_test.cc
index a463f4f..e86d2a9 100644
--- a/aos/events/logging/logger_test.cc
+++ b/aos/events/logging/logger_test.cc
@@ -3666,9 +3666,9 @@
 }
 
 constexpr std::string_view kCombinedConfigSha1(
-    "bcc66bc13a90a4a268649744e244129c5d024f5abd67587dcfbd7158d8abfc44");
+    "99bf9b377d9b9a23eaf7de012e9fcc3840fd7d28751461824cd304b27761b81d");
 constexpr std::string_view kSplitConfigSha1(
-    "d97e998164a6f1bf078aad77ef127329728ac9198a13a5ab8d5f30d84a932662");
+    "123315dce9600c4672d0f86757486f8a15a5a9649aa3d468ea1bdc5d3c243d54");
 
 INSTANTIATE_TEST_SUITE_P(
     All, MultinodeLoggerTest,
diff --git a/aos/starter/subprocess.cc b/aos/starter/subprocess.cc
index 0b7bdd3..2886c98 100644
--- a/aos/starter/subprocess.cc
+++ b/aos/starter/subprocess.cc
@@ -10,6 +10,71 @@
 
 namespace aos::starter {
 
+// Scoped helper: the constructor sets the real/effective/saved UID and GID to
+// 0 (root) and the destructor restores the values saved at construction.
+class Sudo {
+ public:
+  Sudo() {
+    // Record the current real, effective and saved IDs for the destructor.
+    PCHECK(getresuid(&ruid_, &euid_, &suid_) == 0);
+    PCHECK(getresgid(&rgid_, &egid_, &sgid_) == 0);
+
+    // Become root: the user IDs are switched first, then the group IDs.
+    PCHECK(setresuid(/* ruid */ 0 /* root */, /* euid */ 0, /* suid */ 0) == 0)
+        << ": Failed to become root";
+    PCHECK(setresgid(/* rgid */ 0 /* root */, /* egid */ 0, /* sgid */ 0) == 0)
+        << ": Failed to become root";
+  }
+
+  ~Sudo() {
+    // Restore the group IDs first, then the user IDs (the reverse of above).
+    PCHECK(setresgid(rgid_, egid_, sgid_) == 0);
+    PCHECK(setresuid(ruid_, euid_, suid_) == 0);
+  }
+
+  uid_t ruid_, euid_, suid_;
+  gid_t rgid_, egid_, sgid_;
+};
+
+MemoryCGroup::MemoryCGroup(std::string_view name)
+    : cgroup_(absl::StrCat("/sys/fs/cgroup/memory/aos_", name)) {
+  // Creating the cgroup directory is done as root; Sudo reverts on scope exit.
+  Sudo sudo;
+  int ret = mkdir(cgroup_.c_str(), 0755);
+  if (ret != 0 && errno == EEXIST) {
+    // A cgroup with this name already exists: remove it and create it afresh.
+    PCHECK(remove(cgroup_.c_str()) == 0)
+        << ": Failed to remove previous cgroup " << cgroup_;
+    ret = mkdir(cgroup_.c_str(), 0755);
+  }
+
+  if (ret != 0) {
+    // cgroup_ already holds the full path, so log it without an extra prefix.
+    PLOG(FATAL) << ": Failed to create cgroup " << cgroup_
+                << ", do you have permission?";
+  }
+}
+
+void MemoryCGroup::AddTid(pid_t pid) {
+  // A pid of 0 is shorthand for the calling process.
+  const pid_t task = (pid == 0) ? getpid() : pid;
+  const std::string tasks_file = absl::StrCat(cgroup_, "/tasks");
+
+  Sudo sudo;  // The write below is performed as root.
+  util::WriteStringToFileOrDie(tasks_file.c_str(), std::to_string(task));
+}
+
+void MemoryCGroup::SetLimit(std::string_view limit_name, uint64_t limit_value) {
+  const std::string path = absl::StrCat(cgroup_, "/", limit_name);
+  Sudo sudo;  // The limit file is written as root.
+  util::WriteStringToFileOrDie(path.c_str(), std::to_string(limit_value));
+}
+
+MemoryCGroup::~MemoryCGroup() {
+  Sudo sudo;  // The cgroup directory is removed as root.
+  PCHECK(rmdir(cgroup_.c_str()) == 0) << ": Failed to remove " << cgroup_;
+}
+
 SignalListener::SignalListener(aos::ShmEventLoop *loop,
                                std::function<void(signalfd_siginfo)> callback)
     : SignalListener(loop, callback,
@@ -82,6 +147,10 @@
   if (application->has_args()) {
     set_args(*application->args());
   }
+
+  if (application->has_memory_limit() && application->memory_limit() > 0) {
+    SetMemoryLimit(application->memory_limit());
+  }
 }
 
 void Application::DoStart() {
@@ -133,6 +202,10 @@
     return;
   }
 
+  if (memory_cgroup_) {
+    memory_cgroup_->AddTid();
+  }
+
   // Since we are the child process, clear our read-side of all the pipes.
   status_pipes_.read.reset();
   stdout_pipes_.read.reset();
diff --git a/aos/starter/subprocess.h b/aos/starter/subprocess.h
index a4d7cbb..bacc574 100644
--- a/aos/starter/subprocess.h
+++ b/aos/starter/subprocess.h
@@ -35,6 +35,22 @@
   DISALLOW_COPY_AND_ASSIGN(SignalListener);
 };
 
+// Creates a V1 memory cgroup on construction and removes it on destruction.
+class MemoryCGroup {
+ public:
+  explicit MemoryCGroup(std::string_view name);
+  ~MemoryCGroup();
+
+  // Adds a thread ID to the cgroup; 0 (the default) means getpid().
+  void AddTid(pid_t pid = 0);
+
+  // Writes limit_value to the cgroup control file named limit_name.
+  void SetLimit(std::string_view limit_name, uint64_t limit_value);
+
+ private:
+  std::string cgroup_;  // Path of the cgroup directory.
+};
+
 // Manages a running process, allowing starting and stopping, and restarting
 // automatically.
 class Application {
@@ -80,6 +96,14 @@
   const std::string &GetStderr();
   std::optional<int> exit_code() const { return exit_code_; }
 
+  // Limits the application to `limit` bytes, creating its cgroup on first use.
+  void SetMemoryLimit(uint64_t limit) {
+    if (!memory_cgroup_) {
+      memory_cgroup_ = std::make_unique<MemoryCGroup>(name_);
+    }
+    memory_cgroup_->SetLimit("memory.limit_in_bytes", limit);
+  }
+
  private:
   typedef aos::util::ScopedPipe::PipePair PipePair;
   void set_args(
@@ -144,6 +168,8 @@
 
   std::function<void()> on_change_;
 
+  std::unique_ptr<MemoryCGroup> memory_cgroup_;
+
   DISALLOW_COPY_AND_ASSIGN(Application);
 };