Add memory limit enforcement to aos_starter
This gives us a hammer to figure out who is running us out of RAM by
allocating it. That'll let us kill the culprit rather than letting the
OOM killer pick for us.
Change-Id: Id270b878e908f0bf296ed5fc176e327e9b6c2d5a
Signed-off-by: Austin Schuh <austin.schuh@bluerivertech.com>
diff --git a/aos/configuration.fbs b/aos/configuration.fbs
index 7595167..2b25a92 100644
--- a/aos/configuration.fbs
+++ b/aos/configuration.fbs
@@ -152,6 +152,10 @@
// Indicates that application should automatically restart on failure.
autorestart:bool = true (id: 7);
+
+ // If set, this is the memory limit to enforce in bytes for the application
+ // (and its children).
+ memory_limit:uint64 = 0 (id: 8);
}
// Per node data and connection information.
diff --git a/aos/events/logging/logger_test.cc b/aos/events/logging/logger_test.cc
index a463f4f..e86d2a9 100644
--- a/aos/events/logging/logger_test.cc
+++ b/aos/events/logging/logger_test.cc
@@ -3666,9 +3666,9 @@
}
constexpr std::string_view kCombinedConfigSha1(
- "bcc66bc13a90a4a268649744e244129c5d024f5abd67587dcfbd7158d8abfc44");
+ "99bf9b377d9b9a23eaf7de012e9fcc3840fd7d28751461824cd304b27761b81d");
constexpr std::string_view kSplitConfigSha1(
- "d97e998164a6f1bf078aad77ef127329728ac9198a13a5ab8d5f30d84a932662");
+ "123315dce9600c4672d0f86757486f8a15a5a9649aa3d468ea1bdc5d3c243d54");
INSTANTIATE_TEST_SUITE_P(
All, MultinodeLoggerTest,
diff --git a/aos/starter/subprocess.cc b/aos/starter/subprocess.cc
index 0b7bdd3..2886c98 100644
--- a/aos/starter/subprocess.cc
+++ b/aos/starter/subprocess.cc
@@ -10,6 +10,71 @@
namespace aos::starter {
+// RAII class to become root and restore back to the original user and group
+// afterwards.
+//
+// The constructor records the current real, effective, and saved user and
+// group IDs and then sets all of them to 0 (root).  The destructor puts the
+// recorded IDs back.  Every call is PCHECK'd, so any failure is fatal.
+class Sudo {
+ public:
+  Sudo() {
+    // Save what we were.
+    PCHECK(getresuid(&ruid_, &euid_, &suid_) == 0);
+    PCHECK(getresgid(&rgid_, &egid_, &sgid_) == 0);
+
+    // Become root.  The user IDs are switched before the group IDs.
+    PCHECK(setresuid(/* ruid */ 0 /* root */, /* euid */ 0, /* suid */ 0) == 0)
+        << ": Failed to become root";
+    PCHECK(setresgid(/* rgid */ 0 /* root */, /* egid */ 0, /* sgid */ 0) == 0)
+        << ": Failed to become root";
+  }
+
+  ~Sudo() {
+    // And recover.  The group IDs are restored before the user IDs, i.e. in
+    // the opposite order from the constructor (presumably because changing
+    // group IDs needs the privileges that go away once the UID is dropped).
+    PCHECK(setresgid(rgid_, egid_, sgid_) == 0);
+    PCHECK(setresuid(ruid_, euid_, suid_) == 0);
+  }
+
+ private:
+  // IDs captured in the constructor and put back by the destructor.
+  uid_t ruid_, euid_, suid_;
+  gid_t rgid_, egid_, sgid_;
+
+  // Copying would make a second destructor restore the credentials again.
+  DISALLOW_COPY_AND_ASSIGN(Sudo);
+};
+
+// Creates the cgroup directory /sys/fs/cgroup/memory/aos_<name>, becoming root
+// for the duration of the constructor.  If a directory with that name already
+// exists, it is removed and created again.  Any failure is fatal.
+MemoryCGroup::MemoryCGroup(std::string_view name)
+    : cgroup_(absl::StrCat("/sys/fs/cgroup/memory/aos_", name)) {
+  Sudo sudo;
+  int ret = mkdir(cgroup_.c_str(), 0755);
+
+  if (ret != 0 && errno == EEXIST) {
+    // Something with this name is already there; remove it and retry once.
+    PCHECK(remove(cgroup_.c_str()) == 0)
+        << ": Failed to remove previous cgroup " << cgroup_;
+    ret = mkdir(cgroup_.c_str(), 0755);
+  }
+
+  if (ret != 0) {
+    // cgroup_ is already the full path (including the aos_ prefix), so print
+    // it as-is.
+    PLOG(FATAL) << ": Failed to create cgroup " << cgroup_
+                << ", do you have permission?";
+  }
+}
+
+// Writes the given ID into this cgroup's "tasks" file.  An ID of 0 is replaced
+// with the value of getpid() for the caller.
+void MemoryCGroup::AddTid(pid_t pid) {
+  const pid_t target = (pid != 0) ? pid : getpid();
+  const std::string tasks_path = absl::StrCat(cgroup_, "/tasks");
+
+  // Root is held until the end of this function, covering the write below.
+  Sudo sudo;
+  util::WriteStringToFileOrDie(tasks_path.c_str(), std::to_string(target));
+}
+
+// Writes limit_value, formatted as decimal text, to the file named limit_name
+// inside this cgroup's directory.
+void MemoryCGroup::SetLimit(std::string_view limit_name, uint64_t limit_value) {
+  const std::string limit_path = absl::StrCat(cgroup_, "/", limit_name);
+  const std::string value_text = std::to_string(limit_value);
+
+  // Root is held until the end of this function, covering the write below.
+  Sudo sudo;
+  util::WriteStringToFileOrDie(limit_path.c_str(), value_text);
+}
+
+// Removes the cgroup directory, becoming root to do so.  Failing to remove it
+// is fatal.
+MemoryCGroup::~MemoryCGroup() {
+  Sudo sudo;
+  PCHECK(rmdir(cgroup_.c_str()) == 0)
+      << ": Failed to remove cgroup " << cgroup_;
+}
+
SignalListener::SignalListener(aos::ShmEventLoop *loop,
std::function<void(signalfd_siginfo)> callback)
: SignalListener(loop, callback,
@@ -82,6 +147,10 @@
if (application->has_args()) {
set_args(*application->args());
}
+
+ if (application->has_memory_limit() && application->memory_limit() > 0) {
+ SetMemoryLimit(application->memory_limit());
+ }
}
void Application::DoStart() {
@@ -133,6 +202,10 @@
return;
}
+ if (memory_cgroup_) {
+ memory_cgroup_->AddTid();
+ }
+
// Since we are the child process, clear our read-side of all the pipes.
status_pipes_.read.reset();
stdout_pipes_.read.reset();
diff --git a/aos/starter/subprocess.h b/aos/starter/subprocess.h
index a4d7cbb..bacc574 100644
--- a/aos/starter/subprocess.h
+++ b/aos/starter/subprocess.h
@@ -35,6 +35,22 @@
DISALLOW_COPY_AND_ASSIGN(SignalListener);
};
+// Class to use the V1 cgroup API to limit memory usage.
+//
+// An instance creates a cgroup directory when constructed and removes it when
+// destroyed, so it is not copyable.
+class MemoryCGroup {
+ public:
+  // Creates the cgroup for the provided name.
+  explicit MemoryCGroup(std::string_view name);
+  // Removes the cgroup.
+  ~MemoryCGroup();
+
+  // Adds a thread ID to be managed by the cgroup.  A pid of 0 (the default)
+  // means the ID returned by getpid() for the caller.
+  void AddTid(pid_t pid = 0);
+
+  // Sets the provided limit to the provided value.  limit_name is the name of
+  // the file inside the cgroup to write, e.g. "memory.limit_in_bytes".
+  void SetLimit(std::string_view limit_name, uint64_t limit_value);
+
+ private:
+  // Path of the cgroup directory.
+  std::string cgroup_;
+
+  DISALLOW_COPY_AND_ASSIGN(MemoryCGroup);
+};
+
// Manages a running process, allowing starting and stopping, and restarting
// automatically.
class Application {
@@ -80,6 +96,14 @@
const std::string &GetStderr();
std::optional<int> exit_code() const { return exit_code_; }
+  // Sets the memory limit for the application to the provided limit, in bytes.
+  // The backing cgroup is created the first time this is called.  The limit is
+  // a uint64_t to match the memory_limit config field and
+  // MemoryCGroup::SetLimit, so large limits are not truncated where size_t is
+  // 32 bits.
+  void SetMemoryLimit(uint64_t limit) {
+    if (!memory_cgroup_) {
+      memory_cgroup_ = std::make_unique<MemoryCGroup>(name_);
+    }
+    memory_cgroup_->SetLimit("memory.limit_in_bytes", limit);
+  }
+
private:
typedef aos::util::ScopedPipe::PipePair PipePair;
void set_args(
@@ -144,6 +168,8 @@
std::function<void()> on_change_;
+ std::unique_ptr<MemoryCGroup> memory_cgroup_;
+
DISALLOW_COPY_AND_ASSIGN(Application);
};