Move ScopedPipe/Application classes out of starterd_lib.*
Change-Id: I1b66ef343b6d4d1129fdc8d40781d1e5b711d2b2
Signed-off-by: James Kuszmaul <james.kuszmaul@bluerivertech.com>
diff --git a/aos/starter/subprocess.cc b/aos/starter/subprocess.cc
new file mode 100644
index 0000000..e68f604
--- /dev/null
+++ b/aos/starter/subprocess.cc
@@ -0,0 +1,390 @@
+#include "aos/starter/subprocess.h"
+
+#include <grp.h>
+#include <pwd.h>
+#include <sys/prctl.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+
+#include "glog/logging.h"
+
+namespace aos::starter {
+
+SignalListener::SignalListener(aos::ShmEventLoop *loop,
+ std::function<void(signalfd_siginfo)> callback)
+ : SignalListener(loop, callback,
+ {SIGHUP, SIGINT, SIGQUIT, SIGABRT, SIGFPE, SIGSEGV,
+ SIGPIPE, SIGTERM, SIGBUS, SIGXCPU, SIGCHLD}) {}
+
+SignalListener::SignalListener(aos::ShmEventLoop *loop,
+ std::function<void(signalfd_siginfo)> callback,
+ std::initializer_list<unsigned int> signals)
+ : loop_(loop), callback_(std::move(callback)), signalfd_(signals) {
+ loop->epoll()->OnReadable(signalfd_.fd(), [this] {
+ signalfd_siginfo info = signalfd_.Read();
+
+ if (info.ssi_signo == 0) {
+ LOG(WARNING) << "Could not read " << sizeof(signalfd_siginfo) << " bytes";
+ return;
+ }
+
+ callback_(info);
+ });
+}
+
+SignalListener::~SignalListener() { loop_->epoll()->DeleteFd(signalfd_.fd()); }
+
+Application::Application(const aos::Application *application,
+ aos::EventLoop *event_loop,
+ std::function<void()> on_change)
+ : name_(application->name()->string_view()),
+ path_(application->has_executable_name()
+ ? application->executable_name()->string_view()
+ : application->name()->string_view()),
+ args_(1),
+ user_name_(application->has_user() ? application->user()->str() : ""),
+ user_(application->has_user() ? FindUid(user_name_.c_str())
+ : std::nullopt),
+ group_(application->has_user() ? FindPrimaryGidForUser(user_name_.c_str())
+ : std::nullopt),
+ autostart_(application->autostart()),
+ autorestart_(application->autorestart()),
+ event_loop_(event_loop),
+ start_timer_(event_loop_->AddTimer([this] {
+ status_ = aos::starter::State::RUNNING;
+ LOG(INFO) << "Started '" << name_ << "' pid: " << pid_;
+ })),
+ restart_timer_(event_loop_->AddTimer([this] { DoStart(); })),
+ stop_timer_(event_loop_->AddTimer([this] {
+ if (kill(pid_, SIGKILL) == 0) {
+ LOG(WARNING) << "Failed to stop, sending SIGKILL to '" << name_
+ << "' pid: " << pid_;
+ }
+ })),
+ on_change_(on_change) {}
+
+void Application::DoStart() {
+ if (status_ != aos::starter::State::WAITING) {
+ return;
+ }
+
+ start_timer_->Disable();
+ restart_timer_->Disable();
+
+ std::tie(read_pipe_, write_pipe_) = util::ScopedPipe::MakePipe();
+
+ const pid_t pid = fork();
+
+ if (pid != 0) {
+ if (pid == -1) {
+ PLOG(WARNING) << "Failed to fork '" << name_ << "'";
+ stop_reason_ = aos::starter::LastStopReason::FORK_ERR;
+ status_ = aos::starter::State::STOPPED;
+ } else {
+ pid_ = pid;
+ id_ = next_id_++;
+ start_time_ = event_loop_->monotonic_now();
+ status_ = aos::starter::State::STARTING;
+ LOG(INFO) << "Starting '" << name_ << "' pid " << pid_;
+
+ // Setup timer which moves application to RUNNING state if it is still
+ // alive in 1 second.
+ start_timer_->Setup(event_loop_->monotonic_now() +
+ std::chrono::seconds(1));
+ }
+ on_change_();
+ return;
+ }
+
+ // Clear out signal mask of parent so forked process receives all signals
+ // normally.
+ sigset_t empty_mask;
+ sigemptyset(&empty_mask);
+ sigprocmask(SIG_SETMASK, &empty_mask, nullptr);
+
+ // Cleanup children if starter dies in a way that is not handled gracefully.
+ if (prctl(PR_SET_PDEATHSIG, SIGKILL) == -1) {
+ write_pipe_.Write(
+ static_cast<uint32_t>(aos::starter::LastStopReason::SET_PRCTL_ERR));
+ PLOG(FATAL) << "Could not set PR_SET_PDEATHSIG to SIGKILL";
+ }
+
+ if (group_) {
+ CHECK(!user_name_.empty());
+ // The manpage for setgroups says we just need CAP_SETGID, but empirically
+ // we also need the effective UID to be 0 to make it work. user_ must also
+ // be set so we change this effective UID back later.
+ CHECK(user_);
+ if (seteuid(0) == -1) {
+ write_pipe_.Write(
+ static_cast<uint32_t>(aos::starter::LastStopReason::SET_GRP_ERR));
+ PLOG(FATAL) << "Could not seteuid(0) for " << name_
+ << " in preparation for setting groups";
+ }
+ if (initgroups(user_name_.c_str(), *group_) == -1) {
+ write_pipe_.Write(
+ static_cast<uint32_t>(aos::starter::LastStopReason::SET_GRP_ERR));
+ PLOG(FATAL) << "Could not initialize normal groups for " << name_
+ << " as " << user_name_ << " with " << *group_;
+ }
+ if (setgid(*group_) == -1) {
+ write_pipe_.Write(
+ static_cast<uint32_t>(aos::starter::LastStopReason::SET_GRP_ERR));
+ PLOG(FATAL) << "Could not set group for " << name_ << " to " << *group_;
+ }
+ }
+
+ if (user_) {
+ if (setuid(*user_) == -1) {
+ write_pipe_.Write(
+ static_cast<uint32_t>(aos::starter::LastStopReason::SET_USR_ERR));
+ PLOG(FATAL) << "Could not set user for " << name_ << " to " << *user_;
+ }
+ }
+
+ // argv[0] should be the program name
+ args_.insert(args_.begin(), path_.data());
+
+ execvp(path_.c_str(), args_.data());
+
+ // If we got here, something went wrong
+ write_pipe_.Write(
+ static_cast<uint32_t>(aos::starter::LastStopReason::EXECV_ERR));
+ PLOG(WARNING) << "Could not execute " << name_ << " (" << path_ << ')';
+
+ _exit(EXIT_FAILURE);
+}
+
+void Application::DoStop(bool restart) {
+ // If stop or restart received, the old state of these is no longer applicable
+ // so cancel both.
+ restart_timer_->Disable();
+ start_timer_->Disable();
+
+ switch (status_) {
+ case aos::starter::State::STARTING:
+ case aos::starter::State::RUNNING: {
+ LOG(INFO) << "Stopping '" << name_ << "' pid: " << pid_ << " with signal "
+ << SIGINT;
+ status_ = aos::starter::State::STOPPING;
+
+ kill(pid_, SIGINT);
+
+ // Watchdog timer to SIGKILL application if it is still running 1 second
+ // after SIGINT
+ stop_timer_->Setup(event_loop_->monotonic_now() +
+ std::chrono::seconds(1));
+ queue_restart_ = restart;
+ on_change_();
+ break;
+ }
+ case aos::starter::State::WAITING: {
+ // If waiting to restart, and receives restart, skip the waiting period
+ // and restart immediately. If stop received, all we have to do is move
+ // to the STOPPED state.
+ if (restart) {
+ DoStart();
+ } else {
+ status_ = aos::starter::State::STOPPED;
+ on_change_();
+ }
+ break;
+ }
+ case aos::starter::State::STOPPING: {
+ // If the application is already stopping, then we just need to update the
+ // restart flag to the most recent status.
+ queue_restart_ = restart;
+ break;
+ }
+ case aos::starter::State::STOPPED: {
+ // Restart immediately if the application is already stopped
+ if (restart) {
+ status_ = aos::starter::State::WAITING;
+ DoStart();
+ }
+ break;
+ }
+ }
+}
+
+void Application::QueueStart() {
+ status_ = aos::starter::State::WAITING;
+
+ LOG(INFO) << "Restarting " << name_ << " in 3 seconds";
+ restart_timer_->Setup(event_loop_->monotonic_now() + std::chrono::seconds(3));
+ start_timer_->Disable();
+ stop_timer_->Disable();
+ on_change_();
+}
+
+void Application::set_args(
+ const flatbuffers::Vector<flatbuffers::Offset<flatbuffers::String>> &v) {
+ args_.clear();
+ std::transform(v.begin(), v.end(), std::back_inserter(args_),
+ [](const flatbuffers::String *str) {
+ return const_cast<char *>(str->c_str());
+ });
+ args_.push_back(nullptr);
+}
+
+std::optional<uid_t> Application::FindUid(const char *name) {
+ // TODO(austin): Use the reentrant version. This should be safe.
+ struct passwd *user_data = getpwnam(name);
+ if (user_data != nullptr) {
+ return user_data->pw_uid;
+ } else {
+ LOG(FATAL) << "Could not find user " << name;
+ return std::nullopt;
+ }
+}
+
+std::optional<gid_t> Application::FindPrimaryGidForUser(const char *name) {
+ // TODO(austin): Use the reentrant version. This should be safe.
+ struct passwd *user_data = getpwnam(name);
+ if (user_data != nullptr) {
+ return user_data->pw_gid;
+ } else {
+ LOG(FATAL) << "Could not find user " << name;
+ return std::nullopt;
+ }
+}
+
+flatbuffers::Offset<aos::starter::ApplicationStatus>
+Application::PopulateStatus(flatbuffers::FlatBufferBuilder *builder) {
+ CHECK_NOTNULL(builder);
+ auto name_fbs = builder->CreateString(name_);
+
+ aos::starter::ApplicationStatus::Builder status_builder(*builder);
+ status_builder.add_name(name_fbs);
+ status_builder.add_state(status_);
+ status_builder.add_last_exit_code(exit_code_);
+ status_builder.add_last_stop_reason(stop_reason_);
+ if (pid_ != -1) {
+ status_builder.add_pid(pid_);
+ status_builder.add_id(id_);
+ }
+ status_builder.add_last_start_time(start_time_.time_since_epoch().count());
+ return status_builder.Finish();
+}
+
+void Application::Terminate() {
+ stop_reason_ = aos::starter::LastStopReason::TERMINATE;
+ DoStop(false);
+ terminating_ = true;
+}
+
+void Application::HandleCommand(aos::starter::Command cmd) {
+ switch (cmd) {
+ case aos::starter::Command::START: {
+ switch (status_) {
+ case aos::starter::State::WAITING: {
+ restart_timer_->Disable();
+ DoStart();
+ break;
+ }
+ case aos::starter::State::STARTING: {
+ break;
+ }
+ case aos::starter::State::RUNNING: {
+ break;
+ }
+ case aos::starter::State::STOPPING: {
+ queue_restart_ = true;
+ break;
+ }
+ case aos::starter::State::STOPPED: {
+ status_ = aos::starter::State::WAITING;
+ DoStart();
+ break;
+ }
+ }
+ break;
+ }
+ case aos::starter::Command::STOP: {
+ stop_reason_ = aos::starter::LastStopReason::STOP_REQUESTED;
+ DoStop(false);
+ break;
+ }
+ case aos::starter::Command::RESTART: {
+ stop_reason_ = aos::starter::LastStopReason::RESTART_REQUESTED;
+ DoStop(true);
+ break;
+ }
+ }
+}
+
+bool Application::MaybeHandleSignal() {
+ int status;
+
+ // Check if the status of this process has changed
+ if (pid_ == -1 || waitpid(pid_, &status, WNOHANG) != pid_) {
+ return false;
+ }
+
+ // Check that the event was the process exiting
+ if (!WIFEXITED(status) && !WIFSIGNALED(status)) {
+ return false;
+ }
+
+ exit_time_ = event_loop_->monotonic_now();
+ exit_code_ = WIFEXITED(status) ? WEXITSTATUS(status) : WTERMSIG(status);
+
+ if (auto read_result = read_pipe_.Read()) {
+ stop_reason_ = static_cast<aos::starter::LastStopReason>(*read_result);
+ }
+
+ switch (status_) {
+ case aos::starter::State::STARTING: {
+ LOG(WARNING) << "Failed to start '" << name_ << "' on pid " << pid_
+ << " : Exited with status " << exit_code_;
+ if (autorestart()) {
+ QueueStart();
+ }
+ break;
+ }
+ case aos::starter::State::RUNNING: {
+ if (exit_code_ == 0) {
+ LOG(INFO) << "Application '" << name_ << "' pid " << pid_
+ << " exited with status " << exit_code_;
+ } else {
+ LOG(WARNING) << "Application '" << name_ << "' pid " << pid_
+ << " exited unexpectedly with status " << exit_code_;
+ }
+ if (autorestart()) {
+ QueueStart();
+ }
+ break;
+ }
+ case aos::starter::State::STOPPING: {
+ LOG(INFO) << "Successfully stopped '" << name_ << "' pid: " << pid_
+ << " with status " << exit_code_;
+ status_ = aos::starter::State::STOPPED;
+
+ // Disable force stop timer since the process already died
+ stop_timer_->Disable();
+
+ on_change_();
+ if (terminating_) {
+ return true;
+ }
+
+ if (queue_restart_) {
+ queue_restart_ = false;
+ status_ = aos::starter::State::WAITING;
+ DoStart();
+ }
+ break;
+ }
+ case aos::starter::State::WAITING:
+ case aos::starter::State::STOPPED: {
+ LOG(FATAL)
+ << "Received signal on process that was already stopped : name: '"
+ << name_ << "' pid: " << pid_;
+ break;
+ }
+ }
+
+ return false;
+}
+
+} // namespace aos::starter