Merge "Backport google/googletest#2815 into Ceres's internal copy"
diff --git a/aos/configuration.fbs b/aos/configuration.fbs
index 3401b43..c0d67b0 100644
--- a/aos/configuration.fbs
+++ b/aos/configuration.fbs
@@ -146,6 +146,9 @@
// Indicates that application should be executed on boot.
autostart:bool = true (id: 6);
+
+ // Indicates that application should automatically restart on failure.
+ autorestart:bool = true (id: 7);
}
// Per node data and connection information.
diff --git a/aos/events/logging/logger_test.cc b/aos/events/logging/logger_test.cc
index 7a569eb..1f784d3 100644
--- a/aos/events/logging/logger_test.cc
+++ b/aos/events/logging/logger_test.cc
@@ -2614,9 +2614,9 @@
}
constexpr std::string_view kCombinedConfigSha1(
- "9e07da76098ad1b755a7c3143aca300d66b6abb88745f6c36e603ef1441f0ad5");
+ "b8114cd99e9c606b7517d8f478bf16513aef550f23f317720667314cb261f689");
constexpr std::string_view kSplitConfigSha1(
- "85ef8be228bf4eb36f4d64ba68183b2a9a616bfb9b057e430d61e33bd273df86");
+ "60943d0a46afa6028c8ae276e7b4900b35892f1f53b1818056e7d04463709c5b");
INSTANTIATE_TEST_SUITE_P(
All, MultinodeLoggerTest,
diff --git a/aos/starter/BUILD b/aos/starter/BUILD
index 73353f6..e6dad81 100644
--- a/aos/starter/BUILD
+++ b/aos/starter/BUILD
@@ -23,6 +23,21 @@
)
cc_library(
+ name = "subprocess",
+ srcs = ["subprocess.cc"],
+ hdrs = ["subprocess.h"],
+ visibility = ["//visibility:public"],
+ deps = [
+ ":starter_fbs",
+ ":starter_rpc_fbs",
+ "//aos/events:event_loop",
+ "//aos/events:shm_event_loop",
+ "//aos/util:scoped_pipe",
+ "@com_github_google_glog//:glog",
+ ],
+)
+
+cc_library(
name = "starterd_lib",
srcs = ["starterd_lib.cc"],
hdrs = ["starterd_lib.h"],
@@ -30,6 +45,7 @@
deps = [
":starter_fbs",
":starter_rpc_fbs",
+ ":subprocess",
"//aos:configuration",
"//aos:macros",
"//aos/events:shm_event_loop",
@@ -56,6 +72,24 @@
)
cc_test(
+ name = "subprocess_test",
+ srcs = ["subprocess_test.cc"],
+ data = [
+ "//aos/events:pingpong_config",
+ ],
+ # The roborio compiler doesn't support <filesystem>.
+ target_compatible_with =
+ ["@platforms//os:linux"],
+ deps = [
+ ":subprocess",
+ "//aos/events:shm_event_loop",
+ "//aos/testing:googletest",
+ "//aos/testing:path",
+ "//aos/testing:tmpdir",
+ ],
+)
+
+cc_test(
name = "starter_test",
srcs = ["starter_test.cc"],
data = [
@@ -65,7 +99,7 @@
"//aos/events:pong",
],
linkopts = ["-lstdc++fs"],
- shard_count = 3,
+ shard_count = 4,
# The roborio compiler doesn't support <filesystem>.
target_compatible_with =
select({
diff --git a/aos/starter/starter_cmd.cc b/aos/starter/starter_cmd.cc
index ae65ff0..65861e1 100644
--- a/aos/starter/starter_cmd.cc
+++ b/aos/starter/starter_cmd.cc
@@ -155,9 +155,20 @@
}
PrintKey();
for (const aos::Node *node : application_nodes) {
- auto status = aos::starter::GetStatus(application_name, config, node);
- PrintApplicationStatus(&status.message(), aos::monotonic_clock::now(),
- node);
+ auto optional_status =
+ aos::starter::GetStatus(application_name, config, node);
+ if (optional_status.has_value()) {
+ PrintApplicationStatus(&optional_status.value().second.message(),
+ optional_status.value().first, node);
+ } else {
+ if (node != nullptr) {
+ LOG(ERROR) << "No status available yet for \"" << application_name
+ << "\" on node \"" << node->name()->string_view() << "\".";
+ } else {
+ LOG(ERROR) << "No status available yet for \"" << application_name
+ << "\".";
+ }
+ }
}
} else {
LOG(ERROR) << "The \"status\" command requires zero or one arguments.";
diff --git a/aos/starter/starter_rpc_lib.cc b/aos/starter/starter_rpc_lib.cc
index 3007326..e373e6b 100644
--- a/aos/starter/starter_rpc_lib.cc
+++ b/aos/starter/starter_rpc_lib.cc
@@ -258,20 +258,26 @@
return success;
}
-const FlatbufferDetachedBuffer<aos::starter::ApplicationStatus> GetStatus(
- std::string_view name, const Configuration *config, const aos::Node *node) {
+const std::optional<
+ std::pair<aos::monotonic_clock::time_point,
+ FlatbufferDetachedBuffer<aos::starter::ApplicationStatus>>>
+GetStatus(std::string_view name, const Configuration *config,
+ const aos::Node *node) {
ShmEventLoop event_loop(config);
event_loop.SkipAosLog();
auto status_fetcher = event_loop.MakeFetcher<aos::starter::Status>(
StatusChannelForNode(config, node)->name()->string_view());
status_fetcher.Fetch();
- auto status = status_fetcher.get()
- ? FindApplicationStatus(*status_fetcher, name)
- : nullptr;
- return status ? aos::CopyFlatBuffer(status)
- : FlatbufferDetachedBuffer<
- aos::starter::ApplicationStatus>::Empty();
+ if (status_fetcher.get() != nullptr) {
+ const aos::starter::ApplicationStatus *status =
+ FindApplicationStatus(*status_fetcher, name);
+ if (status != nullptr) {
+ return std::make_pair(status_fetcher.context().monotonic_remote_time,
+ aos::CopyFlatBuffer(status));
+ }
+ }
+ return std::nullopt;
}
std::optional<std::pair<aos::monotonic_clock::time_point,
diff --git a/aos/starter/starter_rpc_lib.h b/aos/starter/starter_rpc_lib.h
index 7b93e24..3097b5f 100644
--- a/aos/starter/starter_rpc_lib.h
+++ b/aos/starter/starter_rpc_lib.h
@@ -97,11 +97,15 @@
std::chrono::milliseconds timeout);
// Fetches the status of the application with the given name. Creates a
-// temporary event loop from the provided config for fetching. Returns an empty
-// flatbuffer if the application is not found.
-const aos::FlatbufferDetachedBuffer<aos::starter::ApplicationStatus> GetStatus(
- std::string_view name, const aos::Configuration *config,
- const aos::Node *node);
+// temporary event loop from the provided config for fetching. Returns nullopt
+// if no status is available or the application is not found. Otherwise returns
+// the time at which the ApplicationStatus was sent on the node it was sent from
+// (to allow calculating uptimes on remote nodes), paired with the status.
+const std::optional<
+ std::pair<aos::monotonic_clock::time_point,
+ aos::FlatbufferDetachedBuffer<aos::starter::ApplicationStatus>>>
+GetStatus(std::string_view name, const aos::Configuration *config,
+ const aos::Node *node);
// Fetches the entire status message of starter. Creates a temporary event loop
// from the provided config for fetching.
diff --git a/aos/starter/starter_test.cc b/aos/starter/starter_test.cc
index f434e84..120fe38 100644
--- a/aos/starter/starter_test.cc
+++ b/aos/starter/starter_test.cc
@@ -316,5 +316,95 @@
starterd_thread.join();
}
+// Tests that starterd respects autorestart.
+TEST_F(StarterdTest, DeathNoRestartTest) {
+ const std::string config_file =
+ ArtifactPath("aos/events/pingpong_config.json");
+
+ aos::FlatbufferDetachedBuffer<aos::Configuration> config =
+ aos::configuration::ReadConfig(config_file);
+
+ const std::string test_dir = aos::testing::TestTmpDir();
+
+ auto new_config = aos::configuration::MergeWithConfig(
+ &config.message(), absl::StrFormat(
+ R"({"applications": [
+ {
+ "name": "ping",
+ "executable_name": "%s",
+ "args": ["--shm_base", "%s/aos"],
+ "autorestart": false
+ },
+ {
+ "name": "pong",
+ "executable_name": "%s",
+ "args": ["--shm_base", "%s/aos"]
+ }
+ ]})",
+ ArtifactPath("aos/events/ping"), test_dir,
+ ArtifactPath("aos/events/pong"), test_dir));
+
+ const aos::Configuration *config_msg = &new_config.message();
+
+ // Set up starter with config file
+ aos::starter::Starter starter(config_msg);
+
+ // Create an event loop to watch the Status messages and observe the state
+ // transitions.
+ aos::ShmEventLoop watcher_loop(config_msg);
+ watcher_loop.SkipAosLog();
+
+ watcher_loop
+ .AddTimer([&watcher_loop] {
+ watcher_loop.Exit();
+ SUCCEED();
+ })
+ ->Setup(watcher_loop.monotonic_now() + std::chrono::seconds(11));
+
+ int test_stage = 0;
+ uint64_t id;
+
+ watcher_loop.MakeWatcher("/aos", [&test_stage, &watcher_loop,
+ &id](const aos::starter::Status &status) {
+ const aos::starter::ApplicationStatus *app_status =
+ FindApplicationStatus(status, "ping");
+ if (app_status == nullptr) {
+ return;
+ }
+
+ switch (test_stage) {
+ case 0: {
+ if (app_status->has_state() &&
+ app_status->state() == aos::starter::State::RUNNING) {
+ LOG(INFO) << "Ping is running";
+ test_stage = 1;
+ ASSERT_TRUE(app_status->has_pid());
+ ASSERT_TRUE(kill(app_status->pid(), SIGINT) != -1);
+ ASSERT_TRUE(app_status->has_id());
+ id = app_status->id();
+ }
+ break;
+ }
+
+ case 1: {
+ if (app_status->has_state() &&
+ app_status->state() == aos::starter::State::RUNNING &&
+ app_status->has_id() && app_status->id() != id) {
+ LOG(INFO) << "Ping restarted, it shouldn't...";
+ watcher_loop.Exit();
+ FAIL();
+ }
+ break;
+ }
+ }
+ });
+
+ std::thread starterd_thread([&starter] { starter.Run(); });
+ watcher_loop.Run();
+
+ starter.Cleanup();
+ starterd_thread.join();
+}
+
} // namespace starter
} // namespace aos
diff --git a/aos/starter/starterd.cc b/aos/starter/starterd.cc
index b40776d..a5a340a 100644
--- a/aos/starter/starterd.cc
+++ b/aos/starter/starterd.cc
@@ -24,6 +24,10 @@
return 1;
}
}
+ // Change the real and effective IDs to the user we're running as. The
+ // effective IDs mean file accesses (like shared memory) will happen as
+ // that user. The real IDs allow child processes with a different effective
+ // ID to still participate in signal sending/receiving.
constexpr int kUnchanged = -1;
if (setresgid(/* ruid */ gid, /* euid */ gid,
/* suid */ kUnchanged) != 0) {
diff --git a/aos/starter/starterd_lib.cc b/aos/starter/starterd_lib.cc
index 7bf2e0d..008c46f 100644
--- a/aos/starter/starterd_lib.cc
+++ b/aos/starter/starterd_lib.cc
@@ -1,10 +1,5 @@
#include "starterd_lib.h"
-#include <fcntl.h>
-#include <pwd.h>
-#include <sys/fsuid.h>
-#include <sys/prctl.h>
-
#include <algorithm>
#include <utility>
@@ -16,398 +11,6 @@
namespace aos {
namespace starter {
-Application::Application(const aos::Application *application,
- aos::ShmEventLoop *event_loop,
- std::function<void()> on_change)
- : name_(application->name()->string_view()),
- path_(application->has_executable_name()
- ? application->executable_name()->string_view()
- : application->name()->string_view()),
- args_(1),
- user_(application->has_user() ? FindUid(application->user()->c_str())
- : std::nullopt),
- group_(application->has_user()
- ? FindPrimaryGidForUser(application->user()->c_str())
- : std::nullopt),
- autostart_(application->autostart()),
- event_loop_(event_loop),
- start_timer_(event_loop_->AddTimer([this] {
- status_ = aos::starter::State::RUNNING;
- LOG(INFO) << "Started '" << name_ << "' pid: " << pid_;
- })),
- restart_timer_(event_loop_->AddTimer([this] { DoStart(); })),
- stop_timer_(event_loop_->AddTimer([this] {
- if (kill(pid_, SIGKILL) == 0) {
- LOG(WARNING) << "Failed to stop, sending SIGKILL to '" << name_
- << "' pid: " << pid_;
- }
- })),
- on_change_(on_change) {}
-
-void Application::DoStart() {
- if (status_ != aos::starter::State::WAITING) {
- return;
- }
-
- start_timer_->Disable();
- restart_timer_->Disable();
-
- std::tie(read_pipe_, write_pipe_) = ScopedPipe::MakePipe();
-
- const pid_t pid = fork();
-
- if (pid != 0) {
- if (pid == -1) {
- PLOG(WARNING) << "Failed to fork '" << name_ << "'";
- stop_reason_ = aos::starter::LastStopReason::FORK_ERR;
- status_ = aos::starter::State::STOPPED;
- } else {
- pid_ = pid;
- id_ = next_id_++;
- start_time_ = event_loop_->monotonic_now();
- status_ = aos::starter::State::STARTING;
- LOG(INFO) << "Starting '" << name_ << "' pid " << pid_;
-
- // Setup timer which moves application to RUNNING state if it is still
- // alive in 1 second.
- start_timer_->Setup(event_loop_->monotonic_now() +
- std::chrono::seconds(1));
- }
- on_change_();
- return;
- }
-
- // Clear out signal mask of parent so forked process receives all signals
- // normally.
- sigset_t empty_mask;
- sigemptyset(&empty_mask);
- sigprocmask(SIG_SETMASK, &empty_mask, nullptr);
-
- // Cleanup children if starter dies in a way that is not handled gracefully.
- if (prctl(PR_SET_PDEATHSIG, SIGKILL) == -1) {
- write_pipe_.Write(
- static_cast<uint32_t>(aos::starter::LastStopReason::SET_PRCTL_ERR));
- PLOG(FATAL) << "Could not set PR_SET_PDEATHSIG to SIGKILL";
- }
-
- if (group_) {
- if (setgid(*group_) == -1) {
- write_pipe_.Write(
- static_cast<uint32_t>(aos::starter::LastStopReason::SET_GRP_ERR));
- PLOG(FATAL) << "Could not set group for " << name_ << " to " << *group_;
- }
- }
-
- if (user_) {
- if (setuid(*user_) == -1) {
- write_pipe_.Write(
- static_cast<uint32_t>(aos::starter::LastStopReason::SET_USR_ERR));
- PLOG(FATAL) << "Could not set user for " << name_ << " to " << *user_;
- }
- }
-
- // argv[0] should be the program name
- args_.insert(args_.begin(), path_.data());
-
- execvp(path_.c_str(), args_.data());
-
- // If we got here, something went wrong
- write_pipe_.Write(
- static_cast<uint32_t>(aos::starter::LastStopReason::EXECV_ERR));
- PLOG(WARNING) << "Could not execute " << name_ << " (" << path_ << ')';
-
- _exit(EXIT_FAILURE);
-}
-
-void Application::DoStop(bool restart) {
- // If stop or restart received, the old state of these is no longer applicable
- // so cancel both.
- restart_timer_->Disable();
- start_timer_->Disable();
-
- switch (status_) {
- case aos::starter::State::STARTING:
- case aos::starter::State::RUNNING: {
- LOG(INFO) << "Stopping '" << name_ << "' pid: " << pid_ << " with signal "
- << SIGINT;
- status_ = aos::starter::State::STOPPING;
-
- kill(pid_, SIGINT);
-
- // Watchdog timer to SIGKILL application if it is still running 1 second
- // after SIGINT
- stop_timer_->Setup(event_loop_->monotonic_now() +
- std::chrono::seconds(1));
- queue_restart_ = restart;
- on_change_();
- break;
- }
- case aos::starter::State::WAITING: {
- // If waiting to restart, and receives restart, skip the waiting period
- // and restart immediately. If stop received, all we have to do is move
- // to the STOPPED state.
- if (restart) {
- DoStart();
- } else {
- status_ = aos::starter::State::STOPPED;
- on_change_();
- }
- break;
- }
- case aos::starter::State::STOPPING: {
- // If the application is already stopping, then we just need to update the
- // restart flag to the most recent status.
- queue_restart_ = restart;
- break;
- }
- case aos::starter::State::STOPPED: {
- // Restart immediately if the application is already stopped
- if (restart) {
- status_ = aos::starter::State::WAITING;
- DoStart();
- }
- break;
- }
- }
-}
-
-void Application::QueueStart() {
- status_ = aos::starter::State::WAITING;
-
- LOG(INFO) << "Restarting " << name_ << " in 3 seconds";
- restart_timer_->Setup(event_loop_->monotonic_now() + std::chrono::seconds(3));
- start_timer_->Disable();
- stop_timer_->Disable();
- on_change_();
-}
-
-void Application::set_args(
- const flatbuffers::Vector<flatbuffers::Offset<flatbuffers::String>> &v) {
- args_.clear();
- std::transform(v.begin(), v.end(), std::back_inserter(args_),
- [](const flatbuffers::String *str) {
- return const_cast<char *>(str->c_str());
- });
- args_.push_back(nullptr);
-}
-
-std::optional<uid_t> Application::FindUid(const char *name) {
- // TODO(austin): Use the reentrant version. This should be safe.
- struct passwd *user_data = getpwnam(name);
- if (user_data != nullptr) {
- return user_data->pw_uid;
- } else {
- LOG(FATAL) << "Could not find user " << name;
- return std::nullopt;
- }
-}
-
-std::optional<gid_t> Application::FindPrimaryGidForUser(const char *name) {
- // TODO(austin): Use the reentrant version. This should be safe.
- struct passwd *user_data = getpwnam(name);
- if (user_data != nullptr) {
- return user_data->pw_gid;
- } else {
- LOG(FATAL) << "Could not find user " << name;
- return std::nullopt;
- }
-}
-
-flatbuffers::Offset<aos::starter::ApplicationStatus>
-Application::PopulateStatus(flatbuffers::FlatBufferBuilder *builder) {
- CHECK_NOTNULL(builder);
- auto name_fbs = builder->CreateString(name_);
-
- aos::starter::ApplicationStatus::Builder status_builder(*builder);
- status_builder.add_name(name_fbs);
- status_builder.add_state(status_);
- status_builder.add_last_exit_code(exit_code_);
- status_builder.add_last_stop_reason(stop_reason_);
- if (pid_ != -1) {
- status_builder.add_pid(pid_);
- status_builder.add_id(id_);
- }
- status_builder.add_last_start_time(start_time_.time_since_epoch().count());
- return status_builder.Finish();
-}
-
-void Application::Terminate() {
- stop_reason_ = aos::starter::LastStopReason::TERMINATE;
- DoStop(false);
- terminating_ = true;
-}
-
-void Application::HandleCommand(aos::starter::Command cmd) {
- switch (cmd) {
- case aos::starter::Command::START: {
- switch (status_) {
- case aos::starter::State::WAITING: {
- restart_timer_->Disable();
- DoStart();
- break;
- }
- case aos::starter::State::STARTING: {
- break;
- }
- case aos::starter::State::RUNNING: {
- break;
- }
- case aos::starter::State::STOPPING: {
- queue_restart_ = true;
- break;
- }
- case aos::starter::State::STOPPED: {
- status_ = aos::starter::State::WAITING;
- DoStart();
- break;
- }
- }
- break;
- }
- case aos::starter::Command::STOP: {
- stop_reason_ = aos::starter::LastStopReason::STOP_REQUESTED;
- DoStop(false);
- break;
- }
- case aos::starter::Command::RESTART: {
- stop_reason_ = aos::starter::LastStopReason::RESTART_REQUESTED;
- DoStop(true);
- break;
- }
- }
-}
-
-bool Application::MaybeHandleSignal() {
- int status;
-
- // Check if the status of this process has changed
- if (pid_ == -1 || waitpid(pid_, &status, WNOHANG) != pid_) {
- return false;
- }
-
- // Check that the event was the process exiting
- if (!WIFEXITED(status) && !WIFSIGNALED(status)) {
- return false;
- }
-
- exit_time_ = event_loop_->monotonic_now();
- exit_code_ = WIFEXITED(status) ? WEXITSTATUS(status) : WTERMSIG(status);
-
- if (auto read_result = read_pipe_.Read()) {
- stop_reason_ = static_cast<aos::starter::LastStopReason>(*read_result);
- }
-
- switch (status_) {
- case aos::starter::State::STARTING: {
- LOG(WARNING) << "Failed to start '" << name_ << "' on pid " << pid_
- << " : Exited with status " << exit_code_;
- QueueStart();
- break;
- }
- case aos::starter::State::RUNNING: {
- LOG(WARNING) << "Application '" << name_ << "' pid " << pid_
- << " exited unexpectedly with status " << exit_code_;
- QueueStart();
- break;
- }
- case aos::starter::State::STOPPING: {
- LOG(INFO) << "Successfully stopped '" << name_ << "' pid: " << pid_
- << " with status " << exit_code_;
- status_ = aos::starter::State::STOPPED;
-
- // Disable force stop timer since the process already died
- stop_timer_->Disable();
-
- on_change_();
- if (terminating_) {
- return true;
- }
-
- if (queue_restart_) {
- queue_restart_ = false;
- status_ = aos::starter::State::WAITING;
- DoStart();
- }
- break;
- }
- case aos::starter::State::WAITING:
- case aos::starter::State::STOPPED: {
- LOG(FATAL)
- << "Received signal on process that was already stopped : name: '"
- << name_ << "' pid: " << pid_;
- break;
- }
- }
-
- return false;
-}
-
-ScopedPipe::ScopedPipe(int fd) : fd_(fd) {}
-
-ScopedPipe::~ScopedPipe() {
- if (fd_ != -1) {
- PCHECK(close(fd_) != -1);
- }
-}
-
-ScopedPipe::ScopedPipe(ScopedPipe &&scoped_pipe) : fd_(scoped_pipe.fd_) {
- scoped_pipe.fd_ = -1;
-}
-
-ScopedPipe &ScopedPipe::operator=(ScopedPipe &&scoped_pipe) {
- if (fd_ != -1) {
- PCHECK(close(fd_) != -1);
- }
- fd_ = scoped_pipe.fd_;
- scoped_pipe.fd_ = -1;
- return *this;
-}
-
-std::tuple<ScopedPipe::ScopedReadPipe, ScopedPipe::ScopedWritePipe>
-ScopedPipe::MakePipe() {
- int fds[2];
- PCHECK(pipe(fds) != -1);
- PCHECK(fcntl(fds[0], F_SETFL, fcntl(fds[0], F_GETFL) | O_NONBLOCK) != -1);
- PCHECK(fcntl(fds[1], F_SETFL, fcntl(fds[1], F_GETFL) | O_NONBLOCK) != -1);
- return {ScopedReadPipe(fds[0]), ScopedWritePipe(fds[1])};
-}
-
-std::optional<uint32_t> ScopedPipe::ScopedReadPipe::Read() {
- uint32_t buf;
- ssize_t result = read(fd(), &buf, sizeof(buf));
- if (result == sizeof(buf)) {
- return buf;
- } else {
- return std::nullopt;
- }
-}
-
-void ScopedPipe::ScopedWritePipe::Write(uint32_t data) {
- ssize_t result = write(fd(), &data, sizeof(data));
- PCHECK(result != -1);
- CHECK(result == sizeof(data));
-}
-
-SignalListener::SignalListener(aos::ShmEventLoop *loop,
- std::function<void(signalfd_siginfo)> callback)
- : loop_(loop),
- callback_(std::move(callback)),
- signalfd_({SIGHUP, SIGINT, SIGQUIT, SIGABRT, SIGFPE, SIGSEGV, SIGPIPE,
- SIGTERM, SIGBUS, SIGXCPU, SIGCHLD}) {
- loop->epoll()->OnReadable(signalfd_.fd(), [this] {
- signalfd_siginfo info = signalfd_.Read();
-
- if (info.ssi_signo == 0) {
- LOG(WARNING) << "Could not read " << sizeof(signalfd_siginfo) << " bytes";
- return;
- }
-
- callback_(info);
- });
-}
-
-SignalListener::~SignalListener() { loop_->epoll()->DeleteFd(signalfd_.fd()); }
-
const aos::Channel *StatusChannelForNode(const aos::Configuration *config,
const aos::Node *node) {
return configuration::GetChannel<Status>(config, "/aos", "", node);
@@ -566,9 +169,11 @@
applications_.try_emplace(application->name()->str(), application,
&event_loop_, [this]() { MaybeSendStatus(); });
if (success) {
- if (application->has_args()) {
- iter->second.set_args(*application->args());
- }
+ // We should be catching and handling SIGCHLD correctly in the starter, so
+ // don't leave in the crutch for polling for the child process status (this
+ // is less about efficiency, and more about making sure bit rot doesn't
+ // result in the signal handling breaking).
+ iter->second.DisableChildDeathPolling();
return &(iter->second);
}
return nullptr;
diff --git a/aos/starter/starterd_lib.h b/aos/starter/starterd_lib.h
index 1809326..834e191 100644
--- a/aos/starter/starterd_lib.h
+++ b/aos/starter/starterd_lib.h
@@ -16,149 +16,11 @@
#include "aos/macros.h"
#include "aos/starter/starter_generated.h"
#include "aos/starter/starter_rpc_generated.h"
+#include "aos/starter/subprocess.h"
namespace aos {
namespace starter {
-// RAII Pipe for sending individual ints between reader and writer.
-class ScopedPipe {
- public:
- class ScopedReadPipe;
- class ScopedWritePipe;
-
- static std::tuple<ScopedReadPipe, ScopedWritePipe> MakePipe();
-
- virtual ~ScopedPipe();
-
- int fd() const { return fd_; }
-
- private:
- ScopedPipe(int fd = -1);
-
- int fd_;
-
- ScopedPipe(const ScopedPipe &) = delete;
- ScopedPipe &operator=(const ScopedPipe &) = delete;
- ScopedPipe(ScopedPipe &&);
- ScopedPipe &operator=(ScopedPipe &&);
-};
-
-class ScopedPipe::ScopedReadPipe : public ScopedPipe {
- public:
- std::optional<uint32_t> Read();
-
- private:
- using ScopedPipe::ScopedPipe;
-
- friend class ScopedPipe;
-};
-
-class ScopedPipe::ScopedWritePipe : public ScopedPipe {
- public:
- void Write(uint32_t data);
-
- private:
- using ScopedPipe::ScopedPipe;
-
- friend class ScopedPipe;
-};
-
-// Manages a running process, allowing starting and stopping, and restarting
-// automatically.
-class Application {
- public:
- Application(const aos::Application *application,
- aos::ShmEventLoop *event_loop, std::function<void()> on_change);
-
- flatbuffers::Offset<aos::starter::ApplicationStatus> PopulateStatus(
- flatbuffers::FlatBufferBuilder *builder);
-
- // Returns the last pid of this process. -1 if not started yet.
- pid_t get_pid() const { return pid_; }
-
- // Handles a SIGCHLD signal received by the parent. Does nothing if this
- // process was not the target. Returns true if this Application should be
- // removed.
- bool MaybeHandleSignal();
-
- // Handles a command. May do nothing if application is already in the desired
- // state.
- void HandleCommand(aos::starter::Command cmd);
-
- void Start() { HandleCommand(aos::starter::Command::START); }
-
- void Stop() { HandleCommand(aos::starter::Command::STOP); }
-
- void Terminate();
-
- void set_args(
- const flatbuffers::Vector<flatbuffers::Offset<flatbuffers::String>>
- &args);
-
- bool autostart() const { return autostart_; }
-
- private:
- void DoStart();
-
- void DoStop(bool restart);
-
- void QueueStart();
-
- // Copy flatbuffer vector of strings to vector of std::string.
- static std::vector<std::string> FbsVectorToVector(
- const flatbuffers::Vector<flatbuffers::Offset<flatbuffers::String>> &v);
-
- static std::optional<uid_t> FindUid(const char *name);
- static std::optional<gid_t> FindPrimaryGidForUser(const char *name);
-
- // Next unique id for all applications
- static inline uint64_t next_id_ = 0;
-
- std::string name_;
- std::string path_;
- std::vector<char *> args_;
- std::optional<uid_t> user_;
- std::optional<gid_t> group_;
-
- pid_t pid_ = -1;
- ScopedPipe::ScopedReadPipe read_pipe_;
- ScopedPipe::ScopedWritePipe write_pipe_;
- uint64_t id_;
- int exit_code_ = 0;
- aos::monotonic_clock::time_point start_time_, exit_time_;
- bool queue_restart_ = false;
- bool terminating_ = false;
- bool autostart_ = true;
-
- aos::starter::State status_ = aos::starter::State::STOPPED;
- aos::starter::LastStopReason stop_reason_ =
- aos::starter::LastStopReason::STOP_REQUESTED;
-
- aos::ShmEventLoop *event_loop_;
- aos::TimerHandler *start_timer_, *restart_timer_, *stop_timer_;
-
- std::function<void()> on_change_;
-
- DISALLOW_COPY_AND_ASSIGN(Application);
-};
-
-// Registers a signalfd listener with the given event loop and calls callback
-// whenever a signal is received.
-class SignalListener {
- public:
- SignalListener(aos::ShmEventLoop *loop,
- std::function<void(signalfd_siginfo)> callback);
-
- ~SignalListener();
-
- private:
- aos::ShmEventLoop *loop_;
- std::function<void(signalfd_siginfo)> callback_;
- aos::ipc_lib::SignalFd signalfd_;
-
- DISALLOW_COPY_AND_ASSIGN(SignalListener);
-};
-
const aos::Channel *StatusChannelForNode(const aos::Configuration *config,
const aos::Node *node);
const aos::Channel *StarterRpcChannelForNode(const aos::Configuration *config,
diff --git a/aos/starter/subprocess.cc b/aos/starter/subprocess.cc
new file mode 100644
index 0000000..c1eb618
--- /dev/null
+++ b/aos/starter/subprocess.cc
@@ -0,0 +1,501 @@
+#include "aos/starter/subprocess.h"
+
+#include <grp.h>
+#include <pwd.h>
+#include <sys/prctl.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+
+#include "glog/logging.h"
+
+namespace aos::starter {
+
+SignalListener::SignalListener(aos::ShmEventLoop *loop,
+ std::function<void(signalfd_siginfo)> callback)
+ : SignalListener(loop, callback,
+ {SIGHUP, SIGINT, SIGQUIT, SIGABRT, SIGFPE, SIGSEGV,
+ SIGPIPE, SIGTERM, SIGBUS, SIGXCPU, SIGCHLD}) {}
+
+SignalListener::SignalListener(aos::ShmEventLoop *loop,
+ std::function<void(signalfd_siginfo)> callback,
+ std::initializer_list<unsigned int> signals)
+ : loop_(loop), callback_(std::move(callback)), signalfd_(signals) {
+ loop->epoll()->OnReadable(signalfd_.fd(), [this] {
+ signalfd_siginfo info = signalfd_.Read();
+
+ if (info.ssi_signo == 0) {
+ LOG(WARNING) << "Could not read " << sizeof(signalfd_siginfo) << " bytes";
+ return;
+ }
+
+ callback_(info);
+ });
+}
+
+SignalListener::~SignalListener() { loop_->epoll()->DeleteFd(signalfd_.fd()); }
+
+Application::Application(std::string_view name,
+ std::string_view executable_name,
+ aos::EventLoop *event_loop,
+ std::function<void()> on_change)
+ : name_(name),
+ path_(executable_name),
+ event_loop_(event_loop),
+ start_timer_(event_loop_->AddTimer([this] {
+ status_ = aos::starter::State::RUNNING;
+ LOG(INFO) << "Started '" << name_ << "' pid: " << pid_;
+ })),
+ restart_timer_(event_loop_->AddTimer([this] { DoStart(); })),
+ stop_timer_(event_loop_->AddTimer([this] {
+ if (kill(pid_, SIGKILL) == 0) {
+ LOG(WARNING) << "Failed to stop, sending SIGKILL to '" << name_
+ << "' pid: " << pid_;
+ }
+ })),
+ pipe_timer_(event_loop_->AddTimer([this]() { FetchOutputs(); })),
+ child_status_handler_(
+ event_loop_->AddTimer([this]() { MaybeHandleSignal(); })),
+ on_change_(on_change) {
+ event_loop_->OnRun([this]() {
+ // Every second poll to check if the child is dead. This is used as a
+ // default for the case where the user is not directly catching SIGCHLD and
+ // calling MaybeHandleSignal for us.
+ child_status_handler_->Setup(event_loop_->monotonic_now(),
+ std::chrono::seconds(1));
+ });
+}
+
+Application::Application(const aos::Application *application,
+ aos::EventLoop *event_loop,
+ std::function<void()> on_change)
+ : Application(application->name()->string_view(),
+ application->has_executable_name()
+ ? application->executable_name()->string_view()
+ : application->name()->string_view(),
+ event_loop, on_change) {
+ user_name_ = application->has_user() ? application->user()->str() : "";
+ user_ = application->has_user() ? FindUid(user_name_.c_str()) : std::nullopt;
+ group_ = application->has_user() ? FindPrimaryGidForUser(user_name_.c_str())
+ : std::nullopt;
+ autostart_ = application->autostart();
+ autorestart_ = application->autorestart();
+ if (application->has_args()) {
+ set_args(*application->args());
+ }
+}
+
+void Application::DoStart() {
+ if (status_ != aos::starter::State::WAITING) {
+ return;
+ }
+
+ start_timer_->Disable();
+ restart_timer_->Disable();
+
+ status_pipes_ = util::ScopedPipe::MakePipe();
+
+ if (capture_stdout_) {
+ stdout_pipes_ = util::ScopedPipe::MakePipe();
+ stdout_.clear();
+ }
+ if (capture_stderr_) {
+ stderr_pipes_ = util::ScopedPipe::MakePipe();
+ stderr_.clear();
+ }
+
+ pipe_timer_->Setup(event_loop_->monotonic_now(),
+ std::chrono::milliseconds(100));
+
+ const pid_t pid = fork();
+
+ if (pid != 0) {
+ if (pid == -1) {
+ PLOG(WARNING) << "Failed to fork '" << name_ << "'";
+ stop_reason_ = aos::starter::LastStopReason::FORK_ERR;
+ status_ = aos::starter::State::STOPPED;
+ } else {
+ pid_ = pid;
+ id_ = next_id_++;
+ start_time_ = event_loop_->monotonic_now();
+ status_ = aos::starter::State::STARTING;
+ LOG(INFO) << "Starting '" << name_ << "' pid " << pid_;
+
+ // Setup timer which moves application to RUNNING state if it is still
+ // alive in 1 second.
+ start_timer_->Setup(event_loop_->monotonic_now() +
+ std::chrono::seconds(1));
+ // Since we are the parent process, clear our write-side of all the pipes.
+ status_pipes_.write.reset();
+ stdout_pipes_.write.reset();
+ stderr_pipes_.write.reset();
+ }
+ on_change_();
+ return;
+ }
+
+ // Since we are the child process, clear our read-side of all the pipes.
+ status_pipes_.read.reset();
+ stdout_pipes_.read.reset();
+ stderr_pipes_.read.reset();
+
+ // The status pipe will not be needed if the execve succeeds.
+ status_pipes_.write->SetCloexec();
+
+ // Clear out signal mask of parent so forked process receives all signals
+ // normally.
+ sigset_t empty_mask;
+ sigemptyset(&empty_mask);
+ sigprocmask(SIG_SETMASK, &empty_mask, nullptr);
+
+ // Cleanup children if starter dies in a way that is not handled gracefully.
+ if (prctl(PR_SET_PDEATHSIG, SIGKILL) == -1) {
+ status_pipes_.write->Write(
+ static_cast<uint32_t>(aos::starter::LastStopReason::SET_PRCTL_ERR));
+ PLOG(FATAL) << "Could not set PR_SET_PDEATHSIG to SIGKILL";
+ }
+
+ if (group_) {
+ CHECK(!user_name_.empty());
+ // The manpage for setgroups says we just need CAP_SETGID, but empirically
+ // we also need the effective UID to be 0 to make it work. user_ must also
+ // be set so we change this effective UID back later.
+ CHECK(user_);
+ if (seteuid(0) == -1) {
+ status_pipes_.write->Write(
+ static_cast<uint32_t>(aos::starter::LastStopReason::SET_GRP_ERR));
+ PLOG(FATAL) << "Could not seteuid(0) for " << name_
+ << " in preparation for setting groups";
+ }
+ if (initgroups(user_name_.c_str(), *group_) == -1) {
+ status_pipes_.write->Write(
+ static_cast<uint32_t>(aos::starter::LastStopReason::SET_GRP_ERR));
+ PLOG(FATAL) << "Could not initialize normal groups for " << name_
+ << " as " << user_name_ << " with " << *group_;
+ }
+ if (setgid(*group_) == -1) {
+ status_pipes_.write->Write(
+ static_cast<uint32_t>(aos::starter::LastStopReason::SET_GRP_ERR));
+ PLOG(FATAL) << "Could not set group for " << name_ << " to " << *group_;
+ }
+ }
+
+ if (user_) {
+ if (setuid(*user_) == -1) {
+ status_pipes_.write->Write(
+ static_cast<uint32_t>(aos::starter::LastStopReason::SET_USR_ERR));
+ PLOG(FATAL) << "Could not set user for " << name_ << " to " << *user_;
+ }
+ }
+
+ if (capture_stdout_) {
+ PCHECK(STDOUT_FILENO == dup2(stdout_pipes_.write->fd(), STDOUT_FILENO));
+ stdout_pipes_.write.reset();
+ }
+
+ if (capture_stderr_) {
+ PCHECK(STDERR_FILENO == dup2(stderr_pipes_.write->fd(), STDERR_FILENO));
+ stderr_pipes_.write.reset();
+ }
+
+ // argv[0] should be the program name
+ args_.insert(args_.begin(), path_);
+
+ std::vector<char *> cargs = CArgs();
+ execvp(path_.c_str(), cargs.data());
+
+ // If we got here, something went wrong
+ status_pipes_.write->Write(
+ static_cast<uint32_t>(aos::starter::LastStopReason::EXECV_ERR));
+ PLOG(WARNING) << "Could not execute " << name_ << " (" << path_ << ')';
+
+ _exit(EXIT_FAILURE);
+}
+
+void Application::FetchOutputs() {
+  // Drains any captured child output into stdout_ / stderr_. The read ends are
+  // only created by DoStart(), so they are still null when capture was
+  // requested but the application was never started (e.g. GetStdout() or
+  // DoStop() before the first start); skip them instead of dereferencing null.
+  if (capture_stdout_ && stdout_pipes_.read) stdout_pipes_.read->Read(&stdout_);
+  if (capture_stderr_ && stderr_pipes_.read) stderr_pipes_.read->Read(&stderr_);
+}
+
+const std::string &Application::GetStdout() {
+ CHECK(capture_stdout_);
+ FetchOutputs();
+ return stdout_;
+}
+
+const std::string &Application::GetStderr() {
+ CHECK(capture_stderr_);
+ FetchOutputs();
+ return stderr_;
+}
+
+void Application::DoStop(bool restart) {
+ // If stop or restart received, the old state of these is no longer applicable
+ // so cancel both.
+ restart_timer_->Disable();
+ start_timer_->Disable();
+
+ FetchOutputs();
+
+ switch (status_) {
+ case aos::starter::State::STARTING:
+ case aos::starter::State::RUNNING: {
+ LOG(INFO) << "Stopping '" << name_ << "' pid: " << pid_ << " with signal "
+ << SIGINT;
+ status_ = aos::starter::State::STOPPING;
+
+ kill(pid_, SIGINT);
+
+ // Watchdog timer to SIGKILL application if it is still running 1 second
+ // after SIGINT
+ stop_timer_->Setup(event_loop_->monotonic_now() +
+ std::chrono::seconds(1));
+ queue_restart_ = restart;
+ on_change_();
+ break;
+ }
+ case aos::starter::State::WAITING: {
+ // If waiting to restart, and receives restart, skip the waiting period
+ // and restart immediately. If stop received, all we have to do is move
+ // to the STOPPED state.
+ if (restart) {
+ DoStart();
+ } else {
+ status_ = aos::starter::State::STOPPED;
+ on_change_();
+ }
+ break;
+ }
+ case aos::starter::State::STOPPING: {
+ // If the application is already stopping, then we just need to update the
+ // restart flag to the most recent status.
+ queue_restart_ = restart;
+ break;
+ }
+ case aos::starter::State::STOPPED: {
+ // Restart immediately if the application is already stopped
+ if (restart) {
+ status_ = aos::starter::State::WAITING;
+ DoStart();
+ }
+ break;
+ }
+ }
+}
+
+void Application::QueueStart() {
+ status_ = aos::starter::State::WAITING;
+
+ LOG(INFO) << "Restarting " << name_ << " in 3 seconds";
+ restart_timer_->Setup(event_loop_->monotonic_now() + std::chrono::seconds(3));
+ start_timer_->Disable();
+ stop_timer_->Disable();
+ on_change_();
+}
+
+std::vector<char *> Application::CArgs() {
+  std::vector<char *> cargs;
+  cargs.reserve(args_.size() + 1);  // Every argument plus the terminator.
+  for (std::string &arg : args_) cargs.push_back(arg.data());
+  cargs.push_back(nullptr);  // The argv array handed to execvp() ends here.
+  return cargs;
+}
+
+void Application::set_args(
+ const flatbuffers::Vector<flatbuffers::Offset<flatbuffers::String>> &v) {
+ args_.clear();
+ std::transform(v.begin(), v.end(), std::back_inserter(args_),
+ [](const flatbuffers::String *str) { return str->str(); });
+}
+
+void Application::set_args(std::vector<std::string> args) {
+ args_ = std::move(args);
+}
+
+void Application::set_capture_stdout(bool capture) {
+ capture_stdout_ = capture;
+}
+
+void Application::set_capture_stderr(bool capture) {
+ capture_stderr_ = capture;
+}
+
+std::optional<uid_t> Application::FindUid(const char *name) {
+ // TODO(austin): Use the reentrant version. This should be safe.
+ struct passwd *user_data = getpwnam(name);
+ if (user_data != nullptr) {
+ return user_data->pw_uid;
+ } else {
+ LOG(FATAL) << "Could not find user " << name;
+ return std::nullopt;
+ }
+}
+
+std::optional<gid_t> Application::FindPrimaryGidForUser(const char *name) {
+ // TODO(austin): Use the reentrant version. This should be safe.
+ struct passwd *user_data = getpwnam(name);
+ if (user_data != nullptr) {
+ return user_data->pw_gid;
+ } else {
+ LOG(FATAL) << "Could not find user " << name;
+ return std::nullopt;
+ }
+}
+
+flatbuffers::Offset<aos::starter::ApplicationStatus>
+Application::PopulateStatus(flatbuffers::FlatBufferBuilder *builder) {
+ CHECK_NOTNULL(builder);
+ auto name_fbs = builder->CreateString(name_);
+
+ aos::starter::ApplicationStatus::Builder status_builder(*builder);
+ status_builder.add_name(name_fbs);
+ status_builder.add_state(status_);
+ if (exit_code_.has_value()) {
+ status_builder.add_last_exit_code(exit_code_.value());
+ }
+ status_builder.add_last_stop_reason(stop_reason_);
+ if (pid_ != -1) {
+ status_builder.add_pid(pid_);
+ status_builder.add_id(id_);
+ }
+ status_builder.add_last_start_time(start_time_.time_since_epoch().count());
+ return status_builder.Finish();
+}
+
+void Application::Terminate() {
+ stop_reason_ = aos::starter::LastStopReason::TERMINATE;
+ DoStop(false);
+ terminating_ = true;
+}
+
+void Application::HandleCommand(aos::starter::Command cmd) {
+ switch (cmd) {
+ case aos::starter::Command::START: {
+ switch (status_) {
+ case aos::starter::State::WAITING: {
+ restart_timer_->Disable();
+ DoStart();
+ break;
+ }
+ case aos::starter::State::STARTING: {
+ break;
+ }
+ case aos::starter::State::RUNNING: {
+ break;
+ }
+ case aos::starter::State::STOPPING: {
+ queue_restart_ = true;
+ break;
+ }
+ case aos::starter::State::STOPPED: {
+ status_ = aos::starter::State::WAITING;
+ DoStart();
+ break;
+ }
+ }
+ break;
+ }
+ case aos::starter::Command::STOP: {
+ stop_reason_ = aos::starter::LastStopReason::STOP_REQUESTED;
+ DoStop(false);
+ break;
+ }
+ case aos::starter::Command::RESTART: {
+ stop_reason_ = aos::starter::LastStopReason::RESTART_REQUESTED;
+ DoStop(true);
+ break;
+ }
+ }
+}
+
+bool Application::MaybeHandleSignal() {
+ int status;
+
+ // Check if the status of this process has changed
+ if (pid_ == -1 || waitpid(pid_, &status, WNOHANG) != pid_) {
+ return false;
+ }
+
+ // Check that the event was the process exiting
+ if (!WIFEXITED(status) && !WIFSIGNALED(status)) {
+ return false;
+ }
+
+ start_timer_->Disable();
+ exit_time_ = event_loop_->monotonic_now();
+ exit_code_ = WIFEXITED(status) ? WEXITSTATUS(status) : WTERMSIG(status);
+
+ if (auto read_result = status_pipes_.read->Read()) {
+ stop_reason_ = static_cast<aos::starter::LastStopReason>(*read_result);
+ }
+
+ switch (status_) {
+ case aos::starter::State::STARTING: {
+ if (exit_code_.value() == 0) {
+ LOG(INFO) << "Application '" << name_ << "' pid " << pid_
+ << " exited with status " << exit_code_.value();
+ } else {
+ LOG(WARNING) << "Failed to start '" << name_ << "' on pid " << pid_
+ << " : Exited with status " << exit_code_.value();
+ }
+ if (autorestart()) {
+ QueueStart();
+ } else {
+ status_ = aos::starter::State::STOPPED;
+ on_change_();
+ }
+ break;
+ }
+ case aos::starter::State::RUNNING: {
+ if (exit_code_.value() == 0) {
+ LOG(INFO) << "Application '" << name_ << "' pid " << pid_
+ << " exited with status " << exit_code_.value();
+ } else {
+ LOG(WARNING) << "Application '" << name_ << "' pid " << pid_
+ << " exited unexpectedly with status "
+ << exit_code_.value();
+ }
+ if (autorestart()) {
+ QueueStart();
+ } else {
+ status_ = aos::starter::State::STOPPED;
+ on_change_();
+ }
+ break;
+ }
+ case aos::starter::State::STOPPING: {
+ LOG(INFO) << "Successfully stopped '" << name_ << "' pid: " << pid_
+ << " with status " << exit_code_.value();
+ status_ = aos::starter::State::STOPPED;
+
+ // Disable force stop timer since the process already died
+ stop_timer_->Disable();
+
+ on_change_();
+ if (terminating_) {
+ return true;
+ }
+
+ if (queue_restart_) {
+ queue_restart_ = false;
+ status_ = aos::starter::State::WAITING;
+ DoStart();
+ }
+ break;
+ }
+ case aos::starter::State::WAITING:
+ case aos::starter::State::STOPPED: {
+ LOG(FATAL)
+ << "Received signal on process that was already stopped : name: '"
+ << name_ << "' pid: " << pid_;
+ break;
+ }
+ }
+
+ return false;
+}
+
+} // namespace aos::starter
diff --git a/aos/starter/subprocess.h b/aos/starter/subprocess.h
new file mode 100644
index 0000000..9ee9e31
--- /dev/null
+++ b/aos/starter/subprocess.h
@@ -0,0 +1,150 @@
+#ifndef AOS_STARTER_SUBPROCESS_H_
+#define AOS_STARTER_SUBPROCESS_H_
+
+#include <memory>
+#include <string>
+#include <tuple>
+#include <vector>
+
+#include "aos/events/event_loop.h"
+#include "aos/events/shm_event_loop.h"
+#include "aos/starter/starter_generated.h"
+#include "aos/starter/starter_rpc_generated.h"
+#include "aos/util/scoped_pipe.h"
+
+namespace aos::starter {
+
+// Registers a signalfd listener with the given event loop and calls callback
+// whenever a signal is received.
+class SignalListener {
+ public:
+ SignalListener(aos::ShmEventLoop *loop,
+ std::function<void(signalfd_siginfo)> callback);
+ SignalListener(aos::ShmEventLoop *loop,
+ std::function<void(signalfd_siginfo)> callback,
+ std::initializer_list<unsigned int> signals);
+
+ ~SignalListener();
+
+ private:
+ aos::ShmEventLoop *loop_;
+ std::function<void(signalfd_siginfo)> callback_;
+ aos::ipc_lib::SignalFd signalfd_;
+
+ DISALLOW_COPY_AND_ASSIGN(SignalListener);
+};
+
+// Manages a running process, allowing starting and stopping, and restarting
+// automatically.
+class Application {
+ public:
+ Application(const aos::Application *application, aos::EventLoop *event_loop,
+ std::function<void()> on_change);
+
+ Application(std::string_view name, std::string_view executable_name,
+ aos::EventLoop *event_loop, std::function<void()> on_change);
+
+ flatbuffers::Offset<aos::starter::ApplicationStatus> PopulateStatus(
+ flatbuffers::FlatBufferBuilder *builder);
+ aos::starter::State status() const { return status_; };
+
+ // Returns the last pid of this process. -1 if not started yet.
+ pid_t get_pid() const { return pid_; }
+
+ // Handles a SIGCHLD signal received by the parent. Does nothing if this
+ // process was not the target. Returns true if this Application should be
+ // removed.
+ bool MaybeHandleSignal();
+ void DisableChildDeathPolling() { child_status_handler_->Disable(); }
+
+ // Handles a command. May do nothing if application is already in the desired
+ // state.
+ void HandleCommand(aos::starter::Command cmd);
+
+ void Start() { HandleCommand(aos::starter::Command::START); }
+
+ void Stop() { HandleCommand(aos::starter::Command::STOP); }
+
+ void Terminate();
+
+ void set_args(std::vector<std::string> args);
+ void set_capture_stdout(bool capture);
+ void set_capture_stderr(bool capture);
+
+ bool autostart() const { return autostart_; }
+
+ bool autorestart() const { return autorestart_; }
+
+ const std::string &GetStdout();
+ const std::string &GetStderr();
+ std::optional<int> exit_code() const { return exit_code_; }
+
+ private:
+ typedef aos::util::ScopedPipe::PipePair PipePair;
+ void set_args(
+ const flatbuffers::Vector<flatbuffers::Offset<flatbuffers::String>>
+ &args);
+
+ void DoStart();
+
+ void DoStop(bool restart);
+
+ void QueueStart();
+
+ // Copy flatbuffer vector of strings to vector of std::string.
+ static std::vector<std::string> FbsVectorToVector(
+ const flatbuffers::Vector<flatbuffers::Offset<flatbuffers::String>> &v);
+
+ static std::optional<uid_t> FindUid(const char *name);
+ static std::optional<gid_t> FindPrimaryGidForUser(const char *name);
+
+ void FetchOutputs();
+
+ // Provides an std::vector of the args (such that CArgs().data() ends up being
+ // suitable to pass to execve()).
+ // The pointers are invalidated when args_ changes (e.g., due to a set_args
+ // call).
+ std::vector<char *> CArgs();
+
+ // Next unique id for all applications
+ static inline uint64_t next_id_ = 0;
+
+ std::string name_;
+ std::string path_;
+ std::vector<std::string> args_;
+ std::string user_name_;
+ std::optional<uid_t> user_;
+ std::optional<gid_t> group_;
+
+ bool capture_stdout_ = false;
+ PipePair stdout_pipes_;
+ std::string stdout_;
+ bool capture_stderr_ = false;
+ PipePair stderr_pipes_;
+ std::string stderr_;
+
+ pid_t pid_ = -1;
+ PipePair status_pipes_;
+ uint64_t id_ = 0;
+ std::optional<int> exit_code_;
+ aos::monotonic_clock::time_point start_time_, exit_time_;
+ bool queue_restart_ = false;
+ bool terminating_ = false;
+ bool autostart_ = false;
+ bool autorestart_ = false;
+
+ aos::starter::State status_ = aos::starter::State::STOPPED;
+ aos::starter::LastStopReason stop_reason_ =
+ aos::starter::LastStopReason::STOP_REQUESTED;
+
+ aos::EventLoop *event_loop_;
+ aos::TimerHandler *start_timer_, *restart_timer_, *stop_timer_, *pipe_timer_,
+ *child_status_handler_;
+
+ std::function<void()> on_change_;
+
+ DISALLOW_COPY_AND_ASSIGN(Application);
+};
+
+} // namespace aos::starter
+#endif // AOS_STARTER_SUBPROCESS_H_
diff --git a/aos/starter/subprocess_test.cc b/aos/starter/subprocess_test.cc
new file mode 100644
index 0000000..93fbf6a
--- /dev/null
+++ b/aos/starter/subprocess_test.cc
@@ -0,0 +1,97 @@
+#include "aos/starter/subprocess.h"
+
+#include "aos/events/shm_event_loop.h"
+#include "aos/testing/path.h"
+#include "aos/testing/tmpdir.h"
+#include "aos/util/file.h"
+#include "gtest/gtest.h"
+
+namespace aos::starter::testing {
+
+class SubprocessTest : public ::testing::Test {
+ protected:
+ SubprocessTest() : shm_dir_(aos::testing::TestTmpDir() + "/aos") {
+ FLAGS_shm_base = shm_dir_;
+
+ // Nuke the shm dir:
+ aos::util::UnlinkRecursive(shm_dir_);
+ }
+
+ gflags::FlagSaver flag_saver_;
+ std::string shm_dir_;
+};
+
+TEST_F(SubprocessTest, CaptureOutputs) {
+ const std::string config_file =
+ ::aos::testing::ArtifactPath("aos/events/pingpong_config.json");
+
+ aos::FlatbufferDetachedBuffer<aos::Configuration> config =
+ aos::configuration::ReadConfig(config_file);
+ aos::ShmEventLoop event_loop(&config.message());
+ bool observed_stopped = false;
+ Application echo_stdout(
+ "echo", "echo", &event_loop, [&observed_stopped, &echo_stdout]() {
+ if (echo_stdout.status() == aos::starter::State::STOPPED) {
+ observed_stopped = true;
+ }
+ });
+ ASSERT_FALSE(echo_stdout.autorestart());
+ echo_stdout.set_args({"abcdef"});
+ echo_stdout.set_capture_stdout(true);
+ echo_stdout.set_capture_stderr(true);
+
+ echo_stdout.Start();
+ aos::TimerHandler *exit_timer =
+ event_loop.AddTimer([&event_loop]() { event_loop.Exit(); });
+ event_loop.OnRun([&event_loop, exit_timer]() {
+ exit_timer->Setup(event_loop.monotonic_now() + std::chrono::seconds(1));
+ });
+
+ event_loop.Run();
+
+ ASSERT_EQ("abcdef\n", echo_stdout.GetStdout());
+ ASSERT_TRUE(echo_stdout.GetStderr().empty());
+ EXPECT_TRUE(observed_stopped);
+ EXPECT_EQ(aos::starter::State::STOPPED, echo_stdout.status());
+
+ observed_stopped = false;
+
+ // Run again, the output should've been cleared.
+ echo_stdout.set_args({"ghijkl"});
+ echo_stdout.Start();
+ event_loop.Run();
+ ASSERT_EQ("ghijkl\n", echo_stdout.GetStdout());
+ EXPECT_TRUE(observed_stopped);
+}
+
+TEST_F(SubprocessTest, CaptureStderr) {
+ const std::string config_file =
+ ::aos::testing::ArtifactPath("aos/events/pingpong_config.json");
+
+ aos::FlatbufferDetachedBuffer<aos::Configuration> config =
+ aos::configuration::ReadConfig(config_file);
+ aos::ShmEventLoop event_loop(&config.message());
+ bool observed_stopped = false;
+ Application echo_stderr(
+ "echo", "sh", &event_loop, [&observed_stopped, &echo_stderr]() {
+ if (echo_stderr.status() == aos::starter::State::STOPPED) {
+ observed_stopped = true;
+ }
+ });
+ echo_stderr.set_args({"-c", "echo abcdef >&2"});
+ echo_stderr.set_capture_stdout(true);
+ echo_stderr.set_capture_stderr(true);
+
+ echo_stderr.Start();
+ event_loop.AddTimer([&event_loop]() { event_loop.Exit(); })
+ ->Setup(event_loop.monotonic_now() + std::chrono::seconds(1));
+
+ event_loop.Run();
+
+ ASSERT_EQ("abcdef\n", echo_stderr.GetStderr());
+ ASSERT_TRUE(echo_stderr.GetStdout().empty());
+ ASSERT_TRUE(observed_stopped);
+ ASSERT_EQ(aos::starter::State::STOPPED, echo_stderr.status());
+}
+
+} // namespace aos::starter::testing
diff --git a/aos/util/BUILD b/aos/util/BUILD
index d75d421..0314069 100644
--- a/aos/util/BUILD
+++ b/aos/util/BUILD
@@ -236,6 +236,27 @@
],
)
+cc_library(
+ name = "scoped_pipe",
+ srcs = ["scoped_pipe.cc"],
+ hdrs = ["scoped_pipe.h"],
+ target_compatible_with = ["@platforms//os:linux"],
+ deps = [
+ "@com_github_google_glog//:glog",
+ "@com_google_absl//absl/types:span",
+ ],
+)
+
+cc_test(
+ name = "scoped_pipe_test",
+ srcs = ["scoped_pipe_test.cc"],
+ target_compatible_with = ["@platforms//os:linux"],
+ deps = [
+ ":scoped_pipe",
+ "//aos/testing:googletest",
+ ],
+)
+
py_library(
name = "python_init",
srcs = ["__init__.py"],
diff --git a/aos/util/scoped_pipe.cc b/aos/util/scoped_pipe.cc
new file mode 100644
index 0000000..d677b07
--- /dev/null
+++ b/aos/util/scoped_pipe.cc
@@ -0,0 +1,95 @@
+#include "aos/util/scoped_pipe.h"
+
+#include <fcntl.h>
+#include "glog/logging.h"
+
+namespace aos::util {
+
+ScopedPipe::ScopedPipe(int fd) : fd_(fd) {}
+
+ScopedPipe::~ScopedPipe() {
+ if (fd_ != -1) {
+ PCHECK(close(fd_) != -1);
+ }
+}
+
+ScopedPipe::ScopedPipe(ScopedPipe &&scoped_pipe) : fd_(scoped_pipe.fd_) {
+ scoped_pipe.fd_ = -1;
+}
+
+ScopedPipe &ScopedPipe::operator=(ScopedPipe &&scoped_pipe) {
+  // Self-move is a no-op; otherwise we would close our own descriptor.
+  if (this == &scoped_pipe) return *this;
+  if (fd_ != -1) PCHECK(close(fd_) != -1);  // Release what we currently own.
+  fd_ = scoped_pipe.fd_;
+  scoped_pipe.fd_ = -1;  // The source no longer owns the descriptor.
+  return *this;
+}
+
+ScopedPipe::PipePair ScopedPipe::MakePipe() {
+ int fds[2];
+ PCHECK(pipe(fds) != -1);
+ PCHECK(fcntl(fds[0], F_SETFL, fcntl(fds[0], F_GETFL) | O_NONBLOCK) != -1);
+ PCHECK(fcntl(fds[1], F_SETFL, fcntl(fds[1], F_GETFL) | O_NONBLOCK) != -1);
+ return {std::unique_ptr<ScopedReadPipe>(new ScopedReadPipe(fds[0])),
+ std::unique_ptr<ScopedWritePipe>(new ScopedWritePipe(fds[1]))};
+}
+
+void ScopedPipe::SetCloexec() {
+ // FD_CLOEXEC is the only known file descriptor flag, but call GETFD just in
+ // case.
+ int flags = fcntl(fd(), F_GETFD);
+ PCHECK(flags != -1);
+ PCHECK(fcntl(fd(), F_SETFD, flags | FD_CLOEXEC) != -1);
+}
+
+size_t ScopedPipe::ScopedReadPipe::Read(std::string *buffer) {
+  CHECK_NOTNULL(buffer);
+  constexpr ssize_t kBufferSize = 1024;
+  const size_t original_size = buffer->size();
+  size_t read_bytes = 0;
+  // Append in kBufferSize chunks until the pipe has nothing more to give.
+  while (true) {
+    buffer->resize(original_size + read_bytes + kBufferSize);
+    const ssize_t result =
+        read(fd(), buffer->data() + original_size + read_bytes, kBufferSize);
+    if (result == -1) {
+      // EAGAIN/EWOULDBLOCK only means the pipe is empty right now. Chunks
+      // from earlier iterations are already in the buffer, so keep them.
+      if (errno != EAGAIN && errno != EWOULDBLOCK) {
+        PLOG(FATAL) << "Error on reading pipe.";
+      }
+      break;
+    }
+    read_bytes += result;
+    // A short read (including 0 at end-of-file) ends the loop.
+    if (result < kBufferSize) break;
+  }
+  // Drop the unused tail of the last chunk.
+  buffer->resize(original_size + read_bytes);
+  return read_bytes;
+}
+
+std::optional<uint32_t> ScopedPipe::ScopedReadPipe::Read() {
+ uint32_t buf;
+ ssize_t result = read(fd(), &buf, sizeof(buf));
+ if (result == sizeof(buf)) {
+ return buf;
+ } else {
+ return std::nullopt;
+ }
+}
+
+void ScopedPipe::ScopedWritePipe::Write(uint32_t data) {
+ ssize_t result = write(fd(), &data, sizeof(data));
+ PCHECK(result != -1);
+ CHECK_EQ(static_cast<size_t>(result), sizeof(data));
+}
+
+void ScopedPipe::ScopedWritePipe::Write(absl::Span<const uint8_t> data) {
+ ssize_t result = write(fd(), data.data(), data.size());
+ PCHECK(result != -1);
+ CHECK_EQ(static_cast<size_t>(result), data.size());
+}
+
+} // namespace aos::util
diff --git a/aos/util/scoped_pipe.h b/aos/util/scoped_pipe.h
new file mode 100644
index 0000000..fb91e02
--- /dev/null
+++ b/aos/util/scoped_pipe.h
@@ -0,0 +1,71 @@
+#ifndef AOS_UTIL_SCOPED_PIPE_H_
+#define AOS_UTIL_SCOPED_PIPE_H_
+
+#include <stdint.h>
+
+#include <memory>
+#include <optional>
+
+#include "absl/types/span.h"
+
+namespace aos::util {
+
+// RAII pipe for sending individual ints or raw bytes between reader and writer.
+class ScopedPipe {
+ public:
+ class ScopedReadPipe;
+ class ScopedWritePipe;
+
+ struct PipePair {
+ std::unique_ptr<ScopedReadPipe> read;
+ std::unique_ptr<ScopedWritePipe> write;
+ };
+
+ static PipePair MakePipe();
+
+ virtual ~ScopedPipe();
+
+ int fd() const { return fd_; }
+ // Sets FD_CLOEXEC on the file descriptor.
+ void SetCloexec();
+
+ private:
+ ScopedPipe(int fd = -1);
+
+ int fd_;
+
+ ScopedPipe(const ScopedPipe &) = delete;
+ ScopedPipe &operator=(const ScopedPipe &) = delete;
+ ScopedPipe(ScopedPipe &&);
+ ScopedPipe &operator=(ScopedPipe &&);
+};
+
+class ScopedPipe::ScopedReadPipe : public ScopedPipe {
+ public:
+ std::optional<uint32_t> Read();
+ // Reads as many bytes as possible out of the pipe, appending them to the end
+ // of the provided buffer. Returns the number of bytes read. Dies on errors
+ // other than EAGAIN or EWOULDBLOCK.
+ size_t Read(std::string *buffer);
+
+ private:
+ using ScopedPipe::ScopedPipe;
+
+ friend class ScopedPipe;
+};
+
+class ScopedPipe::ScopedWritePipe : public ScopedPipe {
+ public:
+ void Write(uint32_t data);
+ // Writes the entirety of the specified buffer to the pipe. Dies on failure.
+ void Write(absl::Span<const uint8_t> data);
+
+ private:
+ using ScopedPipe::ScopedPipe;
+
+ friend class ScopedPipe;
+};
+
+} // namespace aos::util
+
+#endif // AOS_UTIL_SCOPED_PIPE_H_
diff --git a/aos/util/scoped_pipe_test.cc b/aos/util/scoped_pipe_test.cc
new file mode 100644
index 0000000..c71e272
--- /dev/null
+++ b/aos/util/scoped_pipe_test.cc
@@ -0,0 +1,57 @@
+#include "aos/util/scoped_pipe.h"
+
+#include <fcntl.h>
+
+#include <array>
+#include <string>
+
+#include "gtest/gtest.h"
+
+namespace aos {
+namespace util {
+namespace testing {
+
+// Tests using uint32_t read/write methods on the ScopedPipe objects.
+TEST(ScopedPipeTest, IntegerPipe) {
+ ScopedPipe::PipePair pipe = ScopedPipe::MakePipe();
+ ASSERT_FALSE(pipe.read->Read().has_value())
+ << "Shouldn't get anything on empty read.";
+ pipe.write->Write(971);
+ ASSERT_EQ(971, pipe.read->Read().value());
+}
+
+// Tests using string read/write methods on the ScopedPipe objects.
+TEST(ScopedPipeTest, StringPipe) {
+ ScopedPipe::PipePair pipe = ScopedPipe::MakePipe();
+ std::string buffer;
+ ASSERT_EQ(0u, pipe.read->Read(&buffer))
+ << "Shouldn't get anything on empty read.";
+ ASSERT_TRUE(buffer.empty());
+
+ const char *const kAbc = "abcdef";
+ pipe.write->Write(
+ absl::Span<const uint8_t>(reinterpret_cast<const uint8_t *>(kAbc), 6));
+ ASSERT_EQ(6u, pipe.read->Read(&buffer));
+ ASSERT_EQ("abcdef", buffer);
+
+ std::array<uint8_t, 10000> large_buffer;
+ large_buffer.fill(99);
+ pipe.write->Write(
+ absl::Span<const uint8_t>(large_buffer.data(), large_buffer.size()));
+ ASSERT_EQ(large_buffer.size(), pipe.read->Read(&buffer));
+ for (size_t ii = 0; ii < large_buffer.size(); ++ii) {
+ ASSERT_EQ(large_buffer[ii], buffer[ii + 6]);
+ }
+}
+
+// Tests that calling SetCloexec succeeds and does indeed set FD_CLOEXEC.
+TEST(ScopedPipeTest, SetCloexec) {
+ ScopedPipe::PipePair pipe = ScopedPipe::MakePipe();
+ ASSERT_EQ(0, fcntl(pipe.read->fd(), F_GETFD) & FD_CLOEXEC);
+ pipe.read->SetCloexec();
+ ASSERT_NE(0, fcntl(pipe.read->fd(), F_GETFD) & FD_CLOEXEC);
+}
+
+} // namespace testing
+} // namespace util
+} // namespace aos
diff --git a/third_party/seasocks/src/main/c/md5/md5.cpp b/third_party/seasocks/src/main/c/md5/md5.cpp
index 64c1f02..76ef31b 100644
--- a/third_party/seasocks/src/main/c/md5/md5.cpp
+++ b/third_party/seasocks/src/main/c/md5/md5.cpp
@@ -173,28 +173,18 @@
{
#if BYTE_ORDER == 0
/*
- * Determine dynamically whether this is a big-endian or
- * little-endian machine, since we can use a more efficient
- * algorithm on the latter.
- */
+ * Determine dynamically whether this is a big-endian or
+ * little-endian machine, since we can use a more efficient
+ * algorithm on the latter.
+ */
static const int w = 1;
if (*((const md5_byte_t*) &w)) /* dynamic little-endian */
#endif
#if BYTE_ORDER <= 0 /* little-endian */
{
- /*
- * On little-endian machines, we can process properly aligned
- * data without copying it.
- */
- if (!((data - (const md5_byte_t*) 0) & 3)) {
- /* data are properly aligned */
- X = (const md5_word_t*) data;
- } else {
- /* not aligned */
- memcpy(xbuf, data, 64);
- X = xbuf;
- }
+ memcpy(xbuf, data, 64);
+ X = xbuf;
}
#endif
#if BYTE_ORDER == 0
@@ -203,9 +193,9 @@
#if BYTE_ORDER >= 0 /* big-endian */
{
/*
- * On big-endian machines, we must arrange the bytes in the
- * right order.
- */
+ * On big-endian machines, we must arrange the bytes in the
+ * right order.
+ */
const md5_byte_t* xp = data;
int i;