blob: b6a20f0b202877e412e030c214104e0dbcfa1a48 [file] [log] [blame]
James Kuszmaul3224b8e2022-01-07 19:00:39 -08001#include "aos/starter/subprocess.h"
2
3#include <grp.h>
4#include <pwd.h>
5#include <sys/prctl.h>
6#include <sys/types.h>
7#include <sys/wait.h>
8
James Kuszmaul37a56af2023-07-29 15:15:16 -07009#include "absl/strings/str_split.h"
James Kuszmaul3224b8e2022-01-07 19:00:39 -080010#include "glog/logging.h"
11
James Kuszmaul8544c492023-07-31 15:00:38 -070012#include "aos/flatbuffer_merge.h"
13
James Kuszmaul3224b8e2022-01-07 19:00:39 -080014namespace aos::starter {
15
Philipp Schraderfa8fc492023-09-26 14:52:02 -070016// Blocks all signals while an instance of this class is in scope.
17class ScopedCompleteSignalBlocker {
18 public:
19 ScopedCompleteSignalBlocker() {
20 sigset_t mask;
21 sigfillset(&mask);
22 // Remember the current mask.
23 PCHECK(sigprocmask(SIG_SETMASK, &mask, &old_mask_) == 0);
24 }
25
26 ~ScopedCompleteSignalBlocker() {
27 // Restore the remembered mask.
28 PCHECK(sigprocmask(SIG_SETMASK, &old_mask_, nullptr) == 0);
29 }
30
31 private:
32 sigset_t old_mask_;
33};
34
namespace {

// Returns the inode number that stat() reports for `path`, or std::nullopt if
// the stat() call fails.
std::optional<ino_t> GetInodeForPath(const std::filesystem::path &path) {
  std::optional<ino_t> inode;
  struct stat file_info;
  if (stat(path.c_str(), &file_info) == 0) {
    inode = file_info.st_ino;
  }
  return inode;
}

// Returns true if `path` can no longer be stat'd or if its current inode
// differs from `previous_inode`.
bool InodeChanged(const std::filesystem::path &path, ino_t previous_inode) {
  const std::optional<ino_t> inode_now = GetInodeForPath(path);
  return !inode_now.has_value() || *inode_now != previous_inode;
}

}  // namespace
51
52std::filesystem::path ResolvePath(std::string_view command) {
53 std::filesystem::path command_path = command;
54 if (command.find("/") != std::string_view::npos) {
55 CHECK(std::filesystem::exists(command_path))
56 << ": " << command << " does not exist.";
57 return std::filesystem::canonical(command_path);
58 }
59 const char *system_path = getenv("PATH");
60 std::string system_path_buffer;
61 if (system_path == nullptr) {
62 const size_t default_path_length = confstr(_CS_PATH, nullptr, 0);
63 PCHECK(default_path_length != 0) << ": Unable to resolve " << command;
64 system_path_buffer.resize(default_path_length);
65 confstr(_CS_PATH, system_path_buffer.data(), system_path_buffer.size());
66 system_path = system_path_buffer.c_str();
67 VLOG(2) << "Using default path of " << system_path
68 << " in the absence of PATH being set.";
69 }
70 const std::vector<std::string_view> search_paths =
71 absl::StrSplit(system_path, ':');
72 for (const std::string_view search_path : search_paths) {
73 const std::filesystem::path candidate =
74 std::filesystem::path(search_path) / command_path;
75 if (std::filesystem::exists(candidate)) {
76 return std::filesystem::canonical(candidate);
77 }
78 }
79 LOG(FATAL) << "Unable to resolve " << command;
80}
81
Austin Schuhbbeb37e2022-08-17 16:19:27 -070082// RAII class to become root and restore back to the original user and group
83// afterwards.
84class Sudo {
85 public:
86 Sudo() {
87 // Save what we were.
88 PCHECK(getresuid(&ruid_, &euid_, &suid_) == 0);
89 PCHECK(getresgid(&rgid_, &egid_, &sgid_) == 0);
90
91 // Become root.
92 PCHECK(setresuid(/* ruid */ 0 /* root */, /* euid */ 0, /* suid */ 0) == 0)
93 << ": Failed to become root";
94 PCHECK(setresgid(/* ruid */ 0 /* root */, /* euid */ 0, /* suid */ 0) == 0)
95 << ": Failed to become root";
96 }
97
98 ~Sudo() {
99 // And recover.
100 PCHECK(setresgid(rgid_, egid_, sgid_) == 0);
101 PCHECK(setresuid(ruid_, euid_, suid_) == 0);
102 }
103
104 uid_t ruid_, euid_, suid_;
105 gid_t rgid_, egid_, sgid_;
106};
107
Austin Schuh77e20a32023-08-01 12:25:03 -0700108MemoryCGroup::MemoryCGroup(std::string_view name, Create should_create)
109 : cgroup_(absl::StrCat("/sys/fs/cgroup/memory/aos_", name)),
110 should_create_(should_create) {
111 if (should_create_ == Create::kDoCreate) {
112 Sudo sudo;
113 int ret = mkdir(cgroup_.c_str(), 0755);
Austin Schuhbbeb37e2022-08-17 16:19:27 -0700114
Austin Schuh77e20a32023-08-01 12:25:03 -0700115 if (ret != 0) {
116 if (errno == EEXIST) {
117 PCHECK(rmdir(cgroup_.c_str()) == 0)
118 << ": Failed to remove previous cgroup " << cgroup_;
119 ret = mkdir(cgroup_.c_str(), 0755);
120 }
Austin Schuhbbeb37e2022-08-17 16:19:27 -0700121 }
Austin Schuhbbeb37e2022-08-17 16:19:27 -0700122
Austin Schuh77e20a32023-08-01 12:25:03 -0700123 if (ret != 0) {
124 PLOG(FATAL) << ": Failed to create cgroup aos_" << cgroup_
125 << ", do you have permission?";
126 }
Austin Schuhbbeb37e2022-08-17 16:19:27 -0700127 }
128}
129
130void MemoryCGroup::AddTid(pid_t pid) {
131 if (pid == 0) {
132 pid = getpid();
133 }
Austin Schuh77e20a32023-08-01 12:25:03 -0700134 if (should_create_ == Create::kDoCreate) {
135 Sudo sudo;
136 util::WriteStringToFileOrDie(absl::StrCat(cgroup_, "/tasks").c_str(),
137 std::to_string(pid));
138 } else {
139 util::WriteStringToFileOrDie(absl::StrCat(cgroup_, "/tasks").c_str(),
140 std::to_string(pid));
141 }
Austin Schuhbbeb37e2022-08-17 16:19:27 -0700142}
143
144void MemoryCGroup::SetLimit(std::string_view limit_name, uint64_t limit_value) {
Austin Schuh77e20a32023-08-01 12:25:03 -0700145 if (should_create_ == Create::kDoCreate) {
146 Sudo sudo;
147 util::WriteStringToFileOrDie(absl::StrCat(cgroup_, "/", limit_name).c_str(),
148 std::to_string(limit_value));
149 } else {
150 util::WriteStringToFileOrDie(absl::StrCat(cgroup_, "/", limit_name).c_str(),
151 std::to_string(limit_value));
152 }
Austin Schuhbbeb37e2022-08-17 16:19:27 -0700153}
154
155MemoryCGroup::~MemoryCGroup() {
Austin Schuh77e20a32023-08-01 12:25:03 -0700156 if (should_create_ == Create::kDoCreate) {
157 Sudo sudo;
158 PCHECK(rmdir(absl::StrCat(cgroup_).c_str()) == 0);
159 }
Austin Schuhbbeb37e2022-08-17 16:19:27 -0700160}
161
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800162SignalListener::SignalListener(aos::ShmEventLoop *loop,
163 std::function<void(signalfd_siginfo)> callback)
Austin Schuh1cea9032023-07-10 11:56:40 -0700164 : SignalListener(loop->epoll(), std::move(callback)) {}
165
166SignalListener::SignalListener(aos::internal::EPoll *epoll,
167 std::function<void(signalfd_siginfo)> callback)
168 : SignalListener(epoll, callback,
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800169 {SIGHUP, SIGINT, SIGQUIT, SIGABRT, SIGFPE, SIGSEGV,
170 SIGPIPE, SIGTERM, SIGBUS, SIGXCPU, SIGCHLD}) {}
171
172SignalListener::SignalListener(aos::ShmEventLoop *loop,
173 std::function<void(signalfd_siginfo)> callback,
174 std::initializer_list<unsigned int> signals)
Austin Schuh1cea9032023-07-10 11:56:40 -0700175 : SignalListener(loop->epoll(), std::move(callback), std::move(signals)) {}
176
177SignalListener::SignalListener(aos::internal::EPoll *epoll,
178 std::function<void(signalfd_siginfo)> callback,
179 std::initializer_list<unsigned int> signals)
180 : epoll_(epoll), callback_(std::move(callback)), signalfd_(signals) {
181 epoll_->OnReadable(signalfd_.fd(), [this] {
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800182 signalfd_siginfo info = signalfd_.Read();
183
184 if (info.ssi_signo == 0) {
185 LOG(WARNING) << "Could not read " << sizeof(signalfd_siginfo) << " bytes";
186 return;
187 }
188
189 callback_(info);
190 });
191}
192
Austin Schuh1cea9032023-07-10 11:56:40 -0700193SignalListener::~SignalListener() { epoll_->DeleteFd(signalfd_.fd()); }
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800194
James Kuszmauld42edb42022-01-07 18:00:16 -0800195Application::Application(std::string_view name,
196 std::string_view executable_name,
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800197 aos::EventLoop *event_loop,
payton.rehl2841b1c2023-05-25 17:23:55 -0700198 std::function<void()> on_change,
199 QuietLogging quiet_flag)
James Kuszmauld42edb42022-01-07 18:00:16 -0800200 : name_(name),
James Kuszmaul37a56af2023-07-29 15:15:16 -0700201 path_(ResolvePath(executable_name)),
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800202 event_loop_(event_loop),
203 start_timer_(event_loop_->AddTimer([this] {
204 status_ = aos::starter::State::RUNNING;
payton.rehl2841b1c2023-05-25 17:23:55 -0700205 LOG_IF(INFO, quiet_flag_ == QuietLogging::kNo)
206 << "Started '" << name_ << "' pid: " << pid_;
James Kuszmaul37a56af2023-07-29 15:15:16 -0700207 // Check if the file on disk changed while we were starting up. We allow
208 // this state for the same reason that we don't just use /proc/$pid/exe
209 // to determine if the file is deleted--we may be running a script or
210 // sudo or the such and determining the state of the file that we
211 // actually care about sounds like more work than we want to deal with.
212 if (InodeChanged(path_, pre_fork_inode_)) {
213 file_state_ = FileState::CHANGED_DURING_STARTUP;
214 } else {
215 file_state_ = FileState::NO_CHANGE;
216 }
217
218 OnChange();
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800219 })),
220 restart_timer_(event_loop_->AddTimer([this] { DoStart(); })),
221 stop_timer_(event_loop_->AddTimer([this] {
222 if (kill(pid_, SIGKILL) == 0) {
Philipp Schrader595979d2023-09-13 11:31:48 -0700223 LOG_IF(WARNING, quiet_flag_ == QuietLogging::kNo ||
224 quiet_flag_ == QuietLogging::kNotForDebugging)
payton.rehl2841b1c2023-05-25 17:23:55 -0700225 << "Failed to stop, sending SIGKILL to '" << name_
226 << "' pid: " << pid_;
Sarah Newman9687e062023-09-08 12:22:27 -0700227 } else {
Philipp Schrader595979d2023-09-13 11:31:48 -0700228 PLOG_IF(WARNING, quiet_flag_ == QuietLogging::kNo ||
229 quiet_flag_ == QuietLogging::kNotForDebugging)
Sarah Newman9687e062023-09-08 12:22:27 -0700230 << "Failed to send SIGKILL to '" << name_ << "' pid: " << pid_;
231 stop_timer_->Schedule(event_loop_->monotonic_now() +
232 std::chrono::seconds(1));
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800233 }
234 })),
James Kuszmauld42edb42022-01-07 18:00:16 -0800235 pipe_timer_(event_loop_->AddTimer([this]() { FetchOutputs(); })),
236 child_status_handler_(
237 event_loop_->AddTimer([this]() { MaybeHandleSignal(); })),
Austin Schuh1cea9032023-07-10 11:56:40 -0700238 on_change_({on_change}),
payton.rehl2841b1c2023-05-25 17:23:55 -0700239 quiet_flag_(quiet_flag) {
Sanjay Narayanan92fdc3d2023-08-25 14:42:56 -0700240 // Every second poll to check if the child is dead. This is used as a
241 // default for the case where the user is not directly catching SIGCHLD and
242 // calling MaybeHandleSignal for us.
243 child_status_handler_->Schedule(event_loop_->monotonic_now(),
244 std::chrono::seconds(1));
James Kuszmauld42edb42022-01-07 18:00:16 -0800245}
246
247Application::Application(const aos::Application *application,
248 aos::EventLoop *event_loop,
payton.rehl2841b1c2023-05-25 17:23:55 -0700249 std::function<void()> on_change,
250 QuietLogging quiet_flag)
James Kuszmauld42edb42022-01-07 18:00:16 -0800251 : Application(application->name()->string_view(),
252 application->has_executable_name()
253 ? application->executable_name()->string_view()
254 : application->name()->string_view(),
payton.rehl2841b1c2023-05-25 17:23:55 -0700255 event_loop, on_change, quiet_flag) {
James Kuszmauld42edb42022-01-07 18:00:16 -0800256 user_name_ = application->has_user() ? application->user()->str() : "";
257 user_ = application->has_user() ? FindUid(user_name_.c_str()) : std::nullopt;
258 group_ = application->has_user() ? FindPrimaryGidForUser(user_name_.c_str())
259 : std::nullopt;
260 autostart_ = application->autostart();
261 autorestart_ = application->autorestart();
262 if (application->has_args()) {
263 set_args(*application->args());
264 }
Austin Schuhbbeb37e2022-08-17 16:19:27 -0700265
266 if (application->has_memory_limit() && application->memory_limit() > 0) {
267 SetMemoryLimit(application->memory_limit());
268 }
James Kuszmauld42edb42022-01-07 18:00:16 -0800269}
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800270
271void Application::DoStart() {
272 if (status_ != aos::starter::State::WAITING) {
273 return;
274 }
275
276 start_timer_->Disable();
277 restart_timer_->Disable();
278
James Kuszmauld42edb42022-01-07 18:00:16 -0800279 status_pipes_ = util::ScopedPipe::MakePipe();
280
281 if (capture_stdout_) {
282 stdout_pipes_ = util::ScopedPipe::MakePipe();
283 stdout_.clear();
284 }
285 if (capture_stderr_) {
286 stderr_pipes_ = util::ScopedPipe::MakePipe();
287 stderr_.clear();
288 }
289
Philipp Schradera6712522023-07-05 20:25:11 -0700290 pipe_timer_->Schedule(event_loop_->monotonic_now(),
291 std::chrono::milliseconds(100));
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800292
Philipp Schraderfa8fc492023-09-26 14:52:02 -0700293 {
294 // Block all signals during the fork() call. Together with the default
295 // signal handler restoration below, This prevents signal handlers from
296 // getting called in the child and accidentally affecting the parent. In
297 // particular, the exit handler for shm_event_loop could be called here if
298 // we don't exec() quickly enough.
299 ScopedCompleteSignalBlocker signal_blocker;
James Kuszmaul37a56af2023-07-29 15:15:16 -0700300 {
301 const std::optional<ino_t> inode = GetInodeForPath(path_);
302 CHECK(inode.has_value())
303 << ": " << path_ << " does not seem to be stat'able.";
304 pre_fork_inode_ = inode.value();
305 }
Philipp Schraderfa8fc492023-09-26 14:52:02 -0700306 const pid_t pid = fork();
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800307
Philipp Schraderfa8fc492023-09-26 14:52:02 -0700308 if (pid != 0) {
309 if (pid == -1) {
310 PLOG_IF(WARNING, quiet_flag_ == QuietLogging::kNo ||
311 quiet_flag_ == QuietLogging::kNotForDebugging)
312 << "Failed to fork '" << name_ << "'";
313 stop_reason_ = aos::starter::LastStopReason::FORK_ERR;
314 status_ = aos::starter::State::STOPPED;
315 } else {
316 pid_ = pid;
317 id_ = next_id_++;
318 start_time_ = event_loop_->monotonic_now();
319 status_ = aos::starter::State::STARTING;
320 latest_timing_report_version_.reset();
321 LOG_IF(INFO, quiet_flag_ == QuietLogging::kNo)
322 << "Starting '" << name_ << "' pid " << pid_;
323
324 // Set up timer which moves application to RUNNING state if it is still
325 // alive in 1 second.
326 start_timer_->Schedule(event_loop_->monotonic_now() +
327 std::chrono::seconds(1));
328 // Since we are the parent process, clear our write-side of all the
329 // pipes.
330 status_pipes_.write.reset();
331 stdout_pipes_.write.reset();
332 stderr_pipes_.write.reset();
333 }
334 OnChange();
335 return;
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800336 }
Philipp Schraderfa8fc492023-09-26 14:52:02 -0700337
338 // Clear any signal handlers so that they don't accidentally interfere with
339 // the parent process. Is there a better way to iterate over all the
340 // signals? Right now we're just dealing with the most common ones.
341 for (int signal : {SIGINT, SIGHUP, SIGTERM}) {
342 struct sigaction action;
343 sigemptyset(&action.sa_mask);
344 action.sa_flags = 0;
345 action.sa_handler = SIG_DFL;
346 PCHECK(sigaction(signal, &action, nullptr) == 0);
347 }
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800348 }
349
Austin Schuhbbeb37e2022-08-17 16:19:27 -0700350 if (memory_cgroup_) {
351 memory_cgroup_->AddTid();
352 }
353
James Kuszmauld42edb42022-01-07 18:00:16 -0800354 // Since we are the child process, clear our read-side of all the pipes.
355 status_pipes_.read.reset();
356 stdout_pipes_.read.reset();
357 stderr_pipes_.read.reset();
358
359 // The status pipe will not be needed if the execve succeeds.
360 status_pipes_.write->SetCloexec();
361
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800362 // Clear out signal mask of parent so forked process receives all signals
363 // normally.
364 sigset_t empty_mask;
365 sigemptyset(&empty_mask);
366 sigprocmask(SIG_SETMASK, &empty_mask, nullptr);
367
368 // Cleanup children if starter dies in a way that is not handled gracefully.
369 if (prctl(PR_SET_PDEATHSIG, SIGKILL) == -1) {
James Kuszmauld42edb42022-01-07 18:00:16 -0800370 status_pipes_.write->Write(
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800371 static_cast<uint32_t>(aos::starter::LastStopReason::SET_PRCTL_ERR));
372 PLOG(FATAL) << "Could not set PR_SET_PDEATHSIG to SIGKILL";
373 }
374
375 if (group_) {
376 CHECK(!user_name_.empty());
377 // The manpage for setgroups says we just need CAP_SETGID, but empirically
378 // we also need the effective UID to be 0 to make it work. user_ must also
379 // be set so we change this effective UID back later.
380 CHECK(user_);
381 if (seteuid(0) == -1) {
James Kuszmauld42edb42022-01-07 18:00:16 -0800382 status_pipes_.write->Write(
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800383 static_cast<uint32_t>(aos::starter::LastStopReason::SET_GRP_ERR));
384 PLOG(FATAL) << "Could not seteuid(0) for " << name_
385 << " in preparation for setting groups";
386 }
387 if (initgroups(user_name_.c_str(), *group_) == -1) {
James Kuszmauld42edb42022-01-07 18:00:16 -0800388 status_pipes_.write->Write(
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800389 static_cast<uint32_t>(aos::starter::LastStopReason::SET_GRP_ERR));
390 PLOG(FATAL) << "Could not initialize normal groups for " << name_
391 << " as " << user_name_ << " with " << *group_;
392 }
393 if (setgid(*group_) == -1) {
James Kuszmauld42edb42022-01-07 18:00:16 -0800394 status_pipes_.write->Write(
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800395 static_cast<uint32_t>(aos::starter::LastStopReason::SET_GRP_ERR));
396 PLOG(FATAL) << "Could not set group for " << name_ << " to " << *group_;
397 }
398 }
399
400 if (user_) {
401 if (setuid(*user_) == -1) {
James Kuszmauld42edb42022-01-07 18:00:16 -0800402 status_pipes_.write->Write(
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800403 static_cast<uint32_t>(aos::starter::LastStopReason::SET_USR_ERR));
404 PLOG(FATAL) << "Could not set user for " << name_ << " to " << *user_;
405 }
406 }
407
James Kuszmauld42edb42022-01-07 18:00:16 -0800408 if (capture_stdout_) {
409 PCHECK(STDOUT_FILENO == dup2(stdout_pipes_.write->fd(), STDOUT_FILENO));
410 stdout_pipes_.write.reset();
411 }
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800412
James Kuszmauld42edb42022-01-07 18:00:16 -0800413 if (capture_stderr_) {
414 PCHECK(STDERR_FILENO == dup2(stderr_pipes_.write->fd(), STDERR_FILENO));
415 stderr_pipes_.write.reset();
416 }
417
Sanjay Narayanan01a228f2022-04-26 14:19:30 -0700418 if (run_as_sudo_) {
Sarah Newman6d1e53b2022-08-09 14:38:08 -0700419 // For sudo we must supply the actual path
James Kuszmaul37a56af2023-07-29 15:15:16 -0700420 args_.insert(args_.begin(), path_.c_str());
Sanjay Narayanan01a228f2022-04-26 14:19:30 -0700421 args_.insert(args_.begin(), kSudo);
Sarah Newman6d1e53b2022-08-09 14:38:08 -0700422 } else {
423 // argv[0] should be the program name
424 args_.insert(args_.begin(), name_);
Sanjay Narayanan01a228f2022-04-26 14:19:30 -0700425 }
James Kuszmauld42edb42022-01-07 18:00:16 -0800426
427 std::vector<char *> cargs = CArgs();
Philipp Schrader790cb542023-07-05 21:06:52 -0700428 const char *path = run_as_sudo_ ? kSudo : path_.c_str();
Sanjay Narayanan01a228f2022-04-26 14:19:30 -0700429 execvp(path, cargs.data());
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800430
431 // If we got here, something went wrong
James Kuszmauld42edb42022-01-07 18:00:16 -0800432 status_pipes_.write->Write(
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800433 static_cast<uint32_t>(aos::starter::LastStopReason::EXECV_ERR));
Philipp Schrader595979d2023-09-13 11:31:48 -0700434 PLOG_IF(WARNING, quiet_flag_ == QuietLogging::kNo ||
435 quiet_flag_ == QuietLogging::kNotForDebugging)
payton.rehl2841b1c2023-05-25 17:23:55 -0700436 << "Could not execute " << name_ << " (" << path_ << ')';
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800437
438 _exit(EXIT_FAILURE);
439}
440
James Kuszmaul8544c492023-07-31 15:00:38 -0700441void Application::ObserveTimingReport(
442 const aos::monotonic_clock::time_point send_time,
443 const aos::timing::Report *msg) {
444 if (msg->name()->string_view() == name_ && msg->pid() == pid_ &&
445 msg->has_version()) {
446 latest_timing_report_version_ = msg->version()->str();
447 last_timing_report_ = send_time;
448 }
449}
450
James Kuszmauld42edb42022-01-07 18:00:16 -0800451void Application::FetchOutputs() {
452 if (capture_stdout_) {
453 stdout_pipes_.read->Read(&stdout_);
454 }
455 if (capture_stderr_) {
456 stderr_pipes_.read->Read(&stderr_);
457 }
458}
459
460const std::string &Application::GetStdout() {
461 CHECK(capture_stdout_);
462 FetchOutputs();
463 return stdout_;
464}
465
466const std::string &Application::GetStderr() {
467 CHECK(capture_stderr_);
468 FetchOutputs();
469 return stderr_;
470}
471
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800472void Application::DoStop(bool restart) {
473 // If stop or restart received, the old state of these is no longer applicable
474 // so cancel both.
475 restart_timer_->Disable();
476 start_timer_->Disable();
477
James Kuszmauld42edb42022-01-07 18:00:16 -0800478 FetchOutputs();
479
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800480 switch (status_) {
481 case aos::starter::State::STARTING:
482 case aos::starter::State::RUNNING: {
James Kuszmaul37a56af2023-07-29 15:15:16 -0700483 file_state_ = FileState::NOT_RUNNING;
Philipp Schrader595979d2023-09-13 11:31:48 -0700484 LOG_IF(INFO, quiet_flag_ == QuietLogging::kNo ||
485 quiet_flag_ == QuietLogging::kNotForDebugging)
payton.rehl2841b1c2023-05-25 17:23:55 -0700486 << "Stopping '" << name_ << "' pid: " << pid_ << " with signal "
487 << SIGINT;
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800488 status_ = aos::starter::State::STOPPING;
489
Philipp Schrader595979d2023-09-13 11:31:48 -0700490 if (kill(pid_, SIGINT) != 0) {
491 PLOG_IF(INFO, quiet_flag_ == QuietLogging::kNo ||
492 quiet_flag_ == QuietLogging::kNotForDebugging)
493 << "Failed to send signal " << SIGINT << " to '" << name_
494 << "' pid: " << pid_;
495 }
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800496
497 // Watchdog timer to SIGKILL application if it is still running 1 second
498 // after SIGINT
Philipp Schradera6712522023-07-05 20:25:11 -0700499 stop_timer_->Schedule(event_loop_->monotonic_now() +
500 std::chrono::seconds(1));
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800501 queue_restart_ = restart;
Austin Schuh1cea9032023-07-10 11:56:40 -0700502 OnChange();
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800503 break;
504 }
505 case aos::starter::State::WAITING: {
506 // If waiting to restart, and receives restart, skip the waiting period
507 // and restart immediately. If stop received, all we have to do is move
508 // to the STOPPED state.
509 if (restart) {
510 DoStart();
511 } else {
512 status_ = aos::starter::State::STOPPED;
Austin Schuh1cea9032023-07-10 11:56:40 -0700513 OnChange();
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800514 }
515 break;
516 }
517 case aos::starter::State::STOPPING: {
518 // If the application is already stopping, then we just need to update the
519 // restart flag to the most recent status.
520 queue_restart_ = restart;
521 break;
522 }
523 case aos::starter::State::STOPPED: {
524 // Restart immediately if the application is already stopped
525 if (restart) {
526 status_ = aos::starter::State::WAITING;
527 DoStart();
528 }
529 break;
530 }
531 }
532}
533
534void Application::QueueStart() {
535 status_ = aos::starter::State::WAITING;
536
payton.rehl2841b1c2023-05-25 17:23:55 -0700537 LOG_IF(INFO, quiet_flag_ == QuietLogging::kNo)
538 << "Restarting " << name_ << " in 3 seconds";
Philipp Schradera6712522023-07-05 20:25:11 -0700539 restart_timer_->Schedule(event_loop_->monotonic_now() +
540 std::chrono::seconds(3));
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800541 start_timer_->Disable();
542 stop_timer_->Disable();
Austin Schuh1cea9032023-07-10 11:56:40 -0700543 OnChange();
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800544}
545
James Kuszmauld42edb42022-01-07 18:00:16 -0800546std::vector<char *> Application::CArgs() {
547 std::vector<char *> cargs;
548 std::transform(args_.begin(), args_.end(), std::back_inserter(cargs),
549 [](std::string &str) { return str.data(); });
550 cargs.push_back(nullptr);
551 return cargs;
552}
553
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800554void Application::set_args(
555 const flatbuffers::Vector<flatbuffers::Offset<flatbuffers::String>> &v) {
556 args_.clear();
557 std::transform(v.begin(), v.end(), std::back_inserter(args_),
James Kuszmauld42edb42022-01-07 18:00:16 -0800558 [](const flatbuffers::String *str) { return str->str(); });
559}
560
561void Application::set_args(std::vector<std::string> args) {
562 args_ = std::move(args);
563}
564
565void Application::set_capture_stdout(bool capture) {
566 capture_stdout_ = capture;
567}
568
569void Application::set_capture_stderr(bool capture) {
570 capture_stderr_ = capture;
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800571}
572
573std::optional<uid_t> Application::FindUid(const char *name) {
574 // TODO(austin): Use the reentrant version. This should be safe.
575 struct passwd *user_data = getpwnam(name);
576 if (user_data != nullptr) {
577 return user_data->pw_uid;
578 } else {
579 LOG(FATAL) << "Could not find user " << name;
580 return std::nullopt;
581 }
582}
583
584std::optional<gid_t> Application::FindPrimaryGidForUser(const char *name) {
585 // TODO(austin): Use the reentrant version. This should be safe.
586 struct passwd *user_data = getpwnam(name);
587 if (user_data != nullptr) {
588 return user_data->pw_gid;
589 } else {
590 LOG(FATAL) << "Could not find user " << name;
591 return std::nullopt;
592 }
593}
594
James Kuszmaul37a56af2023-07-29 15:15:16 -0700595FileState Application::UpdateFileState() {
596 // On every call, check if a different file is present on disk. Note that
597 // while the applications is running, the file cannot be changed without the
598 // inode changing.
599 // We could presumably use inotify or the such to watch the file instead,
600 // but this works and we do not expect substantial cost from reading the inode
601 // of a file every time we send out a status message.
602 if (InodeChanged(path_, pre_fork_inode_)) {
603 switch (file_state_) {
604 case FileState::NO_CHANGE:
605 file_state_ = FileState::CHANGED;
606 break;
607 case FileState::NOT_RUNNING:
608 case FileState::CHANGED_DURING_STARTUP:
609 case FileState::CHANGED:
610 break;
611 }
612 }
613 return file_state_;
614}
615
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800616flatbuffers::Offset<aos::starter::ApplicationStatus>
James Kuszmaul6295a642022-03-22 15:23:59 -0700617Application::PopulateStatus(flatbuffers::FlatBufferBuilder *builder,
618 util::Top *top) {
James Kuszmaul37a56af2023-07-29 15:15:16 -0700619 UpdateFileState();
620
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800621 CHECK_NOTNULL(builder);
622 auto name_fbs = builder->CreateString(name_);
623
James Kuszmaul6295a642022-03-22 15:23:59 -0700624 const bool valid_pid = pid_ > 0 && status_ != aos::starter::State::STOPPED;
625 const flatbuffers::Offset<util::ProcessInfo> process_info =
626 valid_pid ? top->InfoForProcess(builder, pid_)
627 : flatbuffers::Offset<util::ProcessInfo>();
628
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800629 aos::starter::ApplicationStatus::Builder status_builder(*builder);
630 status_builder.add_name(name_fbs);
631 status_builder.add_state(status_);
James Kuszmauld42edb42022-01-07 18:00:16 -0800632 if (exit_code_.has_value()) {
633 status_builder.add_last_exit_code(exit_code_.value());
634 }
James Kuszmaul8544c492023-07-31 15:00:38 -0700635 status_builder.add_has_active_timing_report(
636 last_timing_report_ +
637 // Leave a bit of margin on the timing report receipt time, to allow
638 // for timing errors.
639 3 * std::chrono::milliseconds(FLAGS_timing_report_ms) >
640 event_loop_->monotonic_now());
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800641 status_builder.add_last_stop_reason(stop_reason_);
642 if (pid_ != -1) {
643 status_builder.add_pid(pid_);
644 status_builder.add_id(id_);
645 }
James Kuszmaul6295a642022-03-22 15:23:59 -0700646 // Note that even if process_info is null, calling add_process_info is fine.
647 status_builder.add_process_info(process_info);
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800648 status_builder.add_last_start_time(start_time_.time_since_epoch().count());
James Kuszmaul37a56af2023-07-29 15:15:16 -0700649 status_builder.add_file_state(file_state_);
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800650 return status_builder.Finish();
651}
652
653void Application::Terminate() {
654 stop_reason_ = aos::starter::LastStopReason::TERMINATE;
655 DoStop(false);
656 terminating_ = true;
657}
658
659void Application::HandleCommand(aos::starter::Command cmd) {
660 switch (cmd) {
661 case aos::starter::Command::START: {
662 switch (status_) {
663 case aos::starter::State::WAITING: {
664 restart_timer_->Disable();
665 DoStart();
666 break;
667 }
668 case aos::starter::State::STARTING: {
669 break;
670 }
671 case aos::starter::State::RUNNING: {
672 break;
673 }
674 case aos::starter::State::STOPPING: {
675 queue_restart_ = true;
676 break;
677 }
678 case aos::starter::State::STOPPED: {
679 status_ = aos::starter::State::WAITING;
680 DoStart();
681 break;
682 }
683 }
684 break;
685 }
686 case aos::starter::Command::STOP: {
687 stop_reason_ = aos::starter::LastStopReason::STOP_REQUESTED;
688 DoStop(false);
689 break;
690 }
691 case aos::starter::Command::RESTART: {
692 stop_reason_ = aos::starter::LastStopReason::RESTART_REQUESTED;
693 DoStop(true);
694 break;
695 }
696 }
697}
698
699bool Application::MaybeHandleSignal() {
700 int status;
701
Sarah Newman21c59202022-06-16 12:36:33 -0700702 if (status_ == aos::starter::State::WAITING ||
703 status_ == aos::starter::State::STOPPED) {
704 // We can't possibly have received a signal meant for this process.
705 return false;
706 }
707
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800708 // Check if the status of this process has changed
Sarah Newman21c59202022-06-16 12:36:33 -0700709 // The PID won't be -1 if this application has ever been run successfully
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800710 if (pid_ == -1 || waitpid(pid_, &status, WNOHANG) != pid_) {
711 return false;
712 }
713
714 // Check that the event was the process exiting
715 if (!WIFEXITED(status) && !WIFSIGNALED(status)) {
716 return false;
717 }
718
James Kuszmauld42edb42022-01-07 18:00:16 -0800719 start_timer_->Disable();
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800720 exit_time_ = event_loop_->monotonic_now();
721 exit_code_ = WIFEXITED(status) ? WEXITSTATUS(status) : WTERMSIG(status);
James Kuszmaul37a56af2023-07-29 15:15:16 -0700722 file_state_ = FileState::NOT_RUNNING;
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800723
James Kuszmauld42edb42022-01-07 18:00:16 -0800724 if (auto read_result = status_pipes_.read->Read()) {
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800725 stop_reason_ = static_cast<aos::starter::LastStopReason>(*read_result);
726 }
727
728 switch (status_) {
729 case aos::starter::State::STARTING: {
James Kuszmauld42edb42022-01-07 18:00:16 -0800730 if (exit_code_.value() == 0) {
payton.rehl2841b1c2023-05-25 17:23:55 -0700731 LOG_IF(INFO, quiet_flag_ == QuietLogging::kNo)
732 << "Application '" << name_ << "' pid " << pid_
733 << " exited with status " << exit_code_.value();
James Kuszmauld42edb42022-01-07 18:00:16 -0800734 } else {
Philipp Schrader595979d2023-09-13 11:31:48 -0700735 LOG_IF(WARNING, quiet_flag_ == QuietLogging::kNo ||
736 quiet_flag_ == QuietLogging::kNotForDebugging)
payton.rehl2841b1c2023-05-25 17:23:55 -0700737 << "Failed to start '" << name_ << "' on pid " << pid_
738 << " : Exited with status " << exit_code_.value();
James Kuszmauld42edb42022-01-07 18:00:16 -0800739 }
James Kuszmaul6f10b382022-03-11 22:31:38 -0800740 if (autorestart()) {
741 QueueStart();
742 } else {
743 status_ = aos::starter::State::STOPPED;
Austin Schuh1cea9032023-07-10 11:56:40 -0700744 OnChange();
James Kuszmaul6f10b382022-03-11 22:31:38 -0800745 }
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800746 break;
747 }
748 case aos::starter::State::RUNNING: {
James Kuszmauld42edb42022-01-07 18:00:16 -0800749 if (exit_code_.value() == 0) {
payton.rehl2841b1c2023-05-25 17:23:55 -0700750 LOG_IF(INFO, quiet_flag_ == QuietLogging::kNo)
751 << "Application '" << name_ << "' pid " << pid_
752 << " exited with status " << exit_code_.value();
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800753 } else {
Philipp Schrader595979d2023-09-13 11:31:48 -0700754 if (quiet_flag_ == QuietLogging::kNo ||
755 quiet_flag_ == QuietLogging::kNotForDebugging) {
James Kuszmaul8544c492023-07-31 15:00:38 -0700756 std::string version_string =
757 latest_timing_report_version_.has_value()
758 ? absl::StrCat("'", latest_timing_report_version_.value(),
759 "'")
760 : "unknown";
761 LOG_IF(WARNING, quiet_flag_ == QuietLogging::kNo)
762 << "Application '" << name_ << "' pid " << pid_ << " version "
763 << version_string << " exited unexpectedly with status "
764 << exit_code_.value();
765 }
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800766 }
James Kuszmaul6f10b382022-03-11 22:31:38 -0800767 if (autorestart()) {
768 QueueStart();
769 } else {
770 status_ = aos::starter::State::STOPPED;
Austin Schuh1cea9032023-07-10 11:56:40 -0700771 OnChange();
James Kuszmaul6f10b382022-03-11 22:31:38 -0800772 }
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800773 break;
774 }
775 case aos::starter::State::STOPPING: {
payton.rehl2841b1c2023-05-25 17:23:55 -0700776 LOG_IF(INFO, quiet_flag_ == QuietLogging::kNo)
777 << "Successfully stopped '" << name_ << "' pid: " << pid_
778 << " with status " << exit_code_.value();
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800779 status_ = aos::starter::State::STOPPED;
780
781 // Disable force stop timer since the process already died
782 stop_timer_->Disable();
783
Austin Schuh1cea9032023-07-10 11:56:40 -0700784 OnChange();
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800785 if (terminating_) {
786 return true;
787 }
788
789 if (queue_restart_) {
790 queue_restart_ = false;
791 status_ = aos::starter::State::WAITING;
792 DoStart();
793 }
794 break;
795 }
796 case aos::starter::State::WAITING:
797 case aos::starter::State::STOPPED: {
Sarah Newman21c59202022-06-16 12:36:33 -0700798 __builtin_unreachable();
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800799 break;
800 }
801 }
802
803 return false;
804}
805
Austin Schuh1cea9032023-07-10 11:56:40 -0700806void Application::OnChange() {
807 for (auto &fn : on_change_) {
808 fn();
809 }
810}
811
Adam Snaider70deaf22023-08-11 13:58:34 -0700812Application::~Application() {
813 start_timer_->Disable();
814 restart_timer_->Disable();
815 stop_timer_->Disable();
816 pipe_timer_->Disable();
817 child_status_handler_->Disable();
818}
819
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800820} // namespace aos::starter