blob: 593945bbc1c77aec2e6db2d862b991ac048d1d0e [file] [log] [blame]
James Kuszmaul3224b8e2022-01-07 19:00:39 -08001#include "aos/starter/subprocess.h"
2
Stephan Pleinesf581a072024-05-23 20:59:27 -07003#include <errno.h>
James Kuszmaul3224b8e2022-01-07 19:00:39 -08004#include <grp.h>
5#include <pwd.h>
Stephan Pleinesf581a072024-05-23 20:59:27 -07006#include <signal.h>
7#include <stdlib.h>
James Kuszmaul3224b8e2022-01-07 19:00:39 -08008#include <sys/prctl.h>
Stephan Pleinesf581a072024-05-23 20:59:27 -07009#include <sys/stat.h>
James Kuszmaul3224b8e2022-01-07 19:00:39 -080010#include <sys/wait.h>
Stephan Pleinesf581a072024-05-23 20:59:27 -070011#include <unistd.h>
James Kuszmaul3224b8e2022-01-07 19:00:39 -080012
Stephan Pleinesf581a072024-05-23 20:59:27 -070013#include <compare>
14#include <iterator>
15#include <ostream>
16#include <ratio>
17
Austin Schuh99f7c6a2024-06-25 22:07:44 -070018#include "absl/flags/flag.h"
19#include "absl/log/check.h"
20#include "absl/log/log.h"
Stephan Pleinesf581a072024-05-23 20:59:27 -070021#include "absl/strings/str_cat.h"
James Kuszmaul37a56af2023-07-29 15:15:16 -070022#include "absl/strings/str_split.h"
James Kuszmaul3224b8e2022-01-07 19:00:39 -080023
Stephan Pleinesf581a072024-05-23 20:59:27 -070024#include "aos/util/file.h"
25#include "aos/util/process_info_generated.h"
James Kuszmaul8544c492023-07-31 15:00:38 -070026
James Kuszmaul3224b8e2022-01-07 19:00:39 -080027namespace aos::starter {
28
Philipp Schraderfa8fc492023-09-26 14:52:02 -070029// Blocks all signals while an instance of this class is in scope.
30class ScopedCompleteSignalBlocker {
31 public:
32 ScopedCompleteSignalBlocker() {
33 sigset_t mask;
34 sigfillset(&mask);
35 // Remember the current mask.
36 PCHECK(sigprocmask(SIG_SETMASK, &mask, &old_mask_) == 0);
37 }
38
39 ~ScopedCompleteSignalBlocker() {
40 // Restore the remembered mask.
41 PCHECK(sigprocmask(SIG_SETMASK, &old_mask_, nullptr) == 0);
42 }
43
44 private:
45 sigset_t old_mask_;
46};
47
namespace {
// Returns the inode number reported by stat() for `path`, or std::nullopt
// when stat() fails.
std::optional<ino_t> GetInodeForPath(const std::filesystem::path &path) {
  struct stat file_info;
  if (stat(path.c_str(), &file_info) == 0) {
    return file_info.st_ino;
  }
  return std::nullopt;
}

// Returns true when `path` can no longer be stat'ed or when its inode differs
// from `previous_inode`.
bool InodeChanged(const std::filesystem::path &path, ino_t previous_inode) {
  const std::optional<ino_t> inode_now = GetInodeForPath(path);
  return !inode_now.has_value() || *inode_now != previous_inode;
}
}  // namespace
64
65std::filesystem::path ResolvePath(std::string_view command) {
66 std::filesystem::path command_path = command;
67 if (command.find("/") != std::string_view::npos) {
68 CHECK(std::filesystem::exists(command_path))
69 << ": " << command << " does not exist.";
70 return std::filesystem::canonical(command_path);
71 }
72 const char *system_path = getenv("PATH");
73 std::string system_path_buffer;
74 if (system_path == nullptr) {
75 const size_t default_path_length = confstr(_CS_PATH, nullptr, 0);
76 PCHECK(default_path_length != 0) << ": Unable to resolve " << command;
77 system_path_buffer.resize(default_path_length);
78 confstr(_CS_PATH, system_path_buffer.data(), system_path_buffer.size());
79 system_path = system_path_buffer.c_str();
80 VLOG(2) << "Using default path of " << system_path
81 << " in the absence of PATH being set.";
82 }
83 const std::vector<std::string_view> search_paths =
84 absl::StrSplit(system_path, ':');
85 for (const std::string_view search_path : search_paths) {
86 const std::filesystem::path candidate =
87 std::filesystem::path(search_path) / command_path;
88 if (std::filesystem::exists(candidate)) {
89 return std::filesystem::canonical(candidate);
90 }
91 }
92 LOG(FATAL) << "Unable to resolve " << command;
93}
94
Austin Schuhbbeb37e2022-08-17 16:19:27 -070095// RAII class to become root and restore back to the original user and group
96// afterwards.
97class Sudo {
98 public:
99 Sudo() {
100 // Save what we were.
101 PCHECK(getresuid(&ruid_, &euid_, &suid_) == 0);
102 PCHECK(getresgid(&rgid_, &egid_, &sgid_) == 0);
103
104 // Become root.
105 PCHECK(setresuid(/* ruid */ 0 /* root */, /* euid */ 0, /* suid */ 0) == 0)
106 << ": Failed to become root";
107 PCHECK(setresgid(/* ruid */ 0 /* root */, /* euid */ 0, /* suid */ 0) == 0)
108 << ": Failed to become root";
109 }
110
111 ~Sudo() {
112 // And recover.
113 PCHECK(setresgid(rgid_, egid_, sgid_) == 0);
114 PCHECK(setresuid(ruid_, euid_, suid_) == 0);
115 }
116
117 uid_t ruid_, euid_, suid_;
118 gid_t rgid_, egid_, sgid_;
119};
120
Austin Schuh77e20a32023-08-01 12:25:03 -0700121MemoryCGroup::MemoryCGroup(std::string_view name, Create should_create)
122 : cgroup_(absl::StrCat("/sys/fs/cgroup/memory/aos_", name)),
123 should_create_(should_create) {
124 if (should_create_ == Create::kDoCreate) {
125 Sudo sudo;
126 int ret = mkdir(cgroup_.c_str(), 0755);
Austin Schuhbbeb37e2022-08-17 16:19:27 -0700127
Austin Schuh77e20a32023-08-01 12:25:03 -0700128 if (ret != 0) {
129 if (errno == EEXIST) {
130 PCHECK(rmdir(cgroup_.c_str()) == 0)
131 << ": Failed to remove previous cgroup " << cgroup_;
132 ret = mkdir(cgroup_.c_str(), 0755);
133 }
Austin Schuhbbeb37e2022-08-17 16:19:27 -0700134 }
Austin Schuhbbeb37e2022-08-17 16:19:27 -0700135
Austin Schuh77e20a32023-08-01 12:25:03 -0700136 if (ret != 0) {
137 PLOG(FATAL) << ": Failed to create cgroup aos_" << cgroup_
138 << ", do you have permission?";
139 }
Austin Schuhbbeb37e2022-08-17 16:19:27 -0700140 }
141}
142
143void MemoryCGroup::AddTid(pid_t pid) {
144 if (pid == 0) {
145 pid = getpid();
146 }
Austin Schuh77e20a32023-08-01 12:25:03 -0700147 if (should_create_ == Create::kDoCreate) {
148 Sudo sudo;
149 util::WriteStringToFileOrDie(absl::StrCat(cgroup_, "/tasks").c_str(),
150 std::to_string(pid));
151 } else {
152 util::WriteStringToFileOrDie(absl::StrCat(cgroup_, "/tasks").c_str(),
153 std::to_string(pid));
154 }
Austin Schuhbbeb37e2022-08-17 16:19:27 -0700155}
156
157void MemoryCGroup::SetLimit(std::string_view limit_name, uint64_t limit_value) {
Austin Schuh77e20a32023-08-01 12:25:03 -0700158 if (should_create_ == Create::kDoCreate) {
159 Sudo sudo;
160 util::WriteStringToFileOrDie(absl::StrCat(cgroup_, "/", limit_name).c_str(),
161 std::to_string(limit_value));
162 } else {
163 util::WriteStringToFileOrDie(absl::StrCat(cgroup_, "/", limit_name).c_str(),
164 std::to_string(limit_value));
165 }
Austin Schuhbbeb37e2022-08-17 16:19:27 -0700166}
167
168MemoryCGroup::~MemoryCGroup() {
Austin Schuh77e20a32023-08-01 12:25:03 -0700169 if (should_create_ == Create::kDoCreate) {
170 Sudo sudo;
171 PCHECK(rmdir(absl::StrCat(cgroup_).c_str()) == 0);
172 }
Austin Schuhbbeb37e2022-08-17 16:19:27 -0700173}
174
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800175SignalListener::SignalListener(aos::ShmEventLoop *loop,
176 std::function<void(signalfd_siginfo)> callback)
Austin Schuh1cea9032023-07-10 11:56:40 -0700177 : SignalListener(loop->epoll(), std::move(callback)) {}
178
179SignalListener::SignalListener(aos::internal::EPoll *epoll,
180 std::function<void(signalfd_siginfo)> callback)
181 : SignalListener(epoll, callback,
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800182 {SIGHUP, SIGINT, SIGQUIT, SIGABRT, SIGFPE, SIGSEGV,
183 SIGPIPE, SIGTERM, SIGBUS, SIGXCPU, SIGCHLD}) {}
184
185SignalListener::SignalListener(aos::ShmEventLoop *loop,
186 std::function<void(signalfd_siginfo)> callback,
187 std::initializer_list<unsigned int> signals)
Austin Schuh1cea9032023-07-10 11:56:40 -0700188 : SignalListener(loop->epoll(), std::move(callback), std::move(signals)) {}
189
190SignalListener::SignalListener(aos::internal::EPoll *epoll,
191 std::function<void(signalfd_siginfo)> callback,
192 std::initializer_list<unsigned int> signals)
193 : epoll_(epoll), callback_(std::move(callback)), signalfd_(signals) {
194 epoll_->OnReadable(signalfd_.fd(), [this] {
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800195 signalfd_siginfo info = signalfd_.Read();
196
197 if (info.ssi_signo == 0) {
198 LOG(WARNING) << "Could not read " << sizeof(signalfd_siginfo) << " bytes";
199 return;
200 }
201
202 callback_(info);
203 });
204}
205
Austin Schuh1cea9032023-07-10 11:56:40 -0700206SignalListener::~SignalListener() { epoll_->DeleteFd(signalfd_.fd()); }
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800207
James Kuszmauld42edb42022-01-07 18:00:16 -0800208Application::Application(std::string_view name,
209 std::string_view executable_name,
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800210 aos::EventLoop *event_loop,
payton.rehl2841b1c2023-05-25 17:23:55 -0700211 std::function<void()> on_change,
212 QuietLogging quiet_flag)
James Kuszmauld42edb42022-01-07 18:00:16 -0800213 : name_(name),
James Kuszmaul37a56af2023-07-29 15:15:16 -0700214 path_(ResolvePath(executable_name)),
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800215 event_loop_(event_loop),
216 start_timer_(event_loop_->AddTimer([this] {
217 status_ = aos::starter::State::RUNNING;
payton.rehl2841b1c2023-05-25 17:23:55 -0700218 LOG_IF(INFO, quiet_flag_ == QuietLogging::kNo)
219 << "Started '" << name_ << "' pid: " << pid_;
James Kuszmaul37a56af2023-07-29 15:15:16 -0700220 // Check if the file on disk changed while we were starting up. We allow
221 // this state for the same reason that we don't just use /proc/$pid/exe
222 // to determine if the file is deleted--we may be running a script or
223 // sudo or the such and determining the state of the file that we
224 // actually care about sounds like more work than we want to deal with.
225 if (InodeChanged(path_, pre_fork_inode_)) {
226 file_state_ = FileState::CHANGED_DURING_STARTUP;
227 } else {
228 file_state_ = FileState::NO_CHANGE;
229 }
230
231 OnChange();
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800232 })),
233 restart_timer_(event_loop_->AddTimer([this] { DoStart(); })),
234 stop_timer_(event_loop_->AddTimer([this] {
235 if (kill(pid_, SIGKILL) == 0) {
Philipp Schrader595979d2023-09-13 11:31:48 -0700236 LOG_IF(WARNING, quiet_flag_ == QuietLogging::kNo ||
237 quiet_flag_ == QuietLogging::kNotForDebugging)
payton.rehl2841b1c2023-05-25 17:23:55 -0700238 << "Failed to stop, sending SIGKILL to '" << name_
239 << "' pid: " << pid_;
Sarah Newman9687e062023-09-08 12:22:27 -0700240 } else {
Philipp Schrader595979d2023-09-13 11:31:48 -0700241 PLOG_IF(WARNING, quiet_flag_ == QuietLogging::kNo ||
242 quiet_flag_ == QuietLogging::kNotForDebugging)
Sarah Newman9687e062023-09-08 12:22:27 -0700243 << "Failed to send SIGKILL to '" << name_ << "' pid: " << pid_;
244 stop_timer_->Schedule(event_loop_->monotonic_now() +
245 std::chrono::seconds(1));
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800246 }
247 })),
James Kuszmauld42edb42022-01-07 18:00:16 -0800248 pipe_timer_(event_loop_->AddTimer([this]() { FetchOutputs(); })),
249 child_status_handler_(
250 event_loop_->AddTimer([this]() { MaybeHandleSignal(); })),
Austin Schuh1cea9032023-07-10 11:56:40 -0700251 on_change_({on_change}),
payton.rehl2841b1c2023-05-25 17:23:55 -0700252 quiet_flag_(quiet_flag) {
James Kuszmaul06a8f352024-03-15 14:15:57 -0700253 // Keep the length of the timer name bounded to some reasonable length.
254 start_timer_->set_name(absl::StrCat("app_start_", name.substr(0, 10)));
255 restart_timer_->set_name(absl::StrCat("app_restart_", name.substr(0, 10)));
256 stop_timer_->set_name(absl::StrCat("app_stop_", name.substr(0, 10)));
257 pipe_timer_->set_name(absl::StrCat("app_pipe_", name.substr(0, 10)));
258 child_status_handler_->set_name(
259 absl::StrCat("app_status_handler_", name.substr(0, 10)));
Sanjay Narayanan92fdc3d2023-08-25 14:42:56 -0700260 // Every second poll to check if the child is dead. This is used as a
James Kuszmaul06a8f352024-03-15 14:15:57 -0700261 // default for the case where the user is not directly catching SIGCHLD
262 // and calling MaybeHandleSignal for us.
Sanjay Narayanan92fdc3d2023-08-25 14:42:56 -0700263 child_status_handler_->Schedule(event_loop_->monotonic_now(),
264 std::chrono::seconds(1));
James Kuszmauld42edb42022-01-07 18:00:16 -0800265}
266
267Application::Application(const aos::Application *application,
268 aos::EventLoop *event_loop,
payton.rehl2841b1c2023-05-25 17:23:55 -0700269 std::function<void()> on_change,
270 QuietLogging quiet_flag)
James Kuszmauld42edb42022-01-07 18:00:16 -0800271 : Application(application->name()->string_view(),
272 application->has_executable_name()
273 ? application->executable_name()->string_view()
274 : application->name()->string_view(),
payton.rehl2841b1c2023-05-25 17:23:55 -0700275 event_loop, on_change, quiet_flag) {
James Kuszmauld42edb42022-01-07 18:00:16 -0800276 user_name_ = application->has_user() ? application->user()->str() : "";
277 user_ = application->has_user() ? FindUid(user_name_.c_str()) : std::nullopt;
278 group_ = application->has_user() ? FindPrimaryGidForUser(user_name_.c_str())
279 : std::nullopt;
280 autostart_ = application->autostart();
281 autorestart_ = application->autorestart();
282 if (application->has_args()) {
283 set_args(*application->args());
284 }
Austin Schuhbbeb37e2022-08-17 16:19:27 -0700285
286 if (application->has_memory_limit() && application->memory_limit() > 0) {
287 SetMemoryLimit(application->memory_limit());
288 }
Philipp Schraderc8e779e2024-01-25 16:32:39 -0800289
290 set_stop_grace_period(std::chrono::nanoseconds(application->stop_time()));
James Kuszmauld42edb42022-01-07 18:00:16 -0800291}
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800292
293void Application::DoStart() {
294 if (status_ != aos::starter::State::WAITING) {
295 return;
296 }
297
298 start_timer_->Disable();
299 restart_timer_->Disable();
300
James Kuszmauld42edb42022-01-07 18:00:16 -0800301 status_pipes_ = util::ScopedPipe::MakePipe();
302
303 if (capture_stdout_) {
304 stdout_pipes_ = util::ScopedPipe::MakePipe();
305 stdout_.clear();
306 }
307 if (capture_stderr_) {
308 stderr_pipes_ = util::ScopedPipe::MakePipe();
309 stderr_.clear();
310 }
311
Philipp Schradera6712522023-07-05 20:25:11 -0700312 pipe_timer_->Schedule(event_loop_->monotonic_now(),
313 std::chrono::milliseconds(100));
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800314
Philipp Schraderfa8fc492023-09-26 14:52:02 -0700315 {
316 // Block all signals during the fork() call. Together with the default
317 // signal handler restoration below, This prevents signal handlers from
318 // getting called in the child and accidentally affecting the parent. In
319 // particular, the exit handler for shm_event_loop could be called here if
320 // we don't exec() quickly enough.
321 ScopedCompleteSignalBlocker signal_blocker;
James Kuszmaul37a56af2023-07-29 15:15:16 -0700322 {
323 const std::optional<ino_t> inode = GetInodeForPath(path_);
324 CHECK(inode.has_value())
325 << ": " << path_ << " does not seem to be stat'able.";
326 pre_fork_inode_ = inode.value();
327 }
Philipp Schraderfa8fc492023-09-26 14:52:02 -0700328 const pid_t pid = fork();
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800329
Philipp Schraderfa8fc492023-09-26 14:52:02 -0700330 if (pid != 0) {
331 if (pid == -1) {
332 PLOG_IF(WARNING, quiet_flag_ == QuietLogging::kNo ||
333 quiet_flag_ == QuietLogging::kNotForDebugging)
334 << "Failed to fork '" << name_ << "'";
335 stop_reason_ = aos::starter::LastStopReason::FORK_ERR;
336 status_ = aos::starter::State::STOPPED;
337 } else {
338 pid_ = pid;
339 id_ = next_id_++;
340 start_time_ = event_loop_->monotonic_now();
341 status_ = aos::starter::State::STARTING;
342 latest_timing_report_version_.reset();
343 LOG_IF(INFO, quiet_flag_ == QuietLogging::kNo)
344 << "Starting '" << name_ << "' pid " << pid_;
345
346 // Set up timer which moves application to RUNNING state if it is still
347 // alive in 1 second.
348 start_timer_->Schedule(event_loop_->monotonic_now() +
349 std::chrono::seconds(1));
350 // Since we are the parent process, clear our write-side of all the
351 // pipes.
352 status_pipes_.write.reset();
353 stdout_pipes_.write.reset();
354 stderr_pipes_.write.reset();
355 }
356 OnChange();
357 return;
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800358 }
Philipp Schraderfa8fc492023-09-26 14:52:02 -0700359
360 // Clear any signal handlers so that they don't accidentally interfere with
361 // the parent process. Is there a better way to iterate over all the
362 // signals? Right now we're just dealing with the most common ones.
363 for (int signal : {SIGINT, SIGHUP, SIGTERM}) {
364 struct sigaction action;
365 sigemptyset(&action.sa_mask);
366 action.sa_flags = 0;
367 action.sa_handler = SIG_DFL;
368 PCHECK(sigaction(signal, &action, nullptr) == 0);
369 }
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800370 }
371
Austin Schuhbbeb37e2022-08-17 16:19:27 -0700372 if (memory_cgroup_) {
373 memory_cgroup_->AddTid();
374 }
375
James Kuszmauld42edb42022-01-07 18:00:16 -0800376 // Since we are the child process, clear our read-side of all the pipes.
377 status_pipes_.read.reset();
378 stdout_pipes_.read.reset();
379 stderr_pipes_.read.reset();
380
381 // The status pipe will not be needed if the execve succeeds.
382 status_pipes_.write->SetCloexec();
383
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800384 // Clear out signal mask of parent so forked process receives all signals
385 // normally.
386 sigset_t empty_mask;
387 sigemptyset(&empty_mask);
388 sigprocmask(SIG_SETMASK, &empty_mask, nullptr);
389
390 // Cleanup children if starter dies in a way that is not handled gracefully.
391 if (prctl(PR_SET_PDEATHSIG, SIGKILL) == -1) {
James Kuszmauld42edb42022-01-07 18:00:16 -0800392 status_pipes_.write->Write(
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800393 static_cast<uint32_t>(aos::starter::LastStopReason::SET_PRCTL_ERR));
394 PLOG(FATAL) << "Could not set PR_SET_PDEATHSIG to SIGKILL";
395 }
396
397 if (group_) {
398 CHECK(!user_name_.empty());
399 // The manpage for setgroups says we just need CAP_SETGID, but empirically
400 // we also need the effective UID to be 0 to make it work. user_ must also
401 // be set so we change this effective UID back later.
402 CHECK(user_);
403 if (seteuid(0) == -1) {
James Kuszmauld42edb42022-01-07 18:00:16 -0800404 status_pipes_.write->Write(
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800405 static_cast<uint32_t>(aos::starter::LastStopReason::SET_GRP_ERR));
406 PLOG(FATAL) << "Could not seteuid(0) for " << name_
407 << " in preparation for setting groups";
408 }
409 if (initgroups(user_name_.c_str(), *group_) == -1) {
James Kuszmauld42edb42022-01-07 18:00:16 -0800410 status_pipes_.write->Write(
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800411 static_cast<uint32_t>(aos::starter::LastStopReason::SET_GRP_ERR));
412 PLOG(FATAL) << "Could not initialize normal groups for " << name_
413 << " as " << user_name_ << " with " << *group_;
414 }
415 if (setgid(*group_) == -1) {
James Kuszmauld42edb42022-01-07 18:00:16 -0800416 status_pipes_.write->Write(
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800417 static_cast<uint32_t>(aos::starter::LastStopReason::SET_GRP_ERR));
418 PLOG(FATAL) << "Could not set group for " << name_ << " to " << *group_;
419 }
420 }
421
422 if (user_) {
423 if (setuid(*user_) == -1) {
James Kuszmauld42edb42022-01-07 18:00:16 -0800424 status_pipes_.write->Write(
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800425 static_cast<uint32_t>(aos::starter::LastStopReason::SET_USR_ERR));
426 PLOG(FATAL) << "Could not set user for " << name_ << " to " << *user_;
427 }
428 }
429
James Kuszmauld42edb42022-01-07 18:00:16 -0800430 if (capture_stdout_) {
431 PCHECK(STDOUT_FILENO == dup2(stdout_pipes_.write->fd(), STDOUT_FILENO));
432 stdout_pipes_.write.reset();
433 }
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800434
James Kuszmauld42edb42022-01-07 18:00:16 -0800435 if (capture_stderr_) {
436 PCHECK(STDERR_FILENO == dup2(stderr_pipes_.write->fd(), STDERR_FILENO));
437 stderr_pipes_.write.reset();
438 }
439
Sanjay Narayanan01a228f2022-04-26 14:19:30 -0700440 if (run_as_sudo_) {
Sarah Newman6d1e53b2022-08-09 14:38:08 -0700441 // For sudo we must supply the actual path
James Kuszmaul37a56af2023-07-29 15:15:16 -0700442 args_.insert(args_.begin(), path_.c_str());
Sanjay Narayanan01a228f2022-04-26 14:19:30 -0700443 args_.insert(args_.begin(), kSudo);
Sarah Newman6d1e53b2022-08-09 14:38:08 -0700444 } else {
445 // argv[0] should be the program name
446 args_.insert(args_.begin(), name_);
Sanjay Narayanan01a228f2022-04-26 14:19:30 -0700447 }
James Kuszmauld42edb42022-01-07 18:00:16 -0800448
449 std::vector<char *> cargs = CArgs();
Philipp Schrader790cb542023-07-05 21:06:52 -0700450 const char *path = run_as_sudo_ ? kSudo : path_.c_str();
Sanjay Narayanan01a228f2022-04-26 14:19:30 -0700451 execvp(path, cargs.data());
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800452
453 // If we got here, something went wrong
James Kuszmauld42edb42022-01-07 18:00:16 -0800454 status_pipes_.write->Write(
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800455 static_cast<uint32_t>(aos::starter::LastStopReason::EXECV_ERR));
Philipp Schrader595979d2023-09-13 11:31:48 -0700456 PLOG_IF(WARNING, quiet_flag_ == QuietLogging::kNo ||
457 quiet_flag_ == QuietLogging::kNotForDebugging)
payton.rehl2841b1c2023-05-25 17:23:55 -0700458 << "Could not execute " << name_ << " (" << path_ << ')';
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800459
460 _exit(EXIT_FAILURE);
461}
462
James Kuszmaul8544c492023-07-31 15:00:38 -0700463void Application::ObserveTimingReport(
464 const aos::monotonic_clock::time_point send_time,
465 const aos::timing::Report *msg) {
466 if (msg->name()->string_view() == name_ && msg->pid() == pid_ &&
467 msg->has_version()) {
468 latest_timing_report_version_ = msg->version()->str();
469 last_timing_report_ = send_time;
470 }
471}
472
James Kuszmauld42edb42022-01-07 18:00:16 -0800473void Application::FetchOutputs() {
474 if (capture_stdout_) {
475 stdout_pipes_.read->Read(&stdout_);
476 }
477 if (capture_stderr_) {
478 stderr_pipes_.read->Read(&stderr_);
479 }
480}
481
482const std::string &Application::GetStdout() {
483 CHECK(capture_stdout_);
484 FetchOutputs();
485 return stdout_;
486}
487
488const std::string &Application::GetStderr() {
489 CHECK(capture_stderr_);
490 FetchOutputs();
491 return stderr_;
492}
493
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800494void Application::DoStop(bool restart) {
495 // If stop or restart received, the old state of these is no longer applicable
496 // so cancel both.
497 restart_timer_->Disable();
498 start_timer_->Disable();
499
James Kuszmauld42edb42022-01-07 18:00:16 -0800500 FetchOutputs();
501
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800502 switch (status_) {
503 case aos::starter::State::STARTING:
504 case aos::starter::State::RUNNING: {
James Kuszmaul37a56af2023-07-29 15:15:16 -0700505 file_state_ = FileState::NOT_RUNNING;
Philipp Schrader595979d2023-09-13 11:31:48 -0700506 LOG_IF(INFO, quiet_flag_ == QuietLogging::kNo ||
507 quiet_flag_ == QuietLogging::kNotForDebugging)
payton.rehl2841b1c2023-05-25 17:23:55 -0700508 << "Stopping '" << name_ << "' pid: " << pid_ << " with signal "
509 << SIGINT;
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800510 status_ = aos::starter::State::STOPPING;
511
Philipp Schrader595979d2023-09-13 11:31:48 -0700512 if (kill(pid_, SIGINT) != 0) {
513 PLOG_IF(INFO, quiet_flag_ == QuietLogging::kNo ||
514 quiet_flag_ == QuietLogging::kNotForDebugging)
515 << "Failed to send signal " << SIGINT << " to '" << name_
516 << "' pid: " << pid_;
517 }
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800518
519 // Watchdog timer to SIGKILL application if it is still running 1 second
520 // after SIGINT
Philipp Schraderc8e779e2024-01-25 16:32:39 -0800521 stop_timer_->Schedule(event_loop_->monotonic_now() + stop_grace_period_);
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800522 queue_restart_ = restart;
Austin Schuh1cea9032023-07-10 11:56:40 -0700523 OnChange();
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800524 break;
525 }
526 case aos::starter::State::WAITING: {
527 // If waiting to restart, and receives restart, skip the waiting period
528 // and restart immediately. If stop received, all we have to do is move
529 // to the STOPPED state.
530 if (restart) {
531 DoStart();
532 } else {
533 status_ = aos::starter::State::STOPPED;
Austin Schuh1cea9032023-07-10 11:56:40 -0700534 OnChange();
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800535 }
536 break;
537 }
538 case aos::starter::State::STOPPING: {
539 // If the application is already stopping, then we just need to update the
540 // restart flag to the most recent status.
541 queue_restart_ = restart;
542 break;
543 }
544 case aos::starter::State::STOPPED: {
545 // Restart immediately if the application is already stopped
546 if (restart) {
547 status_ = aos::starter::State::WAITING;
548 DoStart();
549 }
550 break;
551 }
552 }
553}
554
555void Application::QueueStart() {
556 status_ = aos::starter::State::WAITING;
557
payton.rehl2841b1c2023-05-25 17:23:55 -0700558 LOG_IF(INFO, quiet_flag_ == QuietLogging::kNo)
559 << "Restarting " << name_ << " in 3 seconds";
Philipp Schradera6712522023-07-05 20:25:11 -0700560 restart_timer_->Schedule(event_loop_->monotonic_now() +
561 std::chrono::seconds(3));
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800562 start_timer_->Disable();
563 stop_timer_->Disable();
Austin Schuh1cea9032023-07-10 11:56:40 -0700564 OnChange();
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800565}
566
James Kuszmauld42edb42022-01-07 18:00:16 -0800567std::vector<char *> Application::CArgs() {
568 std::vector<char *> cargs;
569 std::transform(args_.begin(), args_.end(), std::back_inserter(cargs),
570 [](std::string &str) { return str.data(); });
571 cargs.push_back(nullptr);
572 return cargs;
573}
574
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800575void Application::set_args(
576 const flatbuffers::Vector<flatbuffers::Offset<flatbuffers::String>> &v) {
577 args_.clear();
578 std::transform(v.begin(), v.end(), std::back_inserter(args_),
James Kuszmauld42edb42022-01-07 18:00:16 -0800579 [](const flatbuffers::String *str) { return str->str(); });
580}
581
582void Application::set_args(std::vector<std::string> args) {
583 args_ = std::move(args);
584}
585
586void Application::set_capture_stdout(bool capture) {
587 capture_stdout_ = capture;
588}
589
590void Application::set_capture_stderr(bool capture) {
591 capture_stderr_ = capture;
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800592}
593
594std::optional<uid_t> Application::FindUid(const char *name) {
595 // TODO(austin): Use the reentrant version. This should be safe.
596 struct passwd *user_data = getpwnam(name);
597 if (user_data != nullptr) {
598 return user_data->pw_uid;
599 } else {
600 LOG(FATAL) << "Could not find user " << name;
601 return std::nullopt;
602 }
603}
604
605std::optional<gid_t> Application::FindPrimaryGidForUser(const char *name) {
606 // TODO(austin): Use the reentrant version. This should be safe.
607 struct passwd *user_data = getpwnam(name);
608 if (user_data != nullptr) {
609 return user_data->pw_gid;
610 } else {
611 LOG(FATAL) << "Could not find user " << name;
612 return std::nullopt;
613 }
614}
615
James Kuszmaul37a56af2023-07-29 15:15:16 -0700616FileState Application::UpdateFileState() {
617 // On every call, check if a different file is present on disk. Note that
618 // while the applications is running, the file cannot be changed without the
619 // inode changing.
620 // We could presumably use inotify or the such to watch the file instead,
621 // but this works and we do not expect substantial cost from reading the inode
622 // of a file every time we send out a status message.
623 if (InodeChanged(path_, pre_fork_inode_)) {
624 switch (file_state_) {
625 case FileState::NO_CHANGE:
626 file_state_ = FileState::CHANGED;
627 break;
628 case FileState::NOT_RUNNING:
629 case FileState::CHANGED_DURING_STARTUP:
630 case FileState::CHANGED:
631 break;
632 }
633 }
634 return file_state_;
635}
636
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800637flatbuffers::Offset<aos::starter::ApplicationStatus>
James Kuszmaul6295a642022-03-22 15:23:59 -0700638Application::PopulateStatus(flatbuffers::FlatBufferBuilder *builder,
639 util::Top *top) {
James Kuszmaul37a56af2023-07-29 15:15:16 -0700640 UpdateFileState();
641
Austin Schuh6bdcc372024-06-27 14:49:11 -0700642 CHECK(builder != nullptr);
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800643 auto name_fbs = builder->CreateString(name_);
644
James Kuszmaul6295a642022-03-22 15:23:59 -0700645 const bool valid_pid = pid_ > 0 && status_ != aos::starter::State::STOPPED;
646 const flatbuffers::Offset<util::ProcessInfo> process_info =
647 valid_pid ? top->InfoForProcess(builder, pid_)
648 : flatbuffers::Offset<util::ProcessInfo>();
649
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800650 aos::starter::ApplicationStatus::Builder status_builder(*builder);
651 status_builder.add_name(name_fbs);
652 status_builder.add_state(status_);
James Kuszmauld42edb42022-01-07 18:00:16 -0800653 if (exit_code_.has_value()) {
654 status_builder.add_last_exit_code(exit_code_.value());
655 }
James Kuszmaul8544c492023-07-31 15:00:38 -0700656 status_builder.add_has_active_timing_report(
657 last_timing_report_ +
658 // Leave a bit of margin on the timing report receipt time, to allow
659 // for timing errors.
Austin Schuh99f7c6a2024-06-25 22:07:44 -0700660 3 * std::chrono::milliseconds(absl::GetFlag(FLAGS_timing_report_ms)) >
James Kuszmaul8544c492023-07-31 15:00:38 -0700661 event_loop_->monotonic_now());
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800662 status_builder.add_last_stop_reason(stop_reason_);
663 if (pid_ != -1) {
664 status_builder.add_pid(pid_);
665 status_builder.add_id(id_);
666 }
James Kuszmaul6295a642022-03-22 15:23:59 -0700667 // Note that even if process_info is null, calling add_process_info is fine.
668 status_builder.add_process_info(process_info);
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800669 status_builder.add_last_start_time(start_time_.time_since_epoch().count());
James Kuszmaul37a56af2023-07-29 15:15:16 -0700670 status_builder.add_file_state(file_state_);
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800671 return status_builder.Finish();
672}
673
674void Application::Terminate() {
675 stop_reason_ = aos::starter::LastStopReason::TERMINATE;
676 DoStop(false);
677 terminating_ = true;
678}
679
680void Application::HandleCommand(aos::starter::Command cmd) {
681 switch (cmd) {
682 case aos::starter::Command::START: {
683 switch (status_) {
684 case aos::starter::State::WAITING: {
685 restart_timer_->Disable();
686 DoStart();
687 break;
688 }
689 case aos::starter::State::STARTING: {
690 break;
691 }
692 case aos::starter::State::RUNNING: {
693 break;
694 }
695 case aos::starter::State::STOPPING: {
696 queue_restart_ = true;
697 break;
698 }
699 case aos::starter::State::STOPPED: {
700 status_ = aos::starter::State::WAITING;
701 DoStart();
702 break;
703 }
704 }
705 break;
706 }
707 case aos::starter::Command::STOP: {
708 stop_reason_ = aos::starter::LastStopReason::STOP_REQUESTED;
709 DoStop(false);
710 break;
711 }
712 case aos::starter::Command::RESTART: {
713 stop_reason_ = aos::starter::LastStopReason::RESTART_REQUESTED;
714 DoStop(true);
715 break;
716 }
717 }
718}
719
720bool Application::MaybeHandleSignal() {
721 int status;
722
Sarah Newman21c59202022-06-16 12:36:33 -0700723 if (status_ == aos::starter::State::WAITING ||
724 status_ == aos::starter::State::STOPPED) {
725 // We can't possibly have received a signal meant for this process.
726 return false;
727 }
728
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800729 // Check if the status of this process has changed
Sarah Newman21c59202022-06-16 12:36:33 -0700730 // The PID won't be -1 if this application has ever been run successfully
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800731 if (pid_ == -1 || waitpid(pid_, &status, WNOHANG) != pid_) {
732 return false;
733 }
734
735 // Check that the event was the process exiting
736 if (!WIFEXITED(status) && !WIFSIGNALED(status)) {
737 return false;
738 }
739
James Kuszmauld42edb42022-01-07 18:00:16 -0800740 start_timer_->Disable();
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800741 exit_time_ = event_loop_->monotonic_now();
742 exit_code_ = WIFEXITED(status) ? WEXITSTATUS(status) : WTERMSIG(status);
James Kuszmaul37a56af2023-07-29 15:15:16 -0700743 file_state_ = FileState::NOT_RUNNING;
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800744
James Kuszmauld42edb42022-01-07 18:00:16 -0800745 if (auto read_result = status_pipes_.read->Read()) {
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800746 stop_reason_ = static_cast<aos::starter::LastStopReason>(*read_result);
747 }
748
James Kuszmaulb740f452023-11-14 17:44:29 -0800749 const std::string starter_version_string =
750 absl::StrCat("starter version '",
751 event_loop_->VersionString().value_or("unknown"), "'");
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800752 switch (status_) {
753 case aos::starter::State::STARTING: {
James Kuszmauld42edb42022-01-07 18:00:16 -0800754 if (exit_code_.value() == 0) {
payton.rehl2841b1c2023-05-25 17:23:55 -0700755 LOG_IF(INFO, quiet_flag_ == QuietLogging::kNo)
756 << "Application '" << name_ << "' pid " << pid_
James Kuszmaulb740f452023-11-14 17:44:29 -0800757 << " exited with status " << exit_code_.value() << " and "
758 << starter_version_string;
James Kuszmauld42edb42022-01-07 18:00:16 -0800759 } else {
Philipp Schrader595979d2023-09-13 11:31:48 -0700760 LOG_IF(WARNING, quiet_flag_ == QuietLogging::kNo ||
761 quiet_flag_ == QuietLogging::kNotForDebugging)
payton.rehl2841b1c2023-05-25 17:23:55 -0700762 << "Failed to start '" << name_ << "' on pid " << pid_
James Kuszmaulb740f452023-11-14 17:44:29 -0800763 << " : Exited with status " << exit_code_.value() << " and "
764 << starter_version_string;
James Kuszmauld42edb42022-01-07 18:00:16 -0800765 }
James Kuszmaul6f10b382022-03-11 22:31:38 -0800766 if (autorestart()) {
767 QueueStart();
768 } else {
769 status_ = aos::starter::State::STOPPED;
Austin Schuh1cea9032023-07-10 11:56:40 -0700770 OnChange();
James Kuszmaul6f10b382022-03-11 22:31:38 -0800771 }
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800772 break;
773 }
774 case aos::starter::State::RUNNING: {
James Kuszmauld42edb42022-01-07 18:00:16 -0800775 if (exit_code_.value() == 0) {
payton.rehl2841b1c2023-05-25 17:23:55 -0700776 LOG_IF(INFO, quiet_flag_ == QuietLogging::kNo)
777 << "Application '" << name_ << "' pid " << pid_
778 << " exited with status " << exit_code_.value();
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800779 } else {
Philipp Schrader595979d2023-09-13 11:31:48 -0700780 if (quiet_flag_ == QuietLogging::kNo ||
781 quiet_flag_ == QuietLogging::kNotForDebugging) {
James Kuszmaulb740f452023-11-14 17:44:29 -0800782 const std::string version_string =
James Kuszmaul8544c492023-07-31 15:00:38 -0700783 latest_timing_report_version_.has_value()
James Kuszmaulb740f452023-11-14 17:44:29 -0800784 ? absl::StrCat("version '",
785 latest_timing_report_version_.value(), "'")
786 : starter_version_string;
James Kuszmaul8544c492023-07-31 15:00:38 -0700787 LOG_IF(WARNING, quiet_flag_ == QuietLogging::kNo)
James Kuszmaulb740f452023-11-14 17:44:29 -0800788 << "Application '" << name_ << "' pid " << pid_ << " "
James Kuszmaul8544c492023-07-31 15:00:38 -0700789 << version_string << " exited unexpectedly with status "
790 << exit_code_.value();
791 }
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800792 }
James Kuszmaul6f10b382022-03-11 22:31:38 -0800793 if (autorestart()) {
794 QueueStart();
795 } else {
796 status_ = aos::starter::State::STOPPED;
Austin Schuh1cea9032023-07-10 11:56:40 -0700797 OnChange();
James Kuszmaul6f10b382022-03-11 22:31:38 -0800798 }
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800799 break;
800 }
801 case aos::starter::State::STOPPING: {
payton.rehl2841b1c2023-05-25 17:23:55 -0700802 LOG_IF(INFO, quiet_flag_ == QuietLogging::kNo)
803 << "Successfully stopped '" << name_ << "' pid: " << pid_
804 << " with status " << exit_code_.value();
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800805 status_ = aos::starter::State::STOPPED;
806
807 // Disable force stop timer since the process already died
808 stop_timer_->Disable();
809
Austin Schuh1cea9032023-07-10 11:56:40 -0700810 OnChange();
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800811 if (terminating_) {
812 return true;
813 }
814
815 if (queue_restart_) {
816 queue_restart_ = false;
817 status_ = aos::starter::State::WAITING;
818 DoStart();
819 }
820 break;
821 }
822 case aos::starter::State::WAITING:
823 case aos::starter::State::STOPPED: {
Sarah Newman21c59202022-06-16 12:36:33 -0700824 __builtin_unreachable();
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800825 break;
826 }
827 }
828
829 return false;
830}
831
Austin Schuh1cea9032023-07-10 11:56:40 -0700832void Application::OnChange() {
833 for (auto &fn : on_change_) {
834 fn();
835 }
836}
837
Adam Snaider70deaf22023-08-11 13:58:34 -0700838Application::~Application() {
839 start_timer_->Disable();
840 restart_timer_->Disable();
841 stop_timer_->Disable();
842 pipe_timer_->Disable();
843 child_status_handler_->Disable();
844}
845
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800846} // namespace aos::starter