blob: c36d59b91939beb24d5febe31a53e949b52fced9 [file] [log] [blame]
James Kuszmaul3224b8e2022-01-07 19:00:39 -08001#include "aos/starter/subprocess.h"
2
Stephan Pleinesf581a072024-05-23 20:59:27 -07003#include <errno.h>
James Kuszmaul3224b8e2022-01-07 19:00:39 -08004#include <grp.h>
5#include <pwd.h>
Stephan Pleinesf581a072024-05-23 20:59:27 -07006#include <signal.h>
7#include <stdlib.h>
James Kuszmaul3224b8e2022-01-07 19:00:39 -08008#include <sys/prctl.h>
Stephan Pleinesf581a072024-05-23 20:59:27 -07009#include <sys/stat.h>
James Kuszmaul3224b8e2022-01-07 19:00:39 -080010#include <sys/wait.h>
Stephan Pleinesf581a072024-05-23 20:59:27 -070011#include <unistd.h>
James Kuszmaul3224b8e2022-01-07 19:00:39 -080012
#include <compare>
#include <iterator>
#include <ostream>
#include <ratio>
#include <system_error>
17
18#include "absl/strings/str_cat.h"
James Kuszmaul37a56af2023-07-29 15:15:16 -070019#include "absl/strings/str_split.h"
James Kuszmaul3224b8e2022-01-07 19:00:39 -080020#include "glog/logging.h"
21
Stephan Pleinesf581a072024-05-23 20:59:27 -070022#include "aos/util/file.h"
23#include "aos/util/process_info_generated.h"
James Kuszmaul8544c492023-07-31 15:00:38 -070024
James Kuszmaul3224b8e2022-01-07 19:00:39 -080025namespace aos::starter {
26
Philipp Schraderfa8fc492023-09-26 14:52:02 -070027// Blocks all signals while an instance of this class is in scope.
28class ScopedCompleteSignalBlocker {
29 public:
30 ScopedCompleteSignalBlocker() {
31 sigset_t mask;
32 sigfillset(&mask);
33 // Remember the current mask.
34 PCHECK(sigprocmask(SIG_SETMASK, &mask, &old_mask_) == 0);
35 }
36
37 ~ScopedCompleteSignalBlocker() {
38 // Restore the remembered mask.
39 PCHECK(sigprocmask(SIG_SETMASK, &old_mask_, nullptr) == 0);
40 }
41
42 private:
43 sigset_t old_mask_;
44};
45
namespace {

// Returns the inode number that stat() reports for `path`, or std::nullopt if
// the stat() call fails.
std::optional<ino_t> GetInodeForPath(const std::filesystem::path &path) {
  struct stat file_info;
  const int stat_result = stat(path.c_str(), &file_info);
  if (stat_result == 0) {
    return file_info.st_ino;
  }
  return std::nullopt;
}

// Returns true if `path` can not be stat()'ed any more or if its inode differs
// from `previous_inode`.
bool InodeChanged(const std::filesystem::path &path, ino_t previous_inode) {
  const std::optional<ino_t> inode_now = GetInodeForPath(path);
  return !inode_now.has_value() || *inode_now != previous_inode;
}

}  // namespace
62
63std::filesystem::path ResolvePath(std::string_view command) {
64 std::filesystem::path command_path = command;
65 if (command.find("/") != std::string_view::npos) {
66 CHECK(std::filesystem::exists(command_path))
67 << ": " << command << " does not exist.";
68 return std::filesystem::canonical(command_path);
69 }
70 const char *system_path = getenv("PATH");
71 std::string system_path_buffer;
72 if (system_path == nullptr) {
73 const size_t default_path_length = confstr(_CS_PATH, nullptr, 0);
74 PCHECK(default_path_length != 0) << ": Unable to resolve " << command;
75 system_path_buffer.resize(default_path_length);
76 confstr(_CS_PATH, system_path_buffer.data(), system_path_buffer.size());
77 system_path = system_path_buffer.c_str();
78 VLOG(2) << "Using default path of " << system_path
79 << " in the absence of PATH being set.";
80 }
81 const std::vector<std::string_view> search_paths =
82 absl::StrSplit(system_path, ':');
83 for (const std::string_view search_path : search_paths) {
84 const std::filesystem::path candidate =
85 std::filesystem::path(search_path) / command_path;
86 if (std::filesystem::exists(candidate)) {
87 return std::filesystem::canonical(candidate);
88 }
89 }
90 LOG(FATAL) << "Unable to resolve " << command;
91}
92
Austin Schuhbbeb37e2022-08-17 16:19:27 -070093// RAII class to become root and restore back to the original user and group
94// afterwards.
95class Sudo {
96 public:
97 Sudo() {
98 // Save what we were.
99 PCHECK(getresuid(&ruid_, &euid_, &suid_) == 0);
100 PCHECK(getresgid(&rgid_, &egid_, &sgid_) == 0);
101
102 // Become root.
103 PCHECK(setresuid(/* ruid */ 0 /* root */, /* euid */ 0, /* suid */ 0) == 0)
104 << ": Failed to become root";
105 PCHECK(setresgid(/* ruid */ 0 /* root */, /* euid */ 0, /* suid */ 0) == 0)
106 << ": Failed to become root";
107 }
108
109 ~Sudo() {
110 // And recover.
111 PCHECK(setresgid(rgid_, egid_, sgid_) == 0);
112 PCHECK(setresuid(ruid_, euid_, suid_) == 0);
113 }
114
115 uid_t ruid_, euid_, suid_;
116 gid_t rgid_, egid_, sgid_;
117};
118
Austin Schuh77e20a32023-08-01 12:25:03 -0700119MemoryCGroup::MemoryCGroup(std::string_view name, Create should_create)
120 : cgroup_(absl::StrCat("/sys/fs/cgroup/memory/aos_", name)),
121 should_create_(should_create) {
122 if (should_create_ == Create::kDoCreate) {
123 Sudo sudo;
124 int ret = mkdir(cgroup_.c_str(), 0755);
Austin Schuhbbeb37e2022-08-17 16:19:27 -0700125
Austin Schuh77e20a32023-08-01 12:25:03 -0700126 if (ret != 0) {
127 if (errno == EEXIST) {
128 PCHECK(rmdir(cgroup_.c_str()) == 0)
129 << ": Failed to remove previous cgroup " << cgroup_;
130 ret = mkdir(cgroup_.c_str(), 0755);
131 }
Austin Schuhbbeb37e2022-08-17 16:19:27 -0700132 }
Austin Schuhbbeb37e2022-08-17 16:19:27 -0700133
Austin Schuh77e20a32023-08-01 12:25:03 -0700134 if (ret != 0) {
135 PLOG(FATAL) << ": Failed to create cgroup aos_" << cgroup_
136 << ", do you have permission?";
137 }
Austin Schuhbbeb37e2022-08-17 16:19:27 -0700138 }
139}
140
141void MemoryCGroup::AddTid(pid_t pid) {
142 if (pid == 0) {
143 pid = getpid();
144 }
Austin Schuh77e20a32023-08-01 12:25:03 -0700145 if (should_create_ == Create::kDoCreate) {
146 Sudo sudo;
147 util::WriteStringToFileOrDie(absl::StrCat(cgroup_, "/tasks").c_str(),
148 std::to_string(pid));
149 } else {
150 util::WriteStringToFileOrDie(absl::StrCat(cgroup_, "/tasks").c_str(),
151 std::to_string(pid));
152 }
Austin Schuhbbeb37e2022-08-17 16:19:27 -0700153}
154
155void MemoryCGroup::SetLimit(std::string_view limit_name, uint64_t limit_value) {
Austin Schuh77e20a32023-08-01 12:25:03 -0700156 if (should_create_ == Create::kDoCreate) {
157 Sudo sudo;
158 util::WriteStringToFileOrDie(absl::StrCat(cgroup_, "/", limit_name).c_str(),
159 std::to_string(limit_value));
160 } else {
161 util::WriteStringToFileOrDie(absl::StrCat(cgroup_, "/", limit_name).c_str(),
162 std::to_string(limit_value));
163 }
Austin Schuhbbeb37e2022-08-17 16:19:27 -0700164}
165
166MemoryCGroup::~MemoryCGroup() {
Austin Schuh77e20a32023-08-01 12:25:03 -0700167 if (should_create_ == Create::kDoCreate) {
168 Sudo sudo;
169 PCHECK(rmdir(absl::StrCat(cgroup_).c_str()) == 0);
170 }
Austin Schuhbbeb37e2022-08-17 16:19:27 -0700171}
172
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800173SignalListener::SignalListener(aos::ShmEventLoop *loop,
174 std::function<void(signalfd_siginfo)> callback)
Austin Schuh1cea9032023-07-10 11:56:40 -0700175 : SignalListener(loop->epoll(), std::move(callback)) {}
176
177SignalListener::SignalListener(aos::internal::EPoll *epoll,
178 std::function<void(signalfd_siginfo)> callback)
179 : SignalListener(epoll, callback,
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800180 {SIGHUP, SIGINT, SIGQUIT, SIGABRT, SIGFPE, SIGSEGV,
181 SIGPIPE, SIGTERM, SIGBUS, SIGXCPU, SIGCHLD}) {}
182
183SignalListener::SignalListener(aos::ShmEventLoop *loop,
184 std::function<void(signalfd_siginfo)> callback,
185 std::initializer_list<unsigned int> signals)
Austin Schuh1cea9032023-07-10 11:56:40 -0700186 : SignalListener(loop->epoll(), std::move(callback), std::move(signals)) {}
187
188SignalListener::SignalListener(aos::internal::EPoll *epoll,
189 std::function<void(signalfd_siginfo)> callback,
190 std::initializer_list<unsigned int> signals)
191 : epoll_(epoll), callback_(std::move(callback)), signalfd_(signals) {
192 epoll_->OnReadable(signalfd_.fd(), [this] {
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800193 signalfd_siginfo info = signalfd_.Read();
194
195 if (info.ssi_signo == 0) {
196 LOG(WARNING) << "Could not read " << sizeof(signalfd_siginfo) << " bytes";
197 return;
198 }
199
200 callback_(info);
201 });
202}
203
Austin Schuh1cea9032023-07-10 11:56:40 -0700204SignalListener::~SignalListener() { epoll_->DeleteFd(signalfd_.fd()); }
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800205
James Kuszmauld42edb42022-01-07 18:00:16 -0800206Application::Application(std::string_view name,
207 std::string_view executable_name,
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800208 aos::EventLoop *event_loop,
payton.rehl2841b1c2023-05-25 17:23:55 -0700209 std::function<void()> on_change,
210 QuietLogging quiet_flag)
James Kuszmauld42edb42022-01-07 18:00:16 -0800211 : name_(name),
James Kuszmaul37a56af2023-07-29 15:15:16 -0700212 path_(ResolvePath(executable_name)),
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800213 event_loop_(event_loop),
214 start_timer_(event_loop_->AddTimer([this] {
215 status_ = aos::starter::State::RUNNING;
payton.rehl2841b1c2023-05-25 17:23:55 -0700216 LOG_IF(INFO, quiet_flag_ == QuietLogging::kNo)
217 << "Started '" << name_ << "' pid: " << pid_;
James Kuszmaul37a56af2023-07-29 15:15:16 -0700218 // Check if the file on disk changed while we were starting up. We allow
219 // this state for the same reason that we don't just use /proc/$pid/exe
220 // to determine if the file is deleted--we may be running a script or
221 // sudo or the such and determining the state of the file that we
222 // actually care about sounds like more work than we want to deal with.
223 if (InodeChanged(path_, pre_fork_inode_)) {
224 file_state_ = FileState::CHANGED_DURING_STARTUP;
225 } else {
226 file_state_ = FileState::NO_CHANGE;
227 }
228
229 OnChange();
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800230 })),
231 restart_timer_(event_loop_->AddTimer([this] { DoStart(); })),
232 stop_timer_(event_loop_->AddTimer([this] {
233 if (kill(pid_, SIGKILL) == 0) {
Philipp Schrader595979d2023-09-13 11:31:48 -0700234 LOG_IF(WARNING, quiet_flag_ == QuietLogging::kNo ||
235 quiet_flag_ == QuietLogging::kNotForDebugging)
payton.rehl2841b1c2023-05-25 17:23:55 -0700236 << "Failed to stop, sending SIGKILL to '" << name_
237 << "' pid: " << pid_;
Sarah Newman9687e062023-09-08 12:22:27 -0700238 } else {
Philipp Schrader595979d2023-09-13 11:31:48 -0700239 PLOG_IF(WARNING, quiet_flag_ == QuietLogging::kNo ||
240 quiet_flag_ == QuietLogging::kNotForDebugging)
Sarah Newman9687e062023-09-08 12:22:27 -0700241 << "Failed to send SIGKILL to '" << name_ << "' pid: " << pid_;
242 stop_timer_->Schedule(event_loop_->monotonic_now() +
243 std::chrono::seconds(1));
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800244 }
245 })),
James Kuszmauld42edb42022-01-07 18:00:16 -0800246 pipe_timer_(event_loop_->AddTimer([this]() { FetchOutputs(); })),
247 child_status_handler_(
248 event_loop_->AddTimer([this]() { MaybeHandleSignal(); })),
Austin Schuh1cea9032023-07-10 11:56:40 -0700249 on_change_({on_change}),
payton.rehl2841b1c2023-05-25 17:23:55 -0700250 quiet_flag_(quiet_flag) {
James Kuszmaul06a8f352024-03-15 14:15:57 -0700251 // Keep the length of the timer name bounded to some reasonable length.
252 start_timer_->set_name(absl::StrCat("app_start_", name.substr(0, 10)));
253 restart_timer_->set_name(absl::StrCat("app_restart_", name.substr(0, 10)));
254 stop_timer_->set_name(absl::StrCat("app_stop_", name.substr(0, 10)));
255 pipe_timer_->set_name(absl::StrCat("app_pipe_", name.substr(0, 10)));
256 child_status_handler_->set_name(
257 absl::StrCat("app_status_handler_", name.substr(0, 10)));
Sanjay Narayanan92fdc3d2023-08-25 14:42:56 -0700258 // Every second poll to check if the child is dead. This is used as a
James Kuszmaul06a8f352024-03-15 14:15:57 -0700259 // default for the case where the user is not directly catching SIGCHLD
260 // and calling MaybeHandleSignal for us.
Sanjay Narayanan92fdc3d2023-08-25 14:42:56 -0700261 child_status_handler_->Schedule(event_loop_->monotonic_now(),
262 std::chrono::seconds(1));
James Kuszmauld42edb42022-01-07 18:00:16 -0800263}
264
265Application::Application(const aos::Application *application,
266 aos::EventLoop *event_loop,
payton.rehl2841b1c2023-05-25 17:23:55 -0700267 std::function<void()> on_change,
268 QuietLogging quiet_flag)
James Kuszmauld42edb42022-01-07 18:00:16 -0800269 : Application(application->name()->string_view(),
270 application->has_executable_name()
271 ? application->executable_name()->string_view()
272 : application->name()->string_view(),
payton.rehl2841b1c2023-05-25 17:23:55 -0700273 event_loop, on_change, quiet_flag) {
James Kuszmauld42edb42022-01-07 18:00:16 -0800274 user_name_ = application->has_user() ? application->user()->str() : "";
275 user_ = application->has_user() ? FindUid(user_name_.c_str()) : std::nullopt;
276 group_ = application->has_user() ? FindPrimaryGidForUser(user_name_.c_str())
277 : std::nullopt;
278 autostart_ = application->autostart();
279 autorestart_ = application->autorestart();
280 if (application->has_args()) {
281 set_args(*application->args());
282 }
Austin Schuhbbeb37e2022-08-17 16:19:27 -0700283
284 if (application->has_memory_limit() && application->memory_limit() > 0) {
285 SetMemoryLimit(application->memory_limit());
286 }
Philipp Schraderc8e779e2024-01-25 16:32:39 -0800287
288 set_stop_grace_period(std::chrono::nanoseconds(application->stop_time()));
James Kuszmauld42edb42022-01-07 18:00:16 -0800289}
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800290
291void Application::DoStart() {
292 if (status_ != aos::starter::State::WAITING) {
293 return;
294 }
295
296 start_timer_->Disable();
297 restart_timer_->Disable();
298
James Kuszmauld42edb42022-01-07 18:00:16 -0800299 status_pipes_ = util::ScopedPipe::MakePipe();
300
301 if (capture_stdout_) {
302 stdout_pipes_ = util::ScopedPipe::MakePipe();
303 stdout_.clear();
304 }
305 if (capture_stderr_) {
306 stderr_pipes_ = util::ScopedPipe::MakePipe();
307 stderr_.clear();
308 }
309
Philipp Schradera6712522023-07-05 20:25:11 -0700310 pipe_timer_->Schedule(event_loop_->monotonic_now(),
311 std::chrono::milliseconds(100));
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800312
Philipp Schraderfa8fc492023-09-26 14:52:02 -0700313 {
314 // Block all signals during the fork() call. Together with the default
315 // signal handler restoration below, This prevents signal handlers from
316 // getting called in the child and accidentally affecting the parent. In
317 // particular, the exit handler for shm_event_loop could be called here if
318 // we don't exec() quickly enough.
319 ScopedCompleteSignalBlocker signal_blocker;
James Kuszmaul37a56af2023-07-29 15:15:16 -0700320 {
321 const std::optional<ino_t> inode = GetInodeForPath(path_);
322 CHECK(inode.has_value())
323 << ": " << path_ << " does not seem to be stat'able.";
324 pre_fork_inode_ = inode.value();
325 }
Philipp Schraderfa8fc492023-09-26 14:52:02 -0700326 const pid_t pid = fork();
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800327
Philipp Schraderfa8fc492023-09-26 14:52:02 -0700328 if (pid != 0) {
329 if (pid == -1) {
330 PLOG_IF(WARNING, quiet_flag_ == QuietLogging::kNo ||
331 quiet_flag_ == QuietLogging::kNotForDebugging)
332 << "Failed to fork '" << name_ << "'";
333 stop_reason_ = aos::starter::LastStopReason::FORK_ERR;
334 status_ = aos::starter::State::STOPPED;
335 } else {
336 pid_ = pid;
337 id_ = next_id_++;
338 start_time_ = event_loop_->monotonic_now();
339 status_ = aos::starter::State::STARTING;
340 latest_timing_report_version_.reset();
341 LOG_IF(INFO, quiet_flag_ == QuietLogging::kNo)
342 << "Starting '" << name_ << "' pid " << pid_;
343
344 // Set up timer which moves application to RUNNING state if it is still
345 // alive in 1 second.
346 start_timer_->Schedule(event_loop_->monotonic_now() +
347 std::chrono::seconds(1));
348 // Since we are the parent process, clear our write-side of all the
349 // pipes.
350 status_pipes_.write.reset();
351 stdout_pipes_.write.reset();
352 stderr_pipes_.write.reset();
353 }
354 OnChange();
355 return;
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800356 }
Philipp Schraderfa8fc492023-09-26 14:52:02 -0700357
358 // Clear any signal handlers so that they don't accidentally interfere with
359 // the parent process. Is there a better way to iterate over all the
360 // signals? Right now we're just dealing with the most common ones.
361 for (int signal : {SIGINT, SIGHUP, SIGTERM}) {
362 struct sigaction action;
363 sigemptyset(&action.sa_mask);
364 action.sa_flags = 0;
365 action.sa_handler = SIG_DFL;
366 PCHECK(sigaction(signal, &action, nullptr) == 0);
367 }
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800368 }
369
Austin Schuhbbeb37e2022-08-17 16:19:27 -0700370 if (memory_cgroup_) {
371 memory_cgroup_->AddTid();
372 }
373
James Kuszmauld42edb42022-01-07 18:00:16 -0800374 // Since we are the child process, clear our read-side of all the pipes.
375 status_pipes_.read.reset();
376 stdout_pipes_.read.reset();
377 stderr_pipes_.read.reset();
378
379 // The status pipe will not be needed if the execve succeeds.
380 status_pipes_.write->SetCloexec();
381
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800382 // Clear out signal mask of parent so forked process receives all signals
383 // normally.
384 sigset_t empty_mask;
385 sigemptyset(&empty_mask);
386 sigprocmask(SIG_SETMASK, &empty_mask, nullptr);
387
388 // Cleanup children if starter dies in a way that is not handled gracefully.
389 if (prctl(PR_SET_PDEATHSIG, SIGKILL) == -1) {
James Kuszmauld42edb42022-01-07 18:00:16 -0800390 status_pipes_.write->Write(
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800391 static_cast<uint32_t>(aos::starter::LastStopReason::SET_PRCTL_ERR));
392 PLOG(FATAL) << "Could not set PR_SET_PDEATHSIG to SIGKILL";
393 }
394
395 if (group_) {
396 CHECK(!user_name_.empty());
397 // The manpage for setgroups says we just need CAP_SETGID, but empirically
398 // we also need the effective UID to be 0 to make it work. user_ must also
399 // be set so we change this effective UID back later.
400 CHECK(user_);
401 if (seteuid(0) == -1) {
James Kuszmauld42edb42022-01-07 18:00:16 -0800402 status_pipes_.write->Write(
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800403 static_cast<uint32_t>(aos::starter::LastStopReason::SET_GRP_ERR));
404 PLOG(FATAL) << "Could not seteuid(0) for " << name_
405 << " in preparation for setting groups";
406 }
407 if (initgroups(user_name_.c_str(), *group_) == -1) {
James Kuszmauld42edb42022-01-07 18:00:16 -0800408 status_pipes_.write->Write(
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800409 static_cast<uint32_t>(aos::starter::LastStopReason::SET_GRP_ERR));
410 PLOG(FATAL) << "Could not initialize normal groups for " << name_
411 << " as " << user_name_ << " with " << *group_;
412 }
413 if (setgid(*group_) == -1) {
James Kuszmauld42edb42022-01-07 18:00:16 -0800414 status_pipes_.write->Write(
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800415 static_cast<uint32_t>(aos::starter::LastStopReason::SET_GRP_ERR));
416 PLOG(FATAL) << "Could not set group for " << name_ << " to " << *group_;
417 }
418 }
419
420 if (user_) {
421 if (setuid(*user_) == -1) {
James Kuszmauld42edb42022-01-07 18:00:16 -0800422 status_pipes_.write->Write(
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800423 static_cast<uint32_t>(aos::starter::LastStopReason::SET_USR_ERR));
424 PLOG(FATAL) << "Could not set user for " << name_ << " to " << *user_;
425 }
426 }
427
James Kuszmauld42edb42022-01-07 18:00:16 -0800428 if (capture_stdout_) {
429 PCHECK(STDOUT_FILENO == dup2(stdout_pipes_.write->fd(), STDOUT_FILENO));
430 stdout_pipes_.write.reset();
431 }
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800432
James Kuszmauld42edb42022-01-07 18:00:16 -0800433 if (capture_stderr_) {
434 PCHECK(STDERR_FILENO == dup2(stderr_pipes_.write->fd(), STDERR_FILENO));
435 stderr_pipes_.write.reset();
436 }
437
Sanjay Narayanan01a228f2022-04-26 14:19:30 -0700438 if (run_as_sudo_) {
Sarah Newman6d1e53b2022-08-09 14:38:08 -0700439 // For sudo we must supply the actual path
James Kuszmaul37a56af2023-07-29 15:15:16 -0700440 args_.insert(args_.begin(), path_.c_str());
Sanjay Narayanan01a228f2022-04-26 14:19:30 -0700441 args_.insert(args_.begin(), kSudo);
Sarah Newman6d1e53b2022-08-09 14:38:08 -0700442 } else {
443 // argv[0] should be the program name
444 args_.insert(args_.begin(), name_);
Sanjay Narayanan01a228f2022-04-26 14:19:30 -0700445 }
James Kuszmauld42edb42022-01-07 18:00:16 -0800446
447 std::vector<char *> cargs = CArgs();
Philipp Schrader790cb542023-07-05 21:06:52 -0700448 const char *path = run_as_sudo_ ? kSudo : path_.c_str();
Sanjay Narayanan01a228f2022-04-26 14:19:30 -0700449 execvp(path, cargs.data());
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800450
451 // If we got here, something went wrong
James Kuszmauld42edb42022-01-07 18:00:16 -0800452 status_pipes_.write->Write(
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800453 static_cast<uint32_t>(aos::starter::LastStopReason::EXECV_ERR));
Philipp Schrader595979d2023-09-13 11:31:48 -0700454 PLOG_IF(WARNING, quiet_flag_ == QuietLogging::kNo ||
455 quiet_flag_ == QuietLogging::kNotForDebugging)
payton.rehl2841b1c2023-05-25 17:23:55 -0700456 << "Could not execute " << name_ << " (" << path_ << ')';
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800457
458 _exit(EXIT_FAILURE);
459}
460
James Kuszmaul8544c492023-07-31 15:00:38 -0700461void Application::ObserveTimingReport(
462 const aos::monotonic_clock::time_point send_time,
463 const aos::timing::Report *msg) {
464 if (msg->name()->string_view() == name_ && msg->pid() == pid_ &&
465 msg->has_version()) {
466 latest_timing_report_version_ = msg->version()->str();
467 last_timing_report_ = send_time;
468 }
469}
470
James Kuszmauld42edb42022-01-07 18:00:16 -0800471void Application::FetchOutputs() {
472 if (capture_stdout_) {
473 stdout_pipes_.read->Read(&stdout_);
474 }
475 if (capture_stderr_) {
476 stderr_pipes_.read->Read(&stderr_);
477 }
478}
479
480const std::string &Application::GetStdout() {
481 CHECK(capture_stdout_);
482 FetchOutputs();
483 return stdout_;
484}
485
486const std::string &Application::GetStderr() {
487 CHECK(capture_stderr_);
488 FetchOutputs();
489 return stderr_;
490}
491
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800492void Application::DoStop(bool restart) {
493 // If stop or restart received, the old state of these is no longer applicable
494 // so cancel both.
495 restart_timer_->Disable();
496 start_timer_->Disable();
497
James Kuszmauld42edb42022-01-07 18:00:16 -0800498 FetchOutputs();
499
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800500 switch (status_) {
501 case aos::starter::State::STARTING:
502 case aos::starter::State::RUNNING: {
James Kuszmaul37a56af2023-07-29 15:15:16 -0700503 file_state_ = FileState::NOT_RUNNING;
Philipp Schrader595979d2023-09-13 11:31:48 -0700504 LOG_IF(INFO, quiet_flag_ == QuietLogging::kNo ||
505 quiet_flag_ == QuietLogging::kNotForDebugging)
payton.rehl2841b1c2023-05-25 17:23:55 -0700506 << "Stopping '" << name_ << "' pid: " << pid_ << " with signal "
507 << SIGINT;
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800508 status_ = aos::starter::State::STOPPING;
509
Philipp Schrader595979d2023-09-13 11:31:48 -0700510 if (kill(pid_, SIGINT) != 0) {
511 PLOG_IF(INFO, quiet_flag_ == QuietLogging::kNo ||
512 quiet_flag_ == QuietLogging::kNotForDebugging)
513 << "Failed to send signal " << SIGINT << " to '" << name_
514 << "' pid: " << pid_;
515 }
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800516
517 // Watchdog timer to SIGKILL application if it is still running 1 second
518 // after SIGINT
Philipp Schraderc8e779e2024-01-25 16:32:39 -0800519 stop_timer_->Schedule(event_loop_->monotonic_now() + stop_grace_period_);
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800520 queue_restart_ = restart;
Austin Schuh1cea9032023-07-10 11:56:40 -0700521 OnChange();
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800522 break;
523 }
524 case aos::starter::State::WAITING: {
525 // If waiting to restart, and receives restart, skip the waiting period
526 // and restart immediately. If stop received, all we have to do is move
527 // to the STOPPED state.
528 if (restart) {
529 DoStart();
530 } else {
531 status_ = aos::starter::State::STOPPED;
Austin Schuh1cea9032023-07-10 11:56:40 -0700532 OnChange();
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800533 }
534 break;
535 }
536 case aos::starter::State::STOPPING: {
537 // If the application is already stopping, then we just need to update the
538 // restart flag to the most recent status.
539 queue_restart_ = restart;
540 break;
541 }
542 case aos::starter::State::STOPPED: {
543 // Restart immediately if the application is already stopped
544 if (restart) {
545 status_ = aos::starter::State::WAITING;
546 DoStart();
547 }
548 break;
549 }
550 }
551}
552
553void Application::QueueStart() {
554 status_ = aos::starter::State::WAITING;
555
payton.rehl2841b1c2023-05-25 17:23:55 -0700556 LOG_IF(INFO, quiet_flag_ == QuietLogging::kNo)
557 << "Restarting " << name_ << " in 3 seconds";
Philipp Schradera6712522023-07-05 20:25:11 -0700558 restart_timer_->Schedule(event_loop_->monotonic_now() +
559 std::chrono::seconds(3));
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800560 start_timer_->Disable();
561 stop_timer_->Disable();
Austin Schuh1cea9032023-07-10 11:56:40 -0700562 OnChange();
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800563}
564
James Kuszmauld42edb42022-01-07 18:00:16 -0800565std::vector<char *> Application::CArgs() {
566 std::vector<char *> cargs;
567 std::transform(args_.begin(), args_.end(), std::back_inserter(cargs),
568 [](std::string &str) { return str.data(); });
569 cargs.push_back(nullptr);
570 return cargs;
571}
572
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800573void Application::set_args(
574 const flatbuffers::Vector<flatbuffers::Offset<flatbuffers::String>> &v) {
575 args_.clear();
576 std::transform(v.begin(), v.end(), std::back_inserter(args_),
James Kuszmauld42edb42022-01-07 18:00:16 -0800577 [](const flatbuffers::String *str) { return str->str(); });
578}
579
580void Application::set_args(std::vector<std::string> args) {
581 args_ = std::move(args);
582}
583
584void Application::set_capture_stdout(bool capture) {
585 capture_stdout_ = capture;
586}
587
588void Application::set_capture_stderr(bool capture) {
589 capture_stderr_ = capture;
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800590}
591
592std::optional<uid_t> Application::FindUid(const char *name) {
593 // TODO(austin): Use the reentrant version. This should be safe.
594 struct passwd *user_data = getpwnam(name);
595 if (user_data != nullptr) {
596 return user_data->pw_uid;
597 } else {
598 LOG(FATAL) << "Could not find user " << name;
599 return std::nullopt;
600 }
601}
602
603std::optional<gid_t> Application::FindPrimaryGidForUser(const char *name) {
604 // TODO(austin): Use the reentrant version. This should be safe.
605 struct passwd *user_data = getpwnam(name);
606 if (user_data != nullptr) {
607 return user_data->pw_gid;
608 } else {
609 LOG(FATAL) << "Could not find user " << name;
610 return std::nullopt;
611 }
612}
613
James Kuszmaul37a56af2023-07-29 15:15:16 -0700614FileState Application::UpdateFileState() {
615 // On every call, check if a different file is present on disk. Note that
616 // while the applications is running, the file cannot be changed without the
617 // inode changing.
618 // We could presumably use inotify or the such to watch the file instead,
619 // but this works and we do not expect substantial cost from reading the inode
620 // of a file every time we send out a status message.
621 if (InodeChanged(path_, pre_fork_inode_)) {
622 switch (file_state_) {
623 case FileState::NO_CHANGE:
624 file_state_ = FileState::CHANGED;
625 break;
626 case FileState::NOT_RUNNING:
627 case FileState::CHANGED_DURING_STARTUP:
628 case FileState::CHANGED:
629 break;
630 }
631 }
632 return file_state_;
633}
634
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800635flatbuffers::Offset<aos::starter::ApplicationStatus>
James Kuszmaul6295a642022-03-22 15:23:59 -0700636Application::PopulateStatus(flatbuffers::FlatBufferBuilder *builder,
637 util::Top *top) {
James Kuszmaul37a56af2023-07-29 15:15:16 -0700638 UpdateFileState();
639
Austin Schuh6bdcc372024-06-27 14:49:11 -0700640 CHECK(builder != nullptr);
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800641 auto name_fbs = builder->CreateString(name_);
642
James Kuszmaul6295a642022-03-22 15:23:59 -0700643 const bool valid_pid = pid_ > 0 && status_ != aos::starter::State::STOPPED;
644 const flatbuffers::Offset<util::ProcessInfo> process_info =
645 valid_pid ? top->InfoForProcess(builder, pid_)
646 : flatbuffers::Offset<util::ProcessInfo>();
647
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800648 aos::starter::ApplicationStatus::Builder status_builder(*builder);
649 status_builder.add_name(name_fbs);
650 status_builder.add_state(status_);
James Kuszmauld42edb42022-01-07 18:00:16 -0800651 if (exit_code_.has_value()) {
652 status_builder.add_last_exit_code(exit_code_.value());
653 }
James Kuszmaul8544c492023-07-31 15:00:38 -0700654 status_builder.add_has_active_timing_report(
655 last_timing_report_ +
656 // Leave a bit of margin on the timing report receipt time, to allow
657 // for timing errors.
658 3 * std::chrono::milliseconds(FLAGS_timing_report_ms) >
659 event_loop_->monotonic_now());
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800660 status_builder.add_last_stop_reason(stop_reason_);
661 if (pid_ != -1) {
662 status_builder.add_pid(pid_);
663 status_builder.add_id(id_);
664 }
James Kuszmaul6295a642022-03-22 15:23:59 -0700665 // Note that even if process_info is null, calling add_process_info is fine.
666 status_builder.add_process_info(process_info);
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800667 status_builder.add_last_start_time(start_time_.time_since_epoch().count());
James Kuszmaul37a56af2023-07-29 15:15:16 -0700668 status_builder.add_file_state(file_state_);
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800669 return status_builder.Finish();
670}
671
672void Application::Terminate() {
673 stop_reason_ = aos::starter::LastStopReason::TERMINATE;
674 DoStop(false);
675 terminating_ = true;
676}
677
678void Application::HandleCommand(aos::starter::Command cmd) {
679 switch (cmd) {
680 case aos::starter::Command::START: {
681 switch (status_) {
682 case aos::starter::State::WAITING: {
683 restart_timer_->Disable();
684 DoStart();
685 break;
686 }
687 case aos::starter::State::STARTING: {
688 break;
689 }
690 case aos::starter::State::RUNNING: {
691 break;
692 }
693 case aos::starter::State::STOPPING: {
694 queue_restart_ = true;
695 break;
696 }
697 case aos::starter::State::STOPPED: {
698 status_ = aos::starter::State::WAITING;
699 DoStart();
700 break;
701 }
702 }
703 break;
704 }
705 case aos::starter::Command::STOP: {
706 stop_reason_ = aos::starter::LastStopReason::STOP_REQUESTED;
707 DoStop(false);
708 break;
709 }
710 case aos::starter::Command::RESTART: {
711 stop_reason_ = aos::starter::LastStopReason::RESTART_REQUESTED;
712 DoStop(true);
713 break;
714 }
715 }
716}
717
718bool Application::MaybeHandleSignal() {
719 int status;
720
Sarah Newman21c59202022-06-16 12:36:33 -0700721 if (status_ == aos::starter::State::WAITING ||
722 status_ == aos::starter::State::STOPPED) {
723 // We can't possibly have received a signal meant for this process.
724 return false;
725 }
726
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800727 // Check if the status of this process has changed
Sarah Newman21c59202022-06-16 12:36:33 -0700728 // The PID won't be -1 if this application has ever been run successfully
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800729 if (pid_ == -1 || waitpid(pid_, &status, WNOHANG) != pid_) {
730 return false;
731 }
732
733 // Check that the event was the process exiting
734 if (!WIFEXITED(status) && !WIFSIGNALED(status)) {
735 return false;
736 }
737
James Kuszmauld42edb42022-01-07 18:00:16 -0800738 start_timer_->Disable();
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800739 exit_time_ = event_loop_->monotonic_now();
740 exit_code_ = WIFEXITED(status) ? WEXITSTATUS(status) : WTERMSIG(status);
James Kuszmaul37a56af2023-07-29 15:15:16 -0700741 file_state_ = FileState::NOT_RUNNING;
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800742
James Kuszmauld42edb42022-01-07 18:00:16 -0800743 if (auto read_result = status_pipes_.read->Read()) {
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800744 stop_reason_ = static_cast<aos::starter::LastStopReason>(*read_result);
745 }
746
James Kuszmaulb740f452023-11-14 17:44:29 -0800747 const std::string starter_version_string =
748 absl::StrCat("starter version '",
749 event_loop_->VersionString().value_or("unknown"), "'");
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800750 switch (status_) {
751 case aos::starter::State::STARTING: {
James Kuszmauld42edb42022-01-07 18:00:16 -0800752 if (exit_code_.value() == 0) {
payton.rehl2841b1c2023-05-25 17:23:55 -0700753 LOG_IF(INFO, quiet_flag_ == QuietLogging::kNo)
754 << "Application '" << name_ << "' pid " << pid_
James Kuszmaulb740f452023-11-14 17:44:29 -0800755 << " exited with status " << exit_code_.value() << " and "
756 << starter_version_string;
James Kuszmauld42edb42022-01-07 18:00:16 -0800757 } else {
Philipp Schrader595979d2023-09-13 11:31:48 -0700758 LOG_IF(WARNING, quiet_flag_ == QuietLogging::kNo ||
759 quiet_flag_ == QuietLogging::kNotForDebugging)
payton.rehl2841b1c2023-05-25 17:23:55 -0700760 << "Failed to start '" << name_ << "' on pid " << pid_
James Kuszmaulb740f452023-11-14 17:44:29 -0800761 << " : Exited with status " << exit_code_.value() << " and "
762 << starter_version_string;
James Kuszmauld42edb42022-01-07 18:00:16 -0800763 }
James Kuszmaul6f10b382022-03-11 22:31:38 -0800764 if (autorestart()) {
765 QueueStart();
766 } else {
767 status_ = aos::starter::State::STOPPED;
Austin Schuh1cea9032023-07-10 11:56:40 -0700768 OnChange();
James Kuszmaul6f10b382022-03-11 22:31:38 -0800769 }
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800770 break;
771 }
772 case aos::starter::State::RUNNING: {
James Kuszmauld42edb42022-01-07 18:00:16 -0800773 if (exit_code_.value() == 0) {
payton.rehl2841b1c2023-05-25 17:23:55 -0700774 LOG_IF(INFO, quiet_flag_ == QuietLogging::kNo)
775 << "Application '" << name_ << "' pid " << pid_
776 << " exited with status " << exit_code_.value();
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800777 } else {
Philipp Schrader595979d2023-09-13 11:31:48 -0700778 if (quiet_flag_ == QuietLogging::kNo ||
779 quiet_flag_ == QuietLogging::kNotForDebugging) {
James Kuszmaulb740f452023-11-14 17:44:29 -0800780 const std::string version_string =
James Kuszmaul8544c492023-07-31 15:00:38 -0700781 latest_timing_report_version_.has_value()
James Kuszmaulb740f452023-11-14 17:44:29 -0800782 ? absl::StrCat("version '",
783 latest_timing_report_version_.value(), "'")
784 : starter_version_string;
James Kuszmaul8544c492023-07-31 15:00:38 -0700785 LOG_IF(WARNING, quiet_flag_ == QuietLogging::kNo)
James Kuszmaulb740f452023-11-14 17:44:29 -0800786 << "Application '" << name_ << "' pid " << pid_ << " "
James Kuszmaul8544c492023-07-31 15:00:38 -0700787 << version_string << " exited unexpectedly with status "
788 << exit_code_.value();
789 }
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800790 }
James Kuszmaul6f10b382022-03-11 22:31:38 -0800791 if (autorestart()) {
792 QueueStart();
793 } else {
794 status_ = aos::starter::State::STOPPED;
Austin Schuh1cea9032023-07-10 11:56:40 -0700795 OnChange();
James Kuszmaul6f10b382022-03-11 22:31:38 -0800796 }
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800797 break;
798 }
799 case aos::starter::State::STOPPING: {
payton.rehl2841b1c2023-05-25 17:23:55 -0700800 LOG_IF(INFO, quiet_flag_ == QuietLogging::kNo)
801 << "Successfully stopped '" << name_ << "' pid: " << pid_
802 << " with status " << exit_code_.value();
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800803 status_ = aos::starter::State::STOPPED;
804
805 // Disable force stop timer since the process already died
806 stop_timer_->Disable();
807
Austin Schuh1cea9032023-07-10 11:56:40 -0700808 OnChange();
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800809 if (terminating_) {
810 return true;
811 }
812
813 if (queue_restart_) {
814 queue_restart_ = false;
815 status_ = aos::starter::State::WAITING;
816 DoStart();
817 }
818 break;
819 }
820 case aos::starter::State::WAITING:
821 case aos::starter::State::STOPPED: {
Sarah Newman21c59202022-06-16 12:36:33 -0700822 __builtin_unreachable();
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800823 break;
824 }
825 }
826
827 return false;
828}
829
Austin Schuh1cea9032023-07-10 11:56:40 -0700830void Application::OnChange() {
831 for (auto &fn : on_change_) {
832 fn();
833 }
834}
835
Adam Snaider70deaf22023-08-11 13:58:34 -0700836Application::~Application() {
837 start_timer_->Disable();
838 restart_timer_->Disable();
839 stop_timer_->Disable();
840 pipe_timer_->Disable();
841 child_status_handler_->Disable();
842}
843
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800844} // namespace aos::starter