blob: d677922065737b98362d8ef4b282d96a4828efe3 [file] [log] [blame]
James Kuszmaul3224b8e2022-01-07 19:00:39 -08001#include "aos/starter/subprocess.h"
2
3#include <grp.h>
4#include <pwd.h>
5#include <sys/prctl.h>
6#include <sys/types.h>
7#include <sys/wait.h>
8
James Kuszmaul37a56af2023-07-29 15:15:16 -07009#include "absl/strings/str_split.h"
James Kuszmaul3224b8e2022-01-07 19:00:39 -080010#include "glog/logging.h"
11
James Kuszmaul8544c492023-07-31 15:00:38 -070012#include "aos/flatbuffer_merge.h"
13
James Kuszmaul3224b8e2022-01-07 19:00:39 -080014namespace aos::starter {
15
Philipp Schraderfa8fc492023-09-26 14:52:02 -070016// Blocks all signals while an instance of this class is in scope.
17class ScopedCompleteSignalBlocker {
18 public:
19 ScopedCompleteSignalBlocker() {
20 sigset_t mask;
21 sigfillset(&mask);
22 // Remember the current mask.
23 PCHECK(sigprocmask(SIG_SETMASK, &mask, &old_mask_) == 0);
24 }
25
26 ~ScopedCompleteSignalBlocker() {
27 // Restore the remembered mask.
28 PCHECK(sigprocmask(SIG_SETMASK, &old_mask_, nullptr) == 0);
29 }
30
31 private:
32 sigset_t old_mask_;
33};
34
namespace {
// Returns the inode number reported by stat() for `path`, or std::nullopt if
// the stat() call fails.
std::optional<ino_t> GetInodeForPath(const std::filesystem::path &path) {
  struct stat file_info;
  const int stat_result = stat(path.c_str(), &file_info);
  if (stat_result == 0) {
    return file_info.st_ino;
  }
  return std::nullopt;
}

// Returns true if `path` can no longer be stat'ed or if its inode differs
// from `previous_inode`.
bool InodeChanged(const std::filesystem::path &path, ino_t previous_inode) {
  const std::optional<ino_t> inode_now = GetInodeForPath(path);
  return !inode_now.has_value() || inode_now.value() != previous_inode;
}
}  // namespace
51
52std::filesystem::path ResolvePath(std::string_view command) {
53 std::filesystem::path command_path = command;
54 if (command.find("/") != std::string_view::npos) {
55 CHECK(std::filesystem::exists(command_path))
56 << ": " << command << " does not exist.";
57 return std::filesystem::canonical(command_path);
58 }
59 const char *system_path = getenv("PATH");
60 std::string system_path_buffer;
61 if (system_path == nullptr) {
62 const size_t default_path_length = confstr(_CS_PATH, nullptr, 0);
63 PCHECK(default_path_length != 0) << ": Unable to resolve " << command;
64 system_path_buffer.resize(default_path_length);
65 confstr(_CS_PATH, system_path_buffer.data(), system_path_buffer.size());
66 system_path = system_path_buffer.c_str();
67 VLOG(2) << "Using default path of " << system_path
68 << " in the absence of PATH being set.";
69 }
70 const std::vector<std::string_view> search_paths =
71 absl::StrSplit(system_path, ':');
72 for (const std::string_view search_path : search_paths) {
73 const std::filesystem::path candidate =
74 std::filesystem::path(search_path) / command_path;
75 if (std::filesystem::exists(candidate)) {
76 return std::filesystem::canonical(candidate);
77 }
78 }
79 LOG(FATAL) << "Unable to resolve " << command;
80}
81
Austin Schuhbbeb37e2022-08-17 16:19:27 -070082// RAII class to become root and restore back to the original user and group
83// afterwards.
84class Sudo {
85 public:
86 Sudo() {
87 // Save what we were.
88 PCHECK(getresuid(&ruid_, &euid_, &suid_) == 0);
89 PCHECK(getresgid(&rgid_, &egid_, &sgid_) == 0);
90
91 // Become root.
92 PCHECK(setresuid(/* ruid */ 0 /* root */, /* euid */ 0, /* suid */ 0) == 0)
93 << ": Failed to become root";
94 PCHECK(setresgid(/* ruid */ 0 /* root */, /* euid */ 0, /* suid */ 0) == 0)
95 << ": Failed to become root";
96 }
97
98 ~Sudo() {
99 // And recover.
100 PCHECK(setresgid(rgid_, egid_, sgid_) == 0);
101 PCHECK(setresuid(ruid_, euid_, suid_) == 0);
102 }
103
104 uid_t ruid_, euid_, suid_;
105 gid_t rgid_, egid_, sgid_;
106};
107
Austin Schuh77e20a32023-08-01 12:25:03 -0700108MemoryCGroup::MemoryCGroup(std::string_view name, Create should_create)
109 : cgroup_(absl::StrCat("/sys/fs/cgroup/memory/aos_", name)),
110 should_create_(should_create) {
111 if (should_create_ == Create::kDoCreate) {
112 Sudo sudo;
113 int ret = mkdir(cgroup_.c_str(), 0755);
Austin Schuhbbeb37e2022-08-17 16:19:27 -0700114
Austin Schuh77e20a32023-08-01 12:25:03 -0700115 if (ret != 0) {
116 if (errno == EEXIST) {
117 PCHECK(rmdir(cgroup_.c_str()) == 0)
118 << ": Failed to remove previous cgroup " << cgroup_;
119 ret = mkdir(cgroup_.c_str(), 0755);
120 }
Austin Schuhbbeb37e2022-08-17 16:19:27 -0700121 }
Austin Schuhbbeb37e2022-08-17 16:19:27 -0700122
Austin Schuh77e20a32023-08-01 12:25:03 -0700123 if (ret != 0) {
124 PLOG(FATAL) << ": Failed to create cgroup aos_" << cgroup_
125 << ", do you have permission?";
126 }
Austin Schuhbbeb37e2022-08-17 16:19:27 -0700127 }
128}
129
130void MemoryCGroup::AddTid(pid_t pid) {
131 if (pid == 0) {
132 pid = getpid();
133 }
Austin Schuh77e20a32023-08-01 12:25:03 -0700134 if (should_create_ == Create::kDoCreate) {
135 Sudo sudo;
136 util::WriteStringToFileOrDie(absl::StrCat(cgroup_, "/tasks").c_str(),
137 std::to_string(pid));
138 } else {
139 util::WriteStringToFileOrDie(absl::StrCat(cgroup_, "/tasks").c_str(),
140 std::to_string(pid));
141 }
Austin Schuhbbeb37e2022-08-17 16:19:27 -0700142}
143
144void MemoryCGroup::SetLimit(std::string_view limit_name, uint64_t limit_value) {
Austin Schuh77e20a32023-08-01 12:25:03 -0700145 if (should_create_ == Create::kDoCreate) {
146 Sudo sudo;
147 util::WriteStringToFileOrDie(absl::StrCat(cgroup_, "/", limit_name).c_str(),
148 std::to_string(limit_value));
149 } else {
150 util::WriteStringToFileOrDie(absl::StrCat(cgroup_, "/", limit_name).c_str(),
151 std::to_string(limit_value));
152 }
Austin Schuhbbeb37e2022-08-17 16:19:27 -0700153}
154
155MemoryCGroup::~MemoryCGroup() {
Austin Schuh77e20a32023-08-01 12:25:03 -0700156 if (should_create_ == Create::kDoCreate) {
157 Sudo sudo;
158 PCHECK(rmdir(absl::StrCat(cgroup_).c_str()) == 0);
159 }
Austin Schuhbbeb37e2022-08-17 16:19:27 -0700160}
161
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800162SignalListener::SignalListener(aos::ShmEventLoop *loop,
163 std::function<void(signalfd_siginfo)> callback)
Austin Schuh1cea9032023-07-10 11:56:40 -0700164 : SignalListener(loop->epoll(), std::move(callback)) {}
165
166SignalListener::SignalListener(aos::internal::EPoll *epoll,
167 std::function<void(signalfd_siginfo)> callback)
168 : SignalListener(epoll, callback,
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800169 {SIGHUP, SIGINT, SIGQUIT, SIGABRT, SIGFPE, SIGSEGV,
170 SIGPIPE, SIGTERM, SIGBUS, SIGXCPU, SIGCHLD}) {}
171
172SignalListener::SignalListener(aos::ShmEventLoop *loop,
173 std::function<void(signalfd_siginfo)> callback,
174 std::initializer_list<unsigned int> signals)
Austin Schuh1cea9032023-07-10 11:56:40 -0700175 : SignalListener(loop->epoll(), std::move(callback), std::move(signals)) {}
176
177SignalListener::SignalListener(aos::internal::EPoll *epoll,
178 std::function<void(signalfd_siginfo)> callback,
179 std::initializer_list<unsigned int> signals)
180 : epoll_(epoll), callback_(std::move(callback)), signalfd_(signals) {
181 epoll_->OnReadable(signalfd_.fd(), [this] {
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800182 signalfd_siginfo info = signalfd_.Read();
183
184 if (info.ssi_signo == 0) {
185 LOG(WARNING) << "Could not read " << sizeof(signalfd_siginfo) << " bytes";
186 return;
187 }
188
189 callback_(info);
190 });
191}
192
Austin Schuh1cea9032023-07-10 11:56:40 -0700193SignalListener::~SignalListener() { epoll_->DeleteFd(signalfd_.fd()); }
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800194
James Kuszmauld42edb42022-01-07 18:00:16 -0800195Application::Application(std::string_view name,
196 std::string_view executable_name,
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800197 aos::EventLoop *event_loop,
payton.rehl2841b1c2023-05-25 17:23:55 -0700198 std::function<void()> on_change,
199 QuietLogging quiet_flag)
James Kuszmauld42edb42022-01-07 18:00:16 -0800200 : name_(name),
James Kuszmaul37a56af2023-07-29 15:15:16 -0700201 path_(ResolvePath(executable_name)),
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800202 event_loop_(event_loop),
203 start_timer_(event_loop_->AddTimer([this] {
204 status_ = aos::starter::State::RUNNING;
payton.rehl2841b1c2023-05-25 17:23:55 -0700205 LOG_IF(INFO, quiet_flag_ == QuietLogging::kNo)
206 << "Started '" << name_ << "' pid: " << pid_;
James Kuszmaul37a56af2023-07-29 15:15:16 -0700207 // Check if the file on disk changed while we were starting up. We allow
208 // this state for the same reason that we don't just use /proc/$pid/exe
209 // to determine if the file is deleted--we may be running a script or
210 // sudo or the such and determining the state of the file that we
211 // actually care about sounds like more work than we want to deal with.
212 if (InodeChanged(path_, pre_fork_inode_)) {
213 file_state_ = FileState::CHANGED_DURING_STARTUP;
214 } else {
215 file_state_ = FileState::NO_CHANGE;
216 }
217
218 OnChange();
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800219 })),
220 restart_timer_(event_loop_->AddTimer([this] { DoStart(); })),
221 stop_timer_(event_loop_->AddTimer([this] {
222 if (kill(pid_, SIGKILL) == 0) {
Philipp Schrader595979d2023-09-13 11:31:48 -0700223 LOG_IF(WARNING, quiet_flag_ == QuietLogging::kNo ||
224 quiet_flag_ == QuietLogging::kNotForDebugging)
payton.rehl2841b1c2023-05-25 17:23:55 -0700225 << "Failed to stop, sending SIGKILL to '" << name_
226 << "' pid: " << pid_;
Sarah Newman9687e062023-09-08 12:22:27 -0700227 } else {
Philipp Schrader595979d2023-09-13 11:31:48 -0700228 PLOG_IF(WARNING, quiet_flag_ == QuietLogging::kNo ||
229 quiet_flag_ == QuietLogging::kNotForDebugging)
Sarah Newman9687e062023-09-08 12:22:27 -0700230 << "Failed to send SIGKILL to '" << name_ << "' pid: " << pid_;
231 stop_timer_->Schedule(event_loop_->monotonic_now() +
232 std::chrono::seconds(1));
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800233 }
234 })),
James Kuszmauld42edb42022-01-07 18:00:16 -0800235 pipe_timer_(event_loop_->AddTimer([this]() { FetchOutputs(); })),
236 child_status_handler_(
237 event_loop_->AddTimer([this]() { MaybeHandleSignal(); })),
Austin Schuh1cea9032023-07-10 11:56:40 -0700238 on_change_({on_change}),
payton.rehl2841b1c2023-05-25 17:23:55 -0700239 quiet_flag_(quiet_flag) {
Sanjay Narayanan92fdc3d2023-08-25 14:42:56 -0700240 // Every second poll to check if the child is dead. This is used as a
241 // default for the case where the user is not directly catching SIGCHLD and
242 // calling MaybeHandleSignal for us.
243 child_status_handler_->Schedule(event_loop_->monotonic_now(),
244 std::chrono::seconds(1));
James Kuszmauld42edb42022-01-07 18:00:16 -0800245}
246
247Application::Application(const aos::Application *application,
248 aos::EventLoop *event_loop,
payton.rehl2841b1c2023-05-25 17:23:55 -0700249 std::function<void()> on_change,
250 QuietLogging quiet_flag)
James Kuszmauld42edb42022-01-07 18:00:16 -0800251 : Application(application->name()->string_view(),
252 application->has_executable_name()
253 ? application->executable_name()->string_view()
254 : application->name()->string_view(),
payton.rehl2841b1c2023-05-25 17:23:55 -0700255 event_loop, on_change, quiet_flag) {
James Kuszmauld42edb42022-01-07 18:00:16 -0800256 user_name_ = application->has_user() ? application->user()->str() : "";
257 user_ = application->has_user() ? FindUid(user_name_.c_str()) : std::nullopt;
258 group_ = application->has_user() ? FindPrimaryGidForUser(user_name_.c_str())
259 : std::nullopt;
260 autostart_ = application->autostart();
261 autorestart_ = application->autorestart();
262 if (application->has_args()) {
263 set_args(*application->args());
264 }
Austin Schuhbbeb37e2022-08-17 16:19:27 -0700265
266 if (application->has_memory_limit() && application->memory_limit() > 0) {
267 SetMemoryLimit(application->memory_limit());
268 }
Philipp Schraderc8e779e2024-01-25 16:32:39 -0800269
270 set_stop_grace_period(std::chrono::nanoseconds(application->stop_time()));
James Kuszmauld42edb42022-01-07 18:00:16 -0800271}
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800272
273void Application::DoStart() {
274 if (status_ != aos::starter::State::WAITING) {
275 return;
276 }
277
278 start_timer_->Disable();
279 restart_timer_->Disable();
280
James Kuszmauld42edb42022-01-07 18:00:16 -0800281 status_pipes_ = util::ScopedPipe::MakePipe();
282
283 if (capture_stdout_) {
284 stdout_pipes_ = util::ScopedPipe::MakePipe();
285 stdout_.clear();
286 }
287 if (capture_stderr_) {
288 stderr_pipes_ = util::ScopedPipe::MakePipe();
289 stderr_.clear();
290 }
291
Philipp Schradera6712522023-07-05 20:25:11 -0700292 pipe_timer_->Schedule(event_loop_->monotonic_now(),
293 std::chrono::milliseconds(100));
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800294
Philipp Schraderfa8fc492023-09-26 14:52:02 -0700295 {
296 // Block all signals during the fork() call. Together with the default
297 // signal handler restoration below, This prevents signal handlers from
298 // getting called in the child and accidentally affecting the parent. In
299 // particular, the exit handler for shm_event_loop could be called here if
300 // we don't exec() quickly enough.
301 ScopedCompleteSignalBlocker signal_blocker;
James Kuszmaul37a56af2023-07-29 15:15:16 -0700302 {
303 const std::optional<ino_t> inode = GetInodeForPath(path_);
304 CHECK(inode.has_value())
305 << ": " << path_ << " does not seem to be stat'able.";
306 pre_fork_inode_ = inode.value();
307 }
Philipp Schraderfa8fc492023-09-26 14:52:02 -0700308 const pid_t pid = fork();
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800309
Philipp Schraderfa8fc492023-09-26 14:52:02 -0700310 if (pid != 0) {
311 if (pid == -1) {
312 PLOG_IF(WARNING, quiet_flag_ == QuietLogging::kNo ||
313 quiet_flag_ == QuietLogging::kNotForDebugging)
314 << "Failed to fork '" << name_ << "'";
315 stop_reason_ = aos::starter::LastStopReason::FORK_ERR;
316 status_ = aos::starter::State::STOPPED;
317 } else {
318 pid_ = pid;
319 id_ = next_id_++;
320 start_time_ = event_loop_->monotonic_now();
321 status_ = aos::starter::State::STARTING;
322 latest_timing_report_version_.reset();
323 LOG_IF(INFO, quiet_flag_ == QuietLogging::kNo)
324 << "Starting '" << name_ << "' pid " << pid_;
325
326 // Set up timer which moves application to RUNNING state if it is still
327 // alive in 1 second.
328 start_timer_->Schedule(event_loop_->monotonic_now() +
329 std::chrono::seconds(1));
330 // Since we are the parent process, clear our write-side of all the
331 // pipes.
332 status_pipes_.write.reset();
333 stdout_pipes_.write.reset();
334 stderr_pipes_.write.reset();
335 }
336 OnChange();
337 return;
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800338 }
Philipp Schraderfa8fc492023-09-26 14:52:02 -0700339
340 // Clear any signal handlers so that they don't accidentally interfere with
341 // the parent process. Is there a better way to iterate over all the
342 // signals? Right now we're just dealing with the most common ones.
343 for (int signal : {SIGINT, SIGHUP, SIGTERM}) {
344 struct sigaction action;
345 sigemptyset(&action.sa_mask);
346 action.sa_flags = 0;
347 action.sa_handler = SIG_DFL;
348 PCHECK(sigaction(signal, &action, nullptr) == 0);
349 }
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800350 }
351
Austin Schuhbbeb37e2022-08-17 16:19:27 -0700352 if (memory_cgroup_) {
353 memory_cgroup_->AddTid();
354 }
355
James Kuszmauld42edb42022-01-07 18:00:16 -0800356 // Since we are the child process, clear our read-side of all the pipes.
357 status_pipes_.read.reset();
358 stdout_pipes_.read.reset();
359 stderr_pipes_.read.reset();
360
361 // The status pipe will not be needed if the execve succeeds.
362 status_pipes_.write->SetCloexec();
363
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800364 // Clear out signal mask of parent so forked process receives all signals
365 // normally.
366 sigset_t empty_mask;
367 sigemptyset(&empty_mask);
368 sigprocmask(SIG_SETMASK, &empty_mask, nullptr);
369
370 // Cleanup children if starter dies in a way that is not handled gracefully.
371 if (prctl(PR_SET_PDEATHSIG, SIGKILL) == -1) {
James Kuszmauld42edb42022-01-07 18:00:16 -0800372 status_pipes_.write->Write(
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800373 static_cast<uint32_t>(aos::starter::LastStopReason::SET_PRCTL_ERR));
374 PLOG(FATAL) << "Could not set PR_SET_PDEATHSIG to SIGKILL";
375 }
376
377 if (group_) {
378 CHECK(!user_name_.empty());
379 // The manpage for setgroups says we just need CAP_SETGID, but empirically
380 // we also need the effective UID to be 0 to make it work. user_ must also
381 // be set so we change this effective UID back later.
382 CHECK(user_);
383 if (seteuid(0) == -1) {
James Kuszmauld42edb42022-01-07 18:00:16 -0800384 status_pipes_.write->Write(
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800385 static_cast<uint32_t>(aos::starter::LastStopReason::SET_GRP_ERR));
386 PLOG(FATAL) << "Could not seteuid(0) for " << name_
387 << " in preparation for setting groups";
388 }
389 if (initgroups(user_name_.c_str(), *group_) == -1) {
James Kuszmauld42edb42022-01-07 18:00:16 -0800390 status_pipes_.write->Write(
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800391 static_cast<uint32_t>(aos::starter::LastStopReason::SET_GRP_ERR));
392 PLOG(FATAL) << "Could not initialize normal groups for " << name_
393 << " as " << user_name_ << " with " << *group_;
394 }
395 if (setgid(*group_) == -1) {
James Kuszmauld42edb42022-01-07 18:00:16 -0800396 status_pipes_.write->Write(
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800397 static_cast<uint32_t>(aos::starter::LastStopReason::SET_GRP_ERR));
398 PLOG(FATAL) << "Could not set group for " << name_ << " to " << *group_;
399 }
400 }
401
402 if (user_) {
403 if (setuid(*user_) == -1) {
James Kuszmauld42edb42022-01-07 18:00:16 -0800404 status_pipes_.write->Write(
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800405 static_cast<uint32_t>(aos::starter::LastStopReason::SET_USR_ERR));
406 PLOG(FATAL) << "Could not set user for " << name_ << " to " << *user_;
407 }
408 }
409
James Kuszmauld42edb42022-01-07 18:00:16 -0800410 if (capture_stdout_) {
411 PCHECK(STDOUT_FILENO == dup2(stdout_pipes_.write->fd(), STDOUT_FILENO));
412 stdout_pipes_.write.reset();
413 }
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800414
James Kuszmauld42edb42022-01-07 18:00:16 -0800415 if (capture_stderr_) {
416 PCHECK(STDERR_FILENO == dup2(stderr_pipes_.write->fd(), STDERR_FILENO));
417 stderr_pipes_.write.reset();
418 }
419
Sanjay Narayanan01a228f2022-04-26 14:19:30 -0700420 if (run_as_sudo_) {
Sarah Newman6d1e53b2022-08-09 14:38:08 -0700421 // For sudo we must supply the actual path
James Kuszmaul37a56af2023-07-29 15:15:16 -0700422 args_.insert(args_.begin(), path_.c_str());
Sanjay Narayanan01a228f2022-04-26 14:19:30 -0700423 args_.insert(args_.begin(), kSudo);
Sarah Newman6d1e53b2022-08-09 14:38:08 -0700424 } else {
425 // argv[0] should be the program name
426 args_.insert(args_.begin(), name_);
Sanjay Narayanan01a228f2022-04-26 14:19:30 -0700427 }
James Kuszmauld42edb42022-01-07 18:00:16 -0800428
429 std::vector<char *> cargs = CArgs();
Philipp Schrader790cb542023-07-05 21:06:52 -0700430 const char *path = run_as_sudo_ ? kSudo : path_.c_str();
Sanjay Narayanan01a228f2022-04-26 14:19:30 -0700431 execvp(path, cargs.data());
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800432
433 // If we got here, something went wrong
James Kuszmauld42edb42022-01-07 18:00:16 -0800434 status_pipes_.write->Write(
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800435 static_cast<uint32_t>(aos::starter::LastStopReason::EXECV_ERR));
Philipp Schrader595979d2023-09-13 11:31:48 -0700436 PLOG_IF(WARNING, quiet_flag_ == QuietLogging::kNo ||
437 quiet_flag_ == QuietLogging::kNotForDebugging)
payton.rehl2841b1c2023-05-25 17:23:55 -0700438 << "Could not execute " << name_ << " (" << path_ << ')';
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800439
440 _exit(EXIT_FAILURE);
441}
442
James Kuszmaul8544c492023-07-31 15:00:38 -0700443void Application::ObserveTimingReport(
444 const aos::monotonic_clock::time_point send_time,
445 const aos::timing::Report *msg) {
446 if (msg->name()->string_view() == name_ && msg->pid() == pid_ &&
447 msg->has_version()) {
448 latest_timing_report_version_ = msg->version()->str();
449 last_timing_report_ = send_time;
450 }
451}
452
James Kuszmauld42edb42022-01-07 18:00:16 -0800453void Application::FetchOutputs() {
454 if (capture_stdout_) {
455 stdout_pipes_.read->Read(&stdout_);
456 }
457 if (capture_stderr_) {
458 stderr_pipes_.read->Read(&stderr_);
459 }
460}
461
462const std::string &Application::GetStdout() {
463 CHECK(capture_stdout_);
464 FetchOutputs();
465 return stdout_;
466}
467
468const std::string &Application::GetStderr() {
469 CHECK(capture_stderr_);
470 FetchOutputs();
471 return stderr_;
472}
473
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800474void Application::DoStop(bool restart) {
475 // If stop or restart received, the old state of these is no longer applicable
476 // so cancel both.
477 restart_timer_->Disable();
478 start_timer_->Disable();
479
James Kuszmauld42edb42022-01-07 18:00:16 -0800480 FetchOutputs();
481
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800482 switch (status_) {
483 case aos::starter::State::STARTING:
484 case aos::starter::State::RUNNING: {
James Kuszmaul37a56af2023-07-29 15:15:16 -0700485 file_state_ = FileState::NOT_RUNNING;
Philipp Schrader595979d2023-09-13 11:31:48 -0700486 LOG_IF(INFO, quiet_flag_ == QuietLogging::kNo ||
487 quiet_flag_ == QuietLogging::kNotForDebugging)
payton.rehl2841b1c2023-05-25 17:23:55 -0700488 << "Stopping '" << name_ << "' pid: " << pid_ << " with signal "
489 << SIGINT;
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800490 status_ = aos::starter::State::STOPPING;
491
Philipp Schrader595979d2023-09-13 11:31:48 -0700492 if (kill(pid_, SIGINT) != 0) {
493 PLOG_IF(INFO, quiet_flag_ == QuietLogging::kNo ||
494 quiet_flag_ == QuietLogging::kNotForDebugging)
495 << "Failed to send signal " << SIGINT << " to '" << name_
496 << "' pid: " << pid_;
497 }
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800498
499 // Watchdog timer to SIGKILL application if it is still running 1 second
500 // after SIGINT
Philipp Schraderc8e779e2024-01-25 16:32:39 -0800501 stop_timer_->Schedule(event_loop_->monotonic_now() + stop_grace_period_);
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800502 queue_restart_ = restart;
Austin Schuh1cea9032023-07-10 11:56:40 -0700503 OnChange();
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800504 break;
505 }
506 case aos::starter::State::WAITING: {
507 // If waiting to restart, and receives restart, skip the waiting period
508 // and restart immediately. If stop received, all we have to do is move
509 // to the STOPPED state.
510 if (restart) {
511 DoStart();
512 } else {
513 status_ = aos::starter::State::STOPPED;
Austin Schuh1cea9032023-07-10 11:56:40 -0700514 OnChange();
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800515 }
516 break;
517 }
518 case aos::starter::State::STOPPING: {
519 // If the application is already stopping, then we just need to update the
520 // restart flag to the most recent status.
521 queue_restart_ = restart;
522 break;
523 }
524 case aos::starter::State::STOPPED: {
525 // Restart immediately if the application is already stopped
526 if (restart) {
527 status_ = aos::starter::State::WAITING;
528 DoStart();
529 }
530 break;
531 }
532 }
533}
534
535void Application::QueueStart() {
536 status_ = aos::starter::State::WAITING;
537
payton.rehl2841b1c2023-05-25 17:23:55 -0700538 LOG_IF(INFO, quiet_flag_ == QuietLogging::kNo)
539 << "Restarting " << name_ << " in 3 seconds";
Philipp Schradera6712522023-07-05 20:25:11 -0700540 restart_timer_->Schedule(event_loop_->monotonic_now() +
541 std::chrono::seconds(3));
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800542 start_timer_->Disable();
543 stop_timer_->Disable();
Austin Schuh1cea9032023-07-10 11:56:40 -0700544 OnChange();
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800545}
546
James Kuszmauld42edb42022-01-07 18:00:16 -0800547std::vector<char *> Application::CArgs() {
548 std::vector<char *> cargs;
549 std::transform(args_.begin(), args_.end(), std::back_inserter(cargs),
550 [](std::string &str) { return str.data(); });
551 cargs.push_back(nullptr);
552 return cargs;
553}
554
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800555void Application::set_args(
556 const flatbuffers::Vector<flatbuffers::Offset<flatbuffers::String>> &v) {
557 args_.clear();
558 std::transform(v.begin(), v.end(), std::back_inserter(args_),
James Kuszmauld42edb42022-01-07 18:00:16 -0800559 [](const flatbuffers::String *str) { return str->str(); });
560}
561
562void Application::set_args(std::vector<std::string> args) {
563 args_ = std::move(args);
564}
565
566void Application::set_capture_stdout(bool capture) {
567 capture_stdout_ = capture;
568}
569
570void Application::set_capture_stderr(bool capture) {
571 capture_stderr_ = capture;
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800572}
573
574std::optional<uid_t> Application::FindUid(const char *name) {
575 // TODO(austin): Use the reentrant version. This should be safe.
576 struct passwd *user_data = getpwnam(name);
577 if (user_data != nullptr) {
578 return user_data->pw_uid;
579 } else {
580 LOG(FATAL) << "Could not find user " << name;
581 return std::nullopt;
582 }
583}
584
585std::optional<gid_t> Application::FindPrimaryGidForUser(const char *name) {
586 // TODO(austin): Use the reentrant version. This should be safe.
587 struct passwd *user_data = getpwnam(name);
588 if (user_data != nullptr) {
589 return user_data->pw_gid;
590 } else {
591 LOG(FATAL) << "Could not find user " << name;
592 return std::nullopt;
593 }
594}
595
James Kuszmaul37a56af2023-07-29 15:15:16 -0700596FileState Application::UpdateFileState() {
597 // On every call, check if a different file is present on disk. Note that
598 // while the applications is running, the file cannot be changed without the
599 // inode changing.
600 // We could presumably use inotify or the such to watch the file instead,
601 // but this works and we do not expect substantial cost from reading the inode
602 // of a file every time we send out a status message.
603 if (InodeChanged(path_, pre_fork_inode_)) {
604 switch (file_state_) {
605 case FileState::NO_CHANGE:
606 file_state_ = FileState::CHANGED;
607 break;
608 case FileState::NOT_RUNNING:
609 case FileState::CHANGED_DURING_STARTUP:
610 case FileState::CHANGED:
611 break;
612 }
613 }
614 return file_state_;
615}
616
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800617flatbuffers::Offset<aos::starter::ApplicationStatus>
James Kuszmaul6295a642022-03-22 15:23:59 -0700618Application::PopulateStatus(flatbuffers::FlatBufferBuilder *builder,
619 util::Top *top) {
James Kuszmaul37a56af2023-07-29 15:15:16 -0700620 UpdateFileState();
621
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800622 CHECK_NOTNULL(builder);
623 auto name_fbs = builder->CreateString(name_);
624
James Kuszmaul6295a642022-03-22 15:23:59 -0700625 const bool valid_pid = pid_ > 0 && status_ != aos::starter::State::STOPPED;
626 const flatbuffers::Offset<util::ProcessInfo> process_info =
627 valid_pid ? top->InfoForProcess(builder, pid_)
628 : flatbuffers::Offset<util::ProcessInfo>();
629
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800630 aos::starter::ApplicationStatus::Builder status_builder(*builder);
631 status_builder.add_name(name_fbs);
632 status_builder.add_state(status_);
James Kuszmauld42edb42022-01-07 18:00:16 -0800633 if (exit_code_.has_value()) {
634 status_builder.add_last_exit_code(exit_code_.value());
635 }
James Kuszmaul8544c492023-07-31 15:00:38 -0700636 status_builder.add_has_active_timing_report(
637 last_timing_report_ +
638 // Leave a bit of margin on the timing report receipt time, to allow
639 // for timing errors.
640 3 * std::chrono::milliseconds(FLAGS_timing_report_ms) >
641 event_loop_->monotonic_now());
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800642 status_builder.add_last_stop_reason(stop_reason_);
643 if (pid_ != -1) {
644 status_builder.add_pid(pid_);
645 status_builder.add_id(id_);
646 }
James Kuszmaul6295a642022-03-22 15:23:59 -0700647 // Note that even if process_info is null, calling add_process_info is fine.
648 status_builder.add_process_info(process_info);
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800649 status_builder.add_last_start_time(start_time_.time_since_epoch().count());
James Kuszmaul37a56af2023-07-29 15:15:16 -0700650 status_builder.add_file_state(file_state_);
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800651 return status_builder.Finish();
652}
653
654void Application::Terminate() {
655 stop_reason_ = aos::starter::LastStopReason::TERMINATE;
656 DoStop(false);
657 terminating_ = true;
658}
659
660void Application::HandleCommand(aos::starter::Command cmd) {
661 switch (cmd) {
662 case aos::starter::Command::START: {
663 switch (status_) {
664 case aos::starter::State::WAITING: {
665 restart_timer_->Disable();
666 DoStart();
667 break;
668 }
669 case aos::starter::State::STARTING: {
670 break;
671 }
672 case aos::starter::State::RUNNING: {
673 break;
674 }
675 case aos::starter::State::STOPPING: {
676 queue_restart_ = true;
677 break;
678 }
679 case aos::starter::State::STOPPED: {
680 status_ = aos::starter::State::WAITING;
681 DoStart();
682 break;
683 }
684 }
685 break;
686 }
687 case aos::starter::Command::STOP: {
688 stop_reason_ = aos::starter::LastStopReason::STOP_REQUESTED;
689 DoStop(false);
690 break;
691 }
692 case aos::starter::Command::RESTART: {
693 stop_reason_ = aos::starter::LastStopReason::RESTART_REQUESTED;
694 DoStop(true);
695 break;
696 }
697 }
698}
699
700bool Application::MaybeHandleSignal() {
701 int status;
702
Sarah Newman21c59202022-06-16 12:36:33 -0700703 if (status_ == aos::starter::State::WAITING ||
704 status_ == aos::starter::State::STOPPED) {
705 // We can't possibly have received a signal meant for this process.
706 return false;
707 }
708
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800709 // Check if the status of this process has changed
Sarah Newman21c59202022-06-16 12:36:33 -0700710 // The PID won't be -1 if this application has ever been run successfully
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800711 if (pid_ == -1 || waitpid(pid_, &status, WNOHANG) != pid_) {
712 return false;
713 }
714
715 // Check that the event was the process exiting
716 if (!WIFEXITED(status) && !WIFSIGNALED(status)) {
717 return false;
718 }
719
James Kuszmauld42edb42022-01-07 18:00:16 -0800720 start_timer_->Disable();
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800721 exit_time_ = event_loop_->monotonic_now();
722 exit_code_ = WIFEXITED(status) ? WEXITSTATUS(status) : WTERMSIG(status);
James Kuszmaul37a56af2023-07-29 15:15:16 -0700723 file_state_ = FileState::NOT_RUNNING;
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800724
James Kuszmauld42edb42022-01-07 18:00:16 -0800725 if (auto read_result = status_pipes_.read->Read()) {
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800726 stop_reason_ = static_cast<aos::starter::LastStopReason>(*read_result);
727 }
728
James Kuszmaulb740f452023-11-14 17:44:29 -0800729 const std::string starter_version_string =
730 absl::StrCat("starter version '",
731 event_loop_->VersionString().value_or("unknown"), "'");
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800732 switch (status_) {
733 case aos::starter::State::STARTING: {
James Kuszmauld42edb42022-01-07 18:00:16 -0800734 if (exit_code_.value() == 0) {
payton.rehl2841b1c2023-05-25 17:23:55 -0700735 LOG_IF(INFO, quiet_flag_ == QuietLogging::kNo)
736 << "Application '" << name_ << "' pid " << pid_
James Kuszmaulb740f452023-11-14 17:44:29 -0800737 << " exited with status " << exit_code_.value() << " and "
738 << starter_version_string;
James Kuszmauld42edb42022-01-07 18:00:16 -0800739 } else {
Philipp Schrader595979d2023-09-13 11:31:48 -0700740 LOG_IF(WARNING, quiet_flag_ == QuietLogging::kNo ||
741 quiet_flag_ == QuietLogging::kNotForDebugging)
payton.rehl2841b1c2023-05-25 17:23:55 -0700742 << "Failed to start '" << name_ << "' on pid " << pid_
James Kuszmaulb740f452023-11-14 17:44:29 -0800743 << " : Exited with status " << exit_code_.value() << " and "
744 << starter_version_string;
James Kuszmauld42edb42022-01-07 18:00:16 -0800745 }
James Kuszmaul6f10b382022-03-11 22:31:38 -0800746 if (autorestart()) {
747 QueueStart();
748 } else {
749 status_ = aos::starter::State::STOPPED;
Austin Schuh1cea9032023-07-10 11:56:40 -0700750 OnChange();
James Kuszmaul6f10b382022-03-11 22:31:38 -0800751 }
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800752 break;
753 }
754 case aos::starter::State::RUNNING: {
James Kuszmauld42edb42022-01-07 18:00:16 -0800755 if (exit_code_.value() == 0) {
payton.rehl2841b1c2023-05-25 17:23:55 -0700756 LOG_IF(INFO, quiet_flag_ == QuietLogging::kNo)
757 << "Application '" << name_ << "' pid " << pid_
758 << " exited with status " << exit_code_.value();
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800759 } else {
Philipp Schrader595979d2023-09-13 11:31:48 -0700760 if (quiet_flag_ == QuietLogging::kNo ||
761 quiet_flag_ == QuietLogging::kNotForDebugging) {
James Kuszmaulb740f452023-11-14 17:44:29 -0800762 const std::string version_string =
James Kuszmaul8544c492023-07-31 15:00:38 -0700763 latest_timing_report_version_.has_value()
James Kuszmaulb740f452023-11-14 17:44:29 -0800764 ? absl::StrCat("version '",
765 latest_timing_report_version_.value(), "'")
766 : starter_version_string;
James Kuszmaul8544c492023-07-31 15:00:38 -0700767 LOG_IF(WARNING, quiet_flag_ == QuietLogging::kNo)
James Kuszmaulb740f452023-11-14 17:44:29 -0800768 << "Application '" << name_ << "' pid " << pid_ << " "
James Kuszmaul8544c492023-07-31 15:00:38 -0700769 << version_string << " exited unexpectedly with status "
770 << exit_code_.value();
771 }
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800772 }
James Kuszmaul6f10b382022-03-11 22:31:38 -0800773 if (autorestart()) {
774 QueueStart();
775 } else {
776 status_ = aos::starter::State::STOPPED;
Austin Schuh1cea9032023-07-10 11:56:40 -0700777 OnChange();
James Kuszmaul6f10b382022-03-11 22:31:38 -0800778 }
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800779 break;
780 }
781 case aos::starter::State::STOPPING: {
payton.rehl2841b1c2023-05-25 17:23:55 -0700782 LOG_IF(INFO, quiet_flag_ == QuietLogging::kNo)
783 << "Successfully stopped '" << name_ << "' pid: " << pid_
784 << " with status " << exit_code_.value();
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800785 status_ = aos::starter::State::STOPPED;
786
787 // Disable force stop timer since the process already died
788 stop_timer_->Disable();
789
Austin Schuh1cea9032023-07-10 11:56:40 -0700790 OnChange();
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800791 if (terminating_) {
792 return true;
793 }
794
795 if (queue_restart_) {
796 queue_restart_ = false;
797 status_ = aos::starter::State::WAITING;
798 DoStart();
799 }
800 break;
801 }
802 case aos::starter::State::WAITING:
803 case aos::starter::State::STOPPED: {
Sarah Newman21c59202022-06-16 12:36:33 -0700804 __builtin_unreachable();
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800805 break;
806 }
807 }
808
809 return false;
810}
811
Austin Schuh1cea9032023-07-10 11:56:40 -0700812void Application::OnChange() {
813 for (auto &fn : on_change_) {
814 fn();
815 }
816}
817
Adam Snaider70deaf22023-08-11 13:58:34 -0700818Application::~Application() {
819 start_timer_->Disable();
820 restart_timer_->Disable();
821 stop_timer_->Disable();
822 pipe_timer_->Disable();
823 child_status_handler_->Disable();
824}
825
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800826} // namespace aos::starter