blob: 07057a0c19eb76ff18582b36d8492ad015dfd9bf [file] [log] [blame]
James Kuszmaul3224b8e2022-01-07 19:00:39 -08001#include "aos/starter/subprocess.h"
2
3#include <grp.h>
4#include <pwd.h>
5#include <sys/prctl.h>
6#include <sys/types.h>
7#include <sys/wait.h>
8
James Kuszmaul37a56af2023-07-29 15:15:16 -07009#include "absl/strings/str_split.h"
James Kuszmaul3224b8e2022-01-07 19:00:39 -080010#include "glog/logging.h"
11
James Kuszmaul8544c492023-07-31 15:00:38 -070012#include "aos/flatbuffer_merge.h"
13
James Kuszmaul3224b8e2022-01-07 19:00:39 -080014namespace aos::starter {
15
Philipp Schraderfa8fc492023-09-26 14:52:02 -070016// Blocks all signals while an instance of this class is in scope.
17class ScopedCompleteSignalBlocker {
18 public:
19 ScopedCompleteSignalBlocker() {
20 sigset_t mask;
21 sigfillset(&mask);
22 // Remember the current mask.
23 PCHECK(sigprocmask(SIG_SETMASK, &mask, &old_mask_) == 0);
24 }
25
26 ~ScopedCompleteSignalBlocker() {
27 // Restore the remembered mask.
28 PCHECK(sigprocmask(SIG_SETMASK, &old_mask_, nullptr) == 0);
29 }
30
31 private:
32 sigset_t old_mask_;
33};
34
namespace {

// Returns the inode number stat() reports for `path`, or std::nullopt when
// the stat() call fails.
std::optional<ino_t> GetInodeForPath(const std::filesystem::path &path) {
  struct stat file_info;
  if (stat(path.c_str(), &file_info) == 0) {
    return file_info.st_ino;
  }
  return std::nullopt;
}

// Returns true when `path` can no longer be stat'ed or when its inode number
// differs from `previous_inode`.
bool InodeChanged(const std::filesystem::path &path, ino_t previous_inode) {
  const std::optional<ino_t> inode_now = GetInodeForPath(path);
  return !inode_now.has_value() || *inode_now != previous_inode;
}

}  // namespace
51
52std::filesystem::path ResolvePath(std::string_view command) {
53 std::filesystem::path command_path = command;
54 if (command.find("/") != std::string_view::npos) {
55 CHECK(std::filesystem::exists(command_path))
56 << ": " << command << " does not exist.";
57 return std::filesystem::canonical(command_path);
58 }
59 const char *system_path = getenv("PATH");
60 std::string system_path_buffer;
61 if (system_path == nullptr) {
62 const size_t default_path_length = confstr(_CS_PATH, nullptr, 0);
63 PCHECK(default_path_length != 0) << ": Unable to resolve " << command;
64 system_path_buffer.resize(default_path_length);
65 confstr(_CS_PATH, system_path_buffer.data(), system_path_buffer.size());
66 system_path = system_path_buffer.c_str();
67 VLOG(2) << "Using default path of " << system_path
68 << " in the absence of PATH being set.";
69 }
70 const std::vector<std::string_view> search_paths =
71 absl::StrSplit(system_path, ':');
72 for (const std::string_view search_path : search_paths) {
73 const std::filesystem::path candidate =
74 std::filesystem::path(search_path) / command_path;
75 if (std::filesystem::exists(candidate)) {
76 return std::filesystem::canonical(candidate);
77 }
78 }
79 LOG(FATAL) << "Unable to resolve " << command;
80}
81
Austin Schuhbbeb37e2022-08-17 16:19:27 -070082// RAII class to become root and restore back to the original user and group
83// afterwards.
84class Sudo {
85 public:
86 Sudo() {
87 // Save what we were.
88 PCHECK(getresuid(&ruid_, &euid_, &suid_) == 0);
89 PCHECK(getresgid(&rgid_, &egid_, &sgid_) == 0);
90
91 // Become root.
92 PCHECK(setresuid(/* ruid */ 0 /* root */, /* euid */ 0, /* suid */ 0) == 0)
93 << ": Failed to become root";
94 PCHECK(setresgid(/* ruid */ 0 /* root */, /* euid */ 0, /* suid */ 0) == 0)
95 << ": Failed to become root";
96 }
97
98 ~Sudo() {
99 // And recover.
100 PCHECK(setresgid(rgid_, egid_, sgid_) == 0);
101 PCHECK(setresuid(ruid_, euid_, suid_) == 0);
102 }
103
104 uid_t ruid_, euid_, suid_;
105 gid_t rgid_, egid_, sgid_;
106};
107
Austin Schuh77e20a32023-08-01 12:25:03 -0700108MemoryCGroup::MemoryCGroup(std::string_view name, Create should_create)
109 : cgroup_(absl::StrCat("/sys/fs/cgroup/memory/aos_", name)),
110 should_create_(should_create) {
111 if (should_create_ == Create::kDoCreate) {
112 Sudo sudo;
113 int ret = mkdir(cgroup_.c_str(), 0755);
Austin Schuhbbeb37e2022-08-17 16:19:27 -0700114
Austin Schuh77e20a32023-08-01 12:25:03 -0700115 if (ret != 0) {
116 if (errno == EEXIST) {
117 PCHECK(rmdir(cgroup_.c_str()) == 0)
118 << ": Failed to remove previous cgroup " << cgroup_;
119 ret = mkdir(cgroup_.c_str(), 0755);
120 }
Austin Schuhbbeb37e2022-08-17 16:19:27 -0700121 }
Austin Schuhbbeb37e2022-08-17 16:19:27 -0700122
Austin Schuh77e20a32023-08-01 12:25:03 -0700123 if (ret != 0) {
124 PLOG(FATAL) << ": Failed to create cgroup aos_" << cgroup_
125 << ", do you have permission?";
126 }
Austin Schuhbbeb37e2022-08-17 16:19:27 -0700127 }
128}
129
130void MemoryCGroup::AddTid(pid_t pid) {
131 if (pid == 0) {
132 pid = getpid();
133 }
Austin Schuh77e20a32023-08-01 12:25:03 -0700134 if (should_create_ == Create::kDoCreate) {
135 Sudo sudo;
136 util::WriteStringToFileOrDie(absl::StrCat(cgroup_, "/tasks").c_str(),
137 std::to_string(pid));
138 } else {
139 util::WriteStringToFileOrDie(absl::StrCat(cgroup_, "/tasks").c_str(),
140 std::to_string(pid));
141 }
Austin Schuhbbeb37e2022-08-17 16:19:27 -0700142}
143
144void MemoryCGroup::SetLimit(std::string_view limit_name, uint64_t limit_value) {
Austin Schuh77e20a32023-08-01 12:25:03 -0700145 if (should_create_ == Create::kDoCreate) {
146 Sudo sudo;
147 util::WriteStringToFileOrDie(absl::StrCat(cgroup_, "/", limit_name).c_str(),
148 std::to_string(limit_value));
149 } else {
150 util::WriteStringToFileOrDie(absl::StrCat(cgroup_, "/", limit_name).c_str(),
151 std::to_string(limit_value));
152 }
Austin Schuhbbeb37e2022-08-17 16:19:27 -0700153}
154
155MemoryCGroup::~MemoryCGroup() {
Austin Schuh77e20a32023-08-01 12:25:03 -0700156 if (should_create_ == Create::kDoCreate) {
157 Sudo sudo;
158 PCHECK(rmdir(absl::StrCat(cgroup_).c_str()) == 0);
159 }
Austin Schuhbbeb37e2022-08-17 16:19:27 -0700160}
161
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800162SignalListener::SignalListener(aos::ShmEventLoop *loop,
163 std::function<void(signalfd_siginfo)> callback)
Austin Schuh1cea9032023-07-10 11:56:40 -0700164 : SignalListener(loop->epoll(), std::move(callback)) {}
165
166SignalListener::SignalListener(aos::internal::EPoll *epoll,
167 std::function<void(signalfd_siginfo)> callback)
168 : SignalListener(epoll, callback,
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800169 {SIGHUP, SIGINT, SIGQUIT, SIGABRT, SIGFPE, SIGSEGV,
170 SIGPIPE, SIGTERM, SIGBUS, SIGXCPU, SIGCHLD}) {}
171
172SignalListener::SignalListener(aos::ShmEventLoop *loop,
173 std::function<void(signalfd_siginfo)> callback,
174 std::initializer_list<unsigned int> signals)
Austin Schuh1cea9032023-07-10 11:56:40 -0700175 : SignalListener(loop->epoll(), std::move(callback), std::move(signals)) {}
176
177SignalListener::SignalListener(aos::internal::EPoll *epoll,
178 std::function<void(signalfd_siginfo)> callback,
179 std::initializer_list<unsigned int> signals)
180 : epoll_(epoll), callback_(std::move(callback)), signalfd_(signals) {
181 epoll_->OnReadable(signalfd_.fd(), [this] {
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800182 signalfd_siginfo info = signalfd_.Read();
183
184 if (info.ssi_signo == 0) {
185 LOG(WARNING) << "Could not read " << sizeof(signalfd_siginfo) << " bytes";
186 return;
187 }
188
189 callback_(info);
190 });
191}
192
Austin Schuh1cea9032023-07-10 11:56:40 -0700193SignalListener::~SignalListener() { epoll_->DeleteFd(signalfd_.fd()); }
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800194
James Kuszmauld42edb42022-01-07 18:00:16 -0800195Application::Application(std::string_view name,
196 std::string_view executable_name,
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800197 aos::EventLoop *event_loop,
payton.rehl2841b1c2023-05-25 17:23:55 -0700198 std::function<void()> on_change,
199 QuietLogging quiet_flag)
James Kuszmauld42edb42022-01-07 18:00:16 -0800200 : name_(name),
James Kuszmaul37a56af2023-07-29 15:15:16 -0700201 path_(ResolvePath(executable_name)),
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800202 event_loop_(event_loop),
203 start_timer_(event_loop_->AddTimer([this] {
204 status_ = aos::starter::State::RUNNING;
payton.rehl2841b1c2023-05-25 17:23:55 -0700205 LOG_IF(INFO, quiet_flag_ == QuietLogging::kNo)
206 << "Started '" << name_ << "' pid: " << pid_;
James Kuszmaul37a56af2023-07-29 15:15:16 -0700207 // Check if the file on disk changed while we were starting up. We allow
208 // this state for the same reason that we don't just use /proc/$pid/exe
209 // to determine if the file is deleted--we may be running a script or
210 // sudo or the such and determining the state of the file that we
211 // actually care about sounds like more work than we want to deal with.
212 if (InodeChanged(path_, pre_fork_inode_)) {
213 file_state_ = FileState::CHANGED_DURING_STARTUP;
214 } else {
215 file_state_ = FileState::NO_CHANGE;
216 }
217
218 OnChange();
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800219 })),
220 restart_timer_(event_loop_->AddTimer([this] { DoStart(); })),
221 stop_timer_(event_loop_->AddTimer([this] {
222 if (kill(pid_, SIGKILL) == 0) {
Philipp Schrader595979d2023-09-13 11:31:48 -0700223 LOG_IF(WARNING, quiet_flag_ == QuietLogging::kNo ||
224 quiet_flag_ == QuietLogging::kNotForDebugging)
payton.rehl2841b1c2023-05-25 17:23:55 -0700225 << "Failed to stop, sending SIGKILL to '" << name_
226 << "' pid: " << pid_;
Sarah Newman9687e062023-09-08 12:22:27 -0700227 } else {
Philipp Schrader595979d2023-09-13 11:31:48 -0700228 PLOG_IF(WARNING, quiet_flag_ == QuietLogging::kNo ||
229 quiet_flag_ == QuietLogging::kNotForDebugging)
Sarah Newman9687e062023-09-08 12:22:27 -0700230 << "Failed to send SIGKILL to '" << name_ << "' pid: " << pid_;
231 stop_timer_->Schedule(event_loop_->monotonic_now() +
232 std::chrono::seconds(1));
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800233 }
234 })),
James Kuszmauld42edb42022-01-07 18:00:16 -0800235 pipe_timer_(event_loop_->AddTimer([this]() { FetchOutputs(); })),
236 child_status_handler_(
237 event_loop_->AddTimer([this]() { MaybeHandleSignal(); })),
Austin Schuh1cea9032023-07-10 11:56:40 -0700238 on_change_({on_change}),
payton.rehl2841b1c2023-05-25 17:23:55 -0700239 quiet_flag_(quiet_flag) {
James Kuszmaul06a8f352024-03-15 14:15:57 -0700240 // Keep the length of the timer name bounded to some reasonable length.
241 start_timer_->set_name(absl::StrCat("app_start_", name.substr(0, 10)));
242 restart_timer_->set_name(absl::StrCat("app_restart_", name.substr(0, 10)));
243 stop_timer_->set_name(absl::StrCat("app_stop_", name.substr(0, 10)));
244 pipe_timer_->set_name(absl::StrCat("app_pipe_", name.substr(0, 10)));
245 child_status_handler_->set_name(
246 absl::StrCat("app_status_handler_", name.substr(0, 10)));
Sanjay Narayanan92fdc3d2023-08-25 14:42:56 -0700247 // Every second poll to check if the child is dead. This is used as a
James Kuszmaul06a8f352024-03-15 14:15:57 -0700248 // default for the case where the user is not directly catching SIGCHLD
249 // and calling MaybeHandleSignal for us.
Sanjay Narayanan92fdc3d2023-08-25 14:42:56 -0700250 child_status_handler_->Schedule(event_loop_->monotonic_now(),
251 std::chrono::seconds(1));
James Kuszmauld42edb42022-01-07 18:00:16 -0800252}
253
254Application::Application(const aos::Application *application,
255 aos::EventLoop *event_loop,
payton.rehl2841b1c2023-05-25 17:23:55 -0700256 std::function<void()> on_change,
257 QuietLogging quiet_flag)
James Kuszmauld42edb42022-01-07 18:00:16 -0800258 : Application(application->name()->string_view(),
259 application->has_executable_name()
260 ? application->executable_name()->string_view()
261 : application->name()->string_view(),
payton.rehl2841b1c2023-05-25 17:23:55 -0700262 event_loop, on_change, quiet_flag) {
James Kuszmauld42edb42022-01-07 18:00:16 -0800263 user_name_ = application->has_user() ? application->user()->str() : "";
264 user_ = application->has_user() ? FindUid(user_name_.c_str()) : std::nullopt;
265 group_ = application->has_user() ? FindPrimaryGidForUser(user_name_.c_str())
266 : std::nullopt;
267 autostart_ = application->autostart();
268 autorestart_ = application->autorestart();
269 if (application->has_args()) {
270 set_args(*application->args());
271 }
Austin Schuhbbeb37e2022-08-17 16:19:27 -0700272
273 if (application->has_memory_limit() && application->memory_limit() > 0) {
274 SetMemoryLimit(application->memory_limit());
275 }
Philipp Schraderc8e779e2024-01-25 16:32:39 -0800276
277 set_stop_grace_period(std::chrono::nanoseconds(application->stop_time()));
James Kuszmauld42edb42022-01-07 18:00:16 -0800278}
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800279
280void Application::DoStart() {
281 if (status_ != aos::starter::State::WAITING) {
282 return;
283 }
284
285 start_timer_->Disable();
286 restart_timer_->Disable();
287
James Kuszmauld42edb42022-01-07 18:00:16 -0800288 status_pipes_ = util::ScopedPipe::MakePipe();
289
290 if (capture_stdout_) {
291 stdout_pipes_ = util::ScopedPipe::MakePipe();
292 stdout_.clear();
293 }
294 if (capture_stderr_) {
295 stderr_pipes_ = util::ScopedPipe::MakePipe();
296 stderr_.clear();
297 }
298
Philipp Schradera6712522023-07-05 20:25:11 -0700299 pipe_timer_->Schedule(event_loop_->monotonic_now(),
300 std::chrono::milliseconds(100));
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800301
Philipp Schraderfa8fc492023-09-26 14:52:02 -0700302 {
303 // Block all signals during the fork() call. Together with the default
304 // signal handler restoration below, This prevents signal handlers from
305 // getting called in the child and accidentally affecting the parent. In
306 // particular, the exit handler for shm_event_loop could be called here if
307 // we don't exec() quickly enough.
308 ScopedCompleteSignalBlocker signal_blocker;
James Kuszmaul37a56af2023-07-29 15:15:16 -0700309 {
310 const std::optional<ino_t> inode = GetInodeForPath(path_);
311 CHECK(inode.has_value())
312 << ": " << path_ << " does not seem to be stat'able.";
313 pre_fork_inode_ = inode.value();
314 }
Philipp Schraderfa8fc492023-09-26 14:52:02 -0700315 const pid_t pid = fork();
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800316
Philipp Schraderfa8fc492023-09-26 14:52:02 -0700317 if (pid != 0) {
318 if (pid == -1) {
319 PLOG_IF(WARNING, quiet_flag_ == QuietLogging::kNo ||
320 quiet_flag_ == QuietLogging::kNotForDebugging)
321 << "Failed to fork '" << name_ << "'";
322 stop_reason_ = aos::starter::LastStopReason::FORK_ERR;
323 status_ = aos::starter::State::STOPPED;
324 } else {
325 pid_ = pid;
326 id_ = next_id_++;
327 start_time_ = event_loop_->monotonic_now();
328 status_ = aos::starter::State::STARTING;
329 latest_timing_report_version_.reset();
330 LOG_IF(INFO, quiet_flag_ == QuietLogging::kNo)
331 << "Starting '" << name_ << "' pid " << pid_;
332
333 // Set up timer which moves application to RUNNING state if it is still
334 // alive in 1 second.
335 start_timer_->Schedule(event_loop_->monotonic_now() +
336 std::chrono::seconds(1));
337 // Since we are the parent process, clear our write-side of all the
338 // pipes.
339 status_pipes_.write.reset();
340 stdout_pipes_.write.reset();
341 stderr_pipes_.write.reset();
342 }
343 OnChange();
344 return;
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800345 }
Philipp Schraderfa8fc492023-09-26 14:52:02 -0700346
347 // Clear any signal handlers so that they don't accidentally interfere with
348 // the parent process. Is there a better way to iterate over all the
349 // signals? Right now we're just dealing with the most common ones.
350 for (int signal : {SIGINT, SIGHUP, SIGTERM}) {
351 struct sigaction action;
352 sigemptyset(&action.sa_mask);
353 action.sa_flags = 0;
354 action.sa_handler = SIG_DFL;
355 PCHECK(sigaction(signal, &action, nullptr) == 0);
356 }
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800357 }
358
Austin Schuhbbeb37e2022-08-17 16:19:27 -0700359 if (memory_cgroup_) {
360 memory_cgroup_->AddTid();
361 }
362
James Kuszmauld42edb42022-01-07 18:00:16 -0800363 // Since we are the child process, clear our read-side of all the pipes.
364 status_pipes_.read.reset();
365 stdout_pipes_.read.reset();
366 stderr_pipes_.read.reset();
367
368 // The status pipe will not be needed if the execve succeeds.
369 status_pipes_.write->SetCloexec();
370
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800371 // Clear out signal mask of parent so forked process receives all signals
372 // normally.
373 sigset_t empty_mask;
374 sigemptyset(&empty_mask);
375 sigprocmask(SIG_SETMASK, &empty_mask, nullptr);
376
377 // Cleanup children if starter dies in a way that is not handled gracefully.
378 if (prctl(PR_SET_PDEATHSIG, SIGKILL) == -1) {
James Kuszmauld42edb42022-01-07 18:00:16 -0800379 status_pipes_.write->Write(
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800380 static_cast<uint32_t>(aos::starter::LastStopReason::SET_PRCTL_ERR));
381 PLOG(FATAL) << "Could not set PR_SET_PDEATHSIG to SIGKILL";
382 }
383
384 if (group_) {
385 CHECK(!user_name_.empty());
386 // The manpage for setgroups says we just need CAP_SETGID, but empirically
387 // we also need the effective UID to be 0 to make it work. user_ must also
388 // be set so we change this effective UID back later.
389 CHECK(user_);
390 if (seteuid(0) == -1) {
James Kuszmauld42edb42022-01-07 18:00:16 -0800391 status_pipes_.write->Write(
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800392 static_cast<uint32_t>(aos::starter::LastStopReason::SET_GRP_ERR));
393 PLOG(FATAL) << "Could not seteuid(0) for " << name_
394 << " in preparation for setting groups";
395 }
396 if (initgroups(user_name_.c_str(), *group_) == -1) {
James Kuszmauld42edb42022-01-07 18:00:16 -0800397 status_pipes_.write->Write(
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800398 static_cast<uint32_t>(aos::starter::LastStopReason::SET_GRP_ERR));
399 PLOG(FATAL) << "Could not initialize normal groups for " << name_
400 << " as " << user_name_ << " with " << *group_;
401 }
402 if (setgid(*group_) == -1) {
James Kuszmauld42edb42022-01-07 18:00:16 -0800403 status_pipes_.write->Write(
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800404 static_cast<uint32_t>(aos::starter::LastStopReason::SET_GRP_ERR));
405 PLOG(FATAL) << "Could not set group for " << name_ << " to " << *group_;
406 }
407 }
408
409 if (user_) {
410 if (setuid(*user_) == -1) {
James Kuszmauld42edb42022-01-07 18:00:16 -0800411 status_pipes_.write->Write(
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800412 static_cast<uint32_t>(aos::starter::LastStopReason::SET_USR_ERR));
413 PLOG(FATAL) << "Could not set user for " << name_ << " to " << *user_;
414 }
415 }
416
James Kuszmauld42edb42022-01-07 18:00:16 -0800417 if (capture_stdout_) {
418 PCHECK(STDOUT_FILENO == dup2(stdout_pipes_.write->fd(), STDOUT_FILENO));
419 stdout_pipes_.write.reset();
420 }
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800421
James Kuszmauld42edb42022-01-07 18:00:16 -0800422 if (capture_stderr_) {
423 PCHECK(STDERR_FILENO == dup2(stderr_pipes_.write->fd(), STDERR_FILENO));
424 stderr_pipes_.write.reset();
425 }
426
Sanjay Narayanan01a228f2022-04-26 14:19:30 -0700427 if (run_as_sudo_) {
Sarah Newman6d1e53b2022-08-09 14:38:08 -0700428 // For sudo we must supply the actual path
James Kuszmaul37a56af2023-07-29 15:15:16 -0700429 args_.insert(args_.begin(), path_.c_str());
Sanjay Narayanan01a228f2022-04-26 14:19:30 -0700430 args_.insert(args_.begin(), kSudo);
Sarah Newman6d1e53b2022-08-09 14:38:08 -0700431 } else {
432 // argv[0] should be the program name
433 args_.insert(args_.begin(), name_);
Sanjay Narayanan01a228f2022-04-26 14:19:30 -0700434 }
James Kuszmauld42edb42022-01-07 18:00:16 -0800435
436 std::vector<char *> cargs = CArgs();
Philipp Schrader790cb542023-07-05 21:06:52 -0700437 const char *path = run_as_sudo_ ? kSudo : path_.c_str();
Sanjay Narayanan01a228f2022-04-26 14:19:30 -0700438 execvp(path, cargs.data());
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800439
440 // If we got here, something went wrong
James Kuszmauld42edb42022-01-07 18:00:16 -0800441 status_pipes_.write->Write(
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800442 static_cast<uint32_t>(aos::starter::LastStopReason::EXECV_ERR));
Philipp Schrader595979d2023-09-13 11:31:48 -0700443 PLOG_IF(WARNING, quiet_flag_ == QuietLogging::kNo ||
444 quiet_flag_ == QuietLogging::kNotForDebugging)
payton.rehl2841b1c2023-05-25 17:23:55 -0700445 << "Could not execute " << name_ << " (" << path_ << ')';
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800446
447 _exit(EXIT_FAILURE);
448}
449
James Kuszmaul8544c492023-07-31 15:00:38 -0700450void Application::ObserveTimingReport(
451 const aos::monotonic_clock::time_point send_time,
452 const aos::timing::Report *msg) {
453 if (msg->name()->string_view() == name_ && msg->pid() == pid_ &&
454 msg->has_version()) {
455 latest_timing_report_version_ = msg->version()->str();
456 last_timing_report_ = send_time;
457 }
458}
459
James Kuszmauld42edb42022-01-07 18:00:16 -0800460void Application::FetchOutputs() {
461 if (capture_stdout_) {
462 stdout_pipes_.read->Read(&stdout_);
463 }
464 if (capture_stderr_) {
465 stderr_pipes_.read->Read(&stderr_);
466 }
467}
468
469const std::string &Application::GetStdout() {
470 CHECK(capture_stdout_);
471 FetchOutputs();
472 return stdout_;
473}
474
475const std::string &Application::GetStderr() {
476 CHECK(capture_stderr_);
477 FetchOutputs();
478 return stderr_;
479}
480
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800481void Application::DoStop(bool restart) {
482 // If stop or restart received, the old state of these is no longer applicable
483 // so cancel both.
484 restart_timer_->Disable();
485 start_timer_->Disable();
486
James Kuszmauld42edb42022-01-07 18:00:16 -0800487 FetchOutputs();
488
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800489 switch (status_) {
490 case aos::starter::State::STARTING:
491 case aos::starter::State::RUNNING: {
James Kuszmaul37a56af2023-07-29 15:15:16 -0700492 file_state_ = FileState::NOT_RUNNING;
Philipp Schrader595979d2023-09-13 11:31:48 -0700493 LOG_IF(INFO, quiet_flag_ == QuietLogging::kNo ||
494 quiet_flag_ == QuietLogging::kNotForDebugging)
payton.rehl2841b1c2023-05-25 17:23:55 -0700495 << "Stopping '" << name_ << "' pid: " << pid_ << " with signal "
496 << SIGINT;
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800497 status_ = aos::starter::State::STOPPING;
498
Philipp Schrader595979d2023-09-13 11:31:48 -0700499 if (kill(pid_, SIGINT) != 0) {
500 PLOG_IF(INFO, quiet_flag_ == QuietLogging::kNo ||
501 quiet_flag_ == QuietLogging::kNotForDebugging)
502 << "Failed to send signal " << SIGINT << " to '" << name_
503 << "' pid: " << pid_;
504 }
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800505
506 // Watchdog timer to SIGKILL application if it is still running 1 second
507 // after SIGINT
Philipp Schraderc8e779e2024-01-25 16:32:39 -0800508 stop_timer_->Schedule(event_loop_->monotonic_now() + stop_grace_period_);
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800509 queue_restart_ = restart;
Austin Schuh1cea9032023-07-10 11:56:40 -0700510 OnChange();
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800511 break;
512 }
513 case aos::starter::State::WAITING: {
514 // If waiting to restart, and receives restart, skip the waiting period
515 // and restart immediately. If stop received, all we have to do is move
516 // to the STOPPED state.
517 if (restart) {
518 DoStart();
519 } else {
520 status_ = aos::starter::State::STOPPED;
Austin Schuh1cea9032023-07-10 11:56:40 -0700521 OnChange();
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800522 }
523 break;
524 }
525 case aos::starter::State::STOPPING: {
526 // If the application is already stopping, then we just need to update the
527 // restart flag to the most recent status.
528 queue_restart_ = restart;
529 break;
530 }
531 case aos::starter::State::STOPPED: {
532 // Restart immediately if the application is already stopped
533 if (restart) {
534 status_ = aos::starter::State::WAITING;
535 DoStart();
536 }
537 break;
538 }
539 }
540}
541
542void Application::QueueStart() {
543 status_ = aos::starter::State::WAITING;
544
payton.rehl2841b1c2023-05-25 17:23:55 -0700545 LOG_IF(INFO, quiet_flag_ == QuietLogging::kNo)
546 << "Restarting " << name_ << " in 3 seconds";
Philipp Schradera6712522023-07-05 20:25:11 -0700547 restart_timer_->Schedule(event_loop_->monotonic_now() +
548 std::chrono::seconds(3));
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800549 start_timer_->Disable();
550 stop_timer_->Disable();
Austin Schuh1cea9032023-07-10 11:56:40 -0700551 OnChange();
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800552}
553
James Kuszmauld42edb42022-01-07 18:00:16 -0800554std::vector<char *> Application::CArgs() {
555 std::vector<char *> cargs;
556 std::transform(args_.begin(), args_.end(), std::back_inserter(cargs),
557 [](std::string &str) { return str.data(); });
558 cargs.push_back(nullptr);
559 return cargs;
560}
561
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800562void Application::set_args(
563 const flatbuffers::Vector<flatbuffers::Offset<flatbuffers::String>> &v) {
564 args_.clear();
565 std::transform(v.begin(), v.end(), std::back_inserter(args_),
James Kuszmauld42edb42022-01-07 18:00:16 -0800566 [](const flatbuffers::String *str) { return str->str(); });
567}
568
569void Application::set_args(std::vector<std::string> args) {
570 args_ = std::move(args);
571}
572
573void Application::set_capture_stdout(bool capture) {
574 capture_stdout_ = capture;
575}
576
577void Application::set_capture_stderr(bool capture) {
578 capture_stderr_ = capture;
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800579}
580
581std::optional<uid_t> Application::FindUid(const char *name) {
582 // TODO(austin): Use the reentrant version. This should be safe.
583 struct passwd *user_data = getpwnam(name);
584 if (user_data != nullptr) {
585 return user_data->pw_uid;
586 } else {
587 LOG(FATAL) << "Could not find user " << name;
588 return std::nullopt;
589 }
590}
591
592std::optional<gid_t> Application::FindPrimaryGidForUser(const char *name) {
593 // TODO(austin): Use the reentrant version. This should be safe.
594 struct passwd *user_data = getpwnam(name);
595 if (user_data != nullptr) {
596 return user_data->pw_gid;
597 } else {
598 LOG(FATAL) << "Could not find user " << name;
599 return std::nullopt;
600 }
601}
602
James Kuszmaul37a56af2023-07-29 15:15:16 -0700603FileState Application::UpdateFileState() {
604 // On every call, check if a different file is present on disk. Note that
605 // while the applications is running, the file cannot be changed without the
606 // inode changing.
607 // We could presumably use inotify or the such to watch the file instead,
608 // but this works and we do not expect substantial cost from reading the inode
609 // of a file every time we send out a status message.
610 if (InodeChanged(path_, pre_fork_inode_)) {
611 switch (file_state_) {
612 case FileState::NO_CHANGE:
613 file_state_ = FileState::CHANGED;
614 break;
615 case FileState::NOT_RUNNING:
616 case FileState::CHANGED_DURING_STARTUP:
617 case FileState::CHANGED:
618 break;
619 }
620 }
621 return file_state_;
622}
623
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800624flatbuffers::Offset<aos::starter::ApplicationStatus>
James Kuszmaul6295a642022-03-22 15:23:59 -0700625Application::PopulateStatus(flatbuffers::FlatBufferBuilder *builder,
626 util::Top *top) {
James Kuszmaul37a56af2023-07-29 15:15:16 -0700627 UpdateFileState();
628
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800629 CHECK_NOTNULL(builder);
630 auto name_fbs = builder->CreateString(name_);
631
James Kuszmaul6295a642022-03-22 15:23:59 -0700632 const bool valid_pid = pid_ > 0 && status_ != aos::starter::State::STOPPED;
633 const flatbuffers::Offset<util::ProcessInfo> process_info =
634 valid_pid ? top->InfoForProcess(builder, pid_)
635 : flatbuffers::Offset<util::ProcessInfo>();
636
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800637 aos::starter::ApplicationStatus::Builder status_builder(*builder);
638 status_builder.add_name(name_fbs);
639 status_builder.add_state(status_);
James Kuszmauld42edb42022-01-07 18:00:16 -0800640 if (exit_code_.has_value()) {
641 status_builder.add_last_exit_code(exit_code_.value());
642 }
James Kuszmaul8544c492023-07-31 15:00:38 -0700643 status_builder.add_has_active_timing_report(
644 last_timing_report_ +
645 // Leave a bit of margin on the timing report receipt time, to allow
646 // for timing errors.
647 3 * std::chrono::milliseconds(FLAGS_timing_report_ms) >
648 event_loop_->monotonic_now());
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800649 status_builder.add_last_stop_reason(stop_reason_);
650 if (pid_ != -1) {
651 status_builder.add_pid(pid_);
652 status_builder.add_id(id_);
653 }
James Kuszmaul6295a642022-03-22 15:23:59 -0700654 // Note that even if process_info is null, calling add_process_info is fine.
655 status_builder.add_process_info(process_info);
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800656 status_builder.add_last_start_time(start_time_.time_since_epoch().count());
James Kuszmaul37a56af2023-07-29 15:15:16 -0700657 status_builder.add_file_state(file_state_);
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800658 return status_builder.Finish();
659}
660
661void Application::Terminate() {
662 stop_reason_ = aos::starter::LastStopReason::TERMINATE;
663 DoStop(false);
664 terminating_ = true;
665}
666
667void Application::HandleCommand(aos::starter::Command cmd) {
668 switch (cmd) {
669 case aos::starter::Command::START: {
670 switch (status_) {
671 case aos::starter::State::WAITING: {
672 restart_timer_->Disable();
673 DoStart();
674 break;
675 }
676 case aos::starter::State::STARTING: {
677 break;
678 }
679 case aos::starter::State::RUNNING: {
680 break;
681 }
682 case aos::starter::State::STOPPING: {
683 queue_restart_ = true;
684 break;
685 }
686 case aos::starter::State::STOPPED: {
687 status_ = aos::starter::State::WAITING;
688 DoStart();
689 break;
690 }
691 }
692 break;
693 }
694 case aos::starter::Command::STOP: {
695 stop_reason_ = aos::starter::LastStopReason::STOP_REQUESTED;
696 DoStop(false);
697 break;
698 }
699 case aos::starter::Command::RESTART: {
700 stop_reason_ = aos::starter::LastStopReason::RESTART_REQUESTED;
701 DoStop(true);
702 break;
703 }
704 }
705}
706
707bool Application::MaybeHandleSignal() {
708 int status;
709
Sarah Newman21c59202022-06-16 12:36:33 -0700710 if (status_ == aos::starter::State::WAITING ||
711 status_ == aos::starter::State::STOPPED) {
712 // We can't possibly have received a signal meant for this process.
713 return false;
714 }
715
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800716 // Check if the status of this process has changed
Sarah Newman21c59202022-06-16 12:36:33 -0700717 // The PID won't be -1 if this application has ever been run successfully
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800718 if (pid_ == -1 || waitpid(pid_, &status, WNOHANG) != pid_) {
719 return false;
720 }
721
722 // Check that the event was the process exiting
723 if (!WIFEXITED(status) && !WIFSIGNALED(status)) {
724 return false;
725 }
726
James Kuszmauld42edb42022-01-07 18:00:16 -0800727 start_timer_->Disable();
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800728 exit_time_ = event_loop_->monotonic_now();
729 exit_code_ = WIFEXITED(status) ? WEXITSTATUS(status) : WTERMSIG(status);
James Kuszmaul37a56af2023-07-29 15:15:16 -0700730 file_state_ = FileState::NOT_RUNNING;
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800731
James Kuszmauld42edb42022-01-07 18:00:16 -0800732 if (auto read_result = status_pipes_.read->Read()) {
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800733 stop_reason_ = static_cast<aos::starter::LastStopReason>(*read_result);
734 }
735
James Kuszmaulb740f452023-11-14 17:44:29 -0800736 const std::string starter_version_string =
737 absl::StrCat("starter version '",
738 event_loop_->VersionString().value_or("unknown"), "'");
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800739 switch (status_) {
740 case aos::starter::State::STARTING: {
James Kuszmauld42edb42022-01-07 18:00:16 -0800741 if (exit_code_.value() == 0) {
payton.rehl2841b1c2023-05-25 17:23:55 -0700742 LOG_IF(INFO, quiet_flag_ == QuietLogging::kNo)
743 << "Application '" << name_ << "' pid " << pid_
James Kuszmaulb740f452023-11-14 17:44:29 -0800744 << " exited with status " << exit_code_.value() << " and "
745 << starter_version_string;
James Kuszmauld42edb42022-01-07 18:00:16 -0800746 } else {
Philipp Schrader595979d2023-09-13 11:31:48 -0700747 LOG_IF(WARNING, quiet_flag_ == QuietLogging::kNo ||
748 quiet_flag_ == QuietLogging::kNotForDebugging)
payton.rehl2841b1c2023-05-25 17:23:55 -0700749 << "Failed to start '" << name_ << "' on pid " << pid_
James Kuszmaulb740f452023-11-14 17:44:29 -0800750 << " : Exited with status " << exit_code_.value() << " and "
751 << starter_version_string;
James Kuszmauld42edb42022-01-07 18:00:16 -0800752 }
James Kuszmaul6f10b382022-03-11 22:31:38 -0800753 if (autorestart()) {
754 QueueStart();
755 } else {
756 status_ = aos::starter::State::STOPPED;
Austin Schuh1cea9032023-07-10 11:56:40 -0700757 OnChange();
James Kuszmaul6f10b382022-03-11 22:31:38 -0800758 }
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800759 break;
760 }
761 case aos::starter::State::RUNNING: {
James Kuszmauld42edb42022-01-07 18:00:16 -0800762 if (exit_code_.value() == 0) {
payton.rehl2841b1c2023-05-25 17:23:55 -0700763 LOG_IF(INFO, quiet_flag_ == QuietLogging::kNo)
764 << "Application '" << name_ << "' pid " << pid_
765 << " exited with status " << exit_code_.value();
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800766 } else {
Philipp Schrader595979d2023-09-13 11:31:48 -0700767 if (quiet_flag_ == QuietLogging::kNo ||
768 quiet_flag_ == QuietLogging::kNotForDebugging) {
James Kuszmaulb740f452023-11-14 17:44:29 -0800769 const std::string version_string =
James Kuszmaul8544c492023-07-31 15:00:38 -0700770 latest_timing_report_version_.has_value()
James Kuszmaulb740f452023-11-14 17:44:29 -0800771 ? absl::StrCat("version '",
772 latest_timing_report_version_.value(), "'")
773 : starter_version_string;
James Kuszmaul8544c492023-07-31 15:00:38 -0700774 LOG_IF(WARNING, quiet_flag_ == QuietLogging::kNo)
James Kuszmaulb740f452023-11-14 17:44:29 -0800775 << "Application '" << name_ << "' pid " << pid_ << " "
James Kuszmaul8544c492023-07-31 15:00:38 -0700776 << version_string << " exited unexpectedly with status "
777 << exit_code_.value();
778 }
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800779 }
James Kuszmaul6f10b382022-03-11 22:31:38 -0800780 if (autorestart()) {
781 QueueStart();
782 } else {
783 status_ = aos::starter::State::STOPPED;
Austin Schuh1cea9032023-07-10 11:56:40 -0700784 OnChange();
James Kuszmaul6f10b382022-03-11 22:31:38 -0800785 }
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800786 break;
787 }
788 case aos::starter::State::STOPPING: {
payton.rehl2841b1c2023-05-25 17:23:55 -0700789 LOG_IF(INFO, quiet_flag_ == QuietLogging::kNo)
790 << "Successfully stopped '" << name_ << "' pid: " << pid_
791 << " with status " << exit_code_.value();
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800792 status_ = aos::starter::State::STOPPED;
793
794 // Disable force stop timer since the process already died
795 stop_timer_->Disable();
796
Austin Schuh1cea9032023-07-10 11:56:40 -0700797 OnChange();
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800798 if (terminating_) {
799 return true;
800 }
801
802 if (queue_restart_) {
803 queue_restart_ = false;
804 status_ = aos::starter::State::WAITING;
805 DoStart();
806 }
807 break;
808 }
809 case aos::starter::State::WAITING:
810 case aos::starter::State::STOPPED: {
Sarah Newman21c59202022-06-16 12:36:33 -0700811 __builtin_unreachable();
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800812 break;
813 }
814 }
815
816 return false;
817}
818
Austin Schuh1cea9032023-07-10 11:56:40 -0700819void Application::OnChange() {
820 for (auto &fn : on_change_) {
821 fn();
822 }
823}
824
Adam Snaider70deaf22023-08-11 13:58:34 -0700825Application::~Application() {
826 start_timer_->Disable();
827 restart_timer_->Disable();
828 stop_timer_->Disable();
829 pipe_timer_->Disable();
830 child_status_handler_->Disable();
831}
832
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800833} // namespace aos::starter