blob: 4d21c61dffea24cfd5f36fd7e283f190b79c8ca7 [file] [log] [blame]
James Kuszmaul3224b8e2022-01-07 19:00:39 -08001#include "aos/starter/subprocess.h"
2
3#include <grp.h>
4#include <pwd.h>
5#include <sys/prctl.h>
6#include <sys/types.h>
7#include <sys/wait.h>
8
9#include "glog/logging.h"
10
James Kuszmaul8544c492023-07-31 15:00:38 -070011#include "aos/flatbuffer_merge.h"
12
James Kuszmaul3224b8e2022-01-07 19:00:39 -080013namespace aos::starter {
14
Philipp Schraderfa8fc492023-09-26 14:52:02 -070015// Blocks all signals while an instance of this class is in scope.
16class ScopedCompleteSignalBlocker {
17 public:
18 ScopedCompleteSignalBlocker() {
19 sigset_t mask;
20 sigfillset(&mask);
21 // Remember the current mask.
22 PCHECK(sigprocmask(SIG_SETMASK, &mask, &old_mask_) == 0);
23 }
24
25 ~ScopedCompleteSignalBlocker() {
26 // Restore the remembered mask.
27 PCHECK(sigprocmask(SIG_SETMASK, &old_mask_, nullptr) == 0);
28 }
29
30 private:
31 sigset_t old_mask_;
32};
33
Austin Schuhbbeb37e2022-08-17 16:19:27 -070034// RAII class to become root and restore back to the original user and group
35// afterwards.
36class Sudo {
37 public:
38 Sudo() {
39 // Save what we were.
40 PCHECK(getresuid(&ruid_, &euid_, &suid_) == 0);
41 PCHECK(getresgid(&rgid_, &egid_, &sgid_) == 0);
42
43 // Become root.
44 PCHECK(setresuid(/* ruid */ 0 /* root */, /* euid */ 0, /* suid */ 0) == 0)
45 << ": Failed to become root";
46 PCHECK(setresgid(/* ruid */ 0 /* root */, /* euid */ 0, /* suid */ 0) == 0)
47 << ": Failed to become root";
48 }
49
50 ~Sudo() {
51 // And recover.
52 PCHECK(setresgid(rgid_, egid_, sgid_) == 0);
53 PCHECK(setresuid(ruid_, euid_, suid_) == 0);
54 }
55
56 uid_t ruid_, euid_, suid_;
57 gid_t rgid_, egid_, sgid_;
58};
59
Austin Schuh77e20a32023-08-01 12:25:03 -070060MemoryCGroup::MemoryCGroup(std::string_view name, Create should_create)
61 : cgroup_(absl::StrCat("/sys/fs/cgroup/memory/aos_", name)),
62 should_create_(should_create) {
63 if (should_create_ == Create::kDoCreate) {
64 Sudo sudo;
65 int ret = mkdir(cgroup_.c_str(), 0755);
Austin Schuhbbeb37e2022-08-17 16:19:27 -070066
Austin Schuh77e20a32023-08-01 12:25:03 -070067 if (ret != 0) {
68 if (errno == EEXIST) {
69 PCHECK(rmdir(cgroup_.c_str()) == 0)
70 << ": Failed to remove previous cgroup " << cgroup_;
71 ret = mkdir(cgroup_.c_str(), 0755);
72 }
Austin Schuhbbeb37e2022-08-17 16:19:27 -070073 }
Austin Schuhbbeb37e2022-08-17 16:19:27 -070074
Austin Schuh77e20a32023-08-01 12:25:03 -070075 if (ret != 0) {
76 PLOG(FATAL) << ": Failed to create cgroup aos_" << cgroup_
77 << ", do you have permission?";
78 }
Austin Schuhbbeb37e2022-08-17 16:19:27 -070079 }
80}
81
82void MemoryCGroup::AddTid(pid_t pid) {
83 if (pid == 0) {
84 pid = getpid();
85 }
Austin Schuh77e20a32023-08-01 12:25:03 -070086 if (should_create_ == Create::kDoCreate) {
87 Sudo sudo;
88 util::WriteStringToFileOrDie(absl::StrCat(cgroup_, "/tasks").c_str(),
89 std::to_string(pid));
90 } else {
91 util::WriteStringToFileOrDie(absl::StrCat(cgroup_, "/tasks").c_str(),
92 std::to_string(pid));
93 }
Austin Schuhbbeb37e2022-08-17 16:19:27 -070094}
95
96void MemoryCGroup::SetLimit(std::string_view limit_name, uint64_t limit_value) {
Austin Schuh77e20a32023-08-01 12:25:03 -070097 if (should_create_ == Create::kDoCreate) {
98 Sudo sudo;
99 util::WriteStringToFileOrDie(absl::StrCat(cgroup_, "/", limit_name).c_str(),
100 std::to_string(limit_value));
101 } else {
102 util::WriteStringToFileOrDie(absl::StrCat(cgroup_, "/", limit_name).c_str(),
103 std::to_string(limit_value));
104 }
Austin Schuhbbeb37e2022-08-17 16:19:27 -0700105}
106
107MemoryCGroup::~MemoryCGroup() {
Austin Schuh77e20a32023-08-01 12:25:03 -0700108 if (should_create_ == Create::kDoCreate) {
109 Sudo sudo;
110 PCHECK(rmdir(absl::StrCat(cgroup_).c_str()) == 0);
111 }
Austin Schuhbbeb37e2022-08-17 16:19:27 -0700112}
113
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800114SignalListener::SignalListener(aos::ShmEventLoop *loop,
115 std::function<void(signalfd_siginfo)> callback)
Austin Schuh1cea9032023-07-10 11:56:40 -0700116 : SignalListener(loop->epoll(), std::move(callback)) {}
117
118SignalListener::SignalListener(aos::internal::EPoll *epoll,
119 std::function<void(signalfd_siginfo)> callback)
120 : SignalListener(epoll, callback,
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800121 {SIGHUP, SIGINT, SIGQUIT, SIGABRT, SIGFPE, SIGSEGV,
122 SIGPIPE, SIGTERM, SIGBUS, SIGXCPU, SIGCHLD}) {}
123
124SignalListener::SignalListener(aos::ShmEventLoop *loop,
125 std::function<void(signalfd_siginfo)> callback,
126 std::initializer_list<unsigned int> signals)
Austin Schuh1cea9032023-07-10 11:56:40 -0700127 : SignalListener(loop->epoll(), std::move(callback), std::move(signals)) {}
128
129SignalListener::SignalListener(aos::internal::EPoll *epoll,
130 std::function<void(signalfd_siginfo)> callback,
131 std::initializer_list<unsigned int> signals)
132 : epoll_(epoll), callback_(std::move(callback)), signalfd_(signals) {
133 epoll_->OnReadable(signalfd_.fd(), [this] {
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800134 signalfd_siginfo info = signalfd_.Read();
135
136 if (info.ssi_signo == 0) {
137 LOG(WARNING) << "Could not read " << sizeof(signalfd_siginfo) << " bytes";
138 return;
139 }
140
141 callback_(info);
142 });
143}
144
Austin Schuh1cea9032023-07-10 11:56:40 -0700145SignalListener::~SignalListener() { epoll_->DeleteFd(signalfd_.fd()); }
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800146
James Kuszmauld42edb42022-01-07 18:00:16 -0800147Application::Application(std::string_view name,
148 std::string_view executable_name,
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800149 aos::EventLoop *event_loop,
payton.rehl2841b1c2023-05-25 17:23:55 -0700150 std::function<void()> on_change,
151 QuietLogging quiet_flag)
James Kuszmauld42edb42022-01-07 18:00:16 -0800152 : name_(name),
153 path_(executable_name),
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800154 event_loop_(event_loop),
155 start_timer_(event_loop_->AddTimer([this] {
156 status_ = aos::starter::State::RUNNING;
payton.rehl2841b1c2023-05-25 17:23:55 -0700157 LOG_IF(INFO, quiet_flag_ == QuietLogging::kNo)
158 << "Started '" << name_ << "' pid: " << pid_;
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800159 })),
160 restart_timer_(event_loop_->AddTimer([this] { DoStart(); })),
161 stop_timer_(event_loop_->AddTimer([this] {
162 if (kill(pid_, SIGKILL) == 0) {
Philipp Schrader595979d2023-09-13 11:31:48 -0700163 LOG_IF(WARNING, quiet_flag_ == QuietLogging::kNo ||
164 quiet_flag_ == QuietLogging::kNotForDebugging)
payton.rehl2841b1c2023-05-25 17:23:55 -0700165 << "Failed to stop, sending SIGKILL to '" << name_
166 << "' pid: " << pid_;
Sarah Newman9687e062023-09-08 12:22:27 -0700167 } else {
Philipp Schrader595979d2023-09-13 11:31:48 -0700168 PLOG_IF(WARNING, quiet_flag_ == QuietLogging::kNo ||
169 quiet_flag_ == QuietLogging::kNotForDebugging)
Sarah Newman9687e062023-09-08 12:22:27 -0700170 << "Failed to send SIGKILL to '" << name_ << "' pid: " << pid_;
171 stop_timer_->Schedule(event_loop_->monotonic_now() +
172 std::chrono::seconds(1));
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800173 }
174 })),
James Kuszmauld42edb42022-01-07 18:00:16 -0800175 pipe_timer_(event_loop_->AddTimer([this]() { FetchOutputs(); })),
176 child_status_handler_(
177 event_loop_->AddTimer([this]() { MaybeHandleSignal(); })),
Austin Schuh1cea9032023-07-10 11:56:40 -0700178 on_change_({on_change}),
payton.rehl2841b1c2023-05-25 17:23:55 -0700179 quiet_flag_(quiet_flag) {
Sanjay Narayanan92fdc3d2023-08-25 14:42:56 -0700180 // Every second poll to check if the child is dead. This is used as a
181 // default for the case where the user is not directly catching SIGCHLD and
182 // calling MaybeHandleSignal for us.
183 child_status_handler_->Schedule(event_loop_->monotonic_now(),
184 std::chrono::seconds(1));
James Kuszmauld42edb42022-01-07 18:00:16 -0800185}
186
187Application::Application(const aos::Application *application,
188 aos::EventLoop *event_loop,
payton.rehl2841b1c2023-05-25 17:23:55 -0700189 std::function<void()> on_change,
190 QuietLogging quiet_flag)
James Kuszmauld42edb42022-01-07 18:00:16 -0800191 : Application(application->name()->string_view(),
192 application->has_executable_name()
193 ? application->executable_name()->string_view()
194 : application->name()->string_view(),
payton.rehl2841b1c2023-05-25 17:23:55 -0700195 event_loop, on_change, quiet_flag) {
James Kuszmauld42edb42022-01-07 18:00:16 -0800196 user_name_ = application->has_user() ? application->user()->str() : "";
197 user_ = application->has_user() ? FindUid(user_name_.c_str()) : std::nullopt;
198 group_ = application->has_user() ? FindPrimaryGidForUser(user_name_.c_str())
199 : std::nullopt;
200 autostart_ = application->autostart();
201 autorestart_ = application->autorestart();
202 if (application->has_args()) {
203 set_args(*application->args());
204 }
Austin Schuhbbeb37e2022-08-17 16:19:27 -0700205
206 if (application->has_memory_limit() && application->memory_limit() > 0) {
207 SetMemoryLimit(application->memory_limit());
208 }
James Kuszmauld42edb42022-01-07 18:00:16 -0800209}
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800210
211void Application::DoStart() {
212 if (status_ != aos::starter::State::WAITING) {
213 return;
214 }
215
216 start_timer_->Disable();
217 restart_timer_->Disable();
218
James Kuszmauld42edb42022-01-07 18:00:16 -0800219 status_pipes_ = util::ScopedPipe::MakePipe();
220
221 if (capture_stdout_) {
222 stdout_pipes_ = util::ScopedPipe::MakePipe();
223 stdout_.clear();
224 }
225 if (capture_stderr_) {
226 stderr_pipes_ = util::ScopedPipe::MakePipe();
227 stderr_.clear();
228 }
229
Philipp Schradera6712522023-07-05 20:25:11 -0700230 pipe_timer_->Schedule(event_loop_->monotonic_now(),
231 std::chrono::milliseconds(100));
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800232
Philipp Schraderfa8fc492023-09-26 14:52:02 -0700233 {
234 // Block all signals during the fork() call. Together with the default
235 // signal handler restoration below, This prevents signal handlers from
236 // getting called in the child and accidentally affecting the parent. In
237 // particular, the exit handler for shm_event_loop could be called here if
238 // we don't exec() quickly enough.
239 ScopedCompleteSignalBlocker signal_blocker;
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800240
Philipp Schraderfa8fc492023-09-26 14:52:02 -0700241 const pid_t pid = fork();
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800242
Philipp Schraderfa8fc492023-09-26 14:52:02 -0700243 if (pid != 0) {
244 if (pid == -1) {
245 PLOG_IF(WARNING, quiet_flag_ == QuietLogging::kNo ||
246 quiet_flag_ == QuietLogging::kNotForDebugging)
247 << "Failed to fork '" << name_ << "'";
248 stop_reason_ = aos::starter::LastStopReason::FORK_ERR;
249 status_ = aos::starter::State::STOPPED;
250 } else {
251 pid_ = pid;
252 id_ = next_id_++;
253 start_time_ = event_loop_->monotonic_now();
254 status_ = aos::starter::State::STARTING;
255 latest_timing_report_version_.reset();
256 LOG_IF(INFO, quiet_flag_ == QuietLogging::kNo)
257 << "Starting '" << name_ << "' pid " << pid_;
258
259 // Set up timer which moves application to RUNNING state if it is still
260 // alive in 1 second.
261 start_timer_->Schedule(event_loop_->monotonic_now() +
262 std::chrono::seconds(1));
263 // Since we are the parent process, clear our write-side of all the
264 // pipes.
265 status_pipes_.write.reset();
266 stdout_pipes_.write.reset();
267 stderr_pipes_.write.reset();
268 }
269 OnChange();
270 return;
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800271 }
Philipp Schraderfa8fc492023-09-26 14:52:02 -0700272
273 // Clear any signal handlers so that they don't accidentally interfere with
274 // the parent process. Is there a better way to iterate over all the
275 // signals? Right now we're just dealing with the most common ones.
276 for (int signal : {SIGINT, SIGHUP, SIGTERM}) {
277 struct sigaction action;
278 sigemptyset(&action.sa_mask);
279 action.sa_flags = 0;
280 action.sa_handler = SIG_DFL;
281 PCHECK(sigaction(signal, &action, nullptr) == 0);
282 }
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800283 }
284
Austin Schuhbbeb37e2022-08-17 16:19:27 -0700285 if (memory_cgroup_) {
286 memory_cgroup_->AddTid();
287 }
288
James Kuszmauld42edb42022-01-07 18:00:16 -0800289 // Since we are the child process, clear our read-side of all the pipes.
290 status_pipes_.read.reset();
291 stdout_pipes_.read.reset();
292 stderr_pipes_.read.reset();
293
294 // The status pipe will not be needed if the execve succeeds.
295 status_pipes_.write->SetCloexec();
296
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800297 // Clear out signal mask of parent so forked process receives all signals
298 // normally.
299 sigset_t empty_mask;
300 sigemptyset(&empty_mask);
301 sigprocmask(SIG_SETMASK, &empty_mask, nullptr);
302
303 // Cleanup children if starter dies in a way that is not handled gracefully.
304 if (prctl(PR_SET_PDEATHSIG, SIGKILL) == -1) {
James Kuszmauld42edb42022-01-07 18:00:16 -0800305 status_pipes_.write->Write(
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800306 static_cast<uint32_t>(aos::starter::LastStopReason::SET_PRCTL_ERR));
307 PLOG(FATAL) << "Could not set PR_SET_PDEATHSIG to SIGKILL";
308 }
309
310 if (group_) {
311 CHECK(!user_name_.empty());
312 // The manpage for setgroups says we just need CAP_SETGID, but empirically
313 // we also need the effective UID to be 0 to make it work. user_ must also
314 // be set so we change this effective UID back later.
315 CHECK(user_);
316 if (seteuid(0) == -1) {
James Kuszmauld42edb42022-01-07 18:00:16 -0800317 status_pipes_.write->Write(
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800318 static_cast<uint32_t>(aos::starter::LastStopReason::SET_GRP_ERR));
319 PLOG(FATAL) << "Could not seteuid(0) for " << name_
320 << " in preparation for setting groups";
321 }
322 if (initgroups(user_name_.c_str(), *group_) == -1) {
James Kuszmauld42edb42022-01-07 18:00:16 -0800323 status_pipes_.write->Write(
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800324 static_cast<uint32_t>(aos::starter::LastStopReason::SET_GRP_ERR));
325 PLOG(FATAL) << "Could not initialize normal groups for " << name_
326 << " as " << user_name_ << " with " << *group_;
327 }
328 if (setgid(*group_) == -1) {
James Kuszmauld42edb42022-01-07 18:00:16 -0800329 status_pipes_.write->Write(
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800330 static_cast<uint32_t>(aos::starter::LastStopReason::SET_GRP_ERR));
331 PLOG(FATAL) << "Could not set group for " << name_ << " to " << *group_;
332 }
333 }
334
335 if (user_) {
336 if (setuid(*user_) == -1) {
James Kuszmauld42edb42022-01-07 18:00:16 -0800337 status_pipes_.write->Write(
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800338 static_cast<uint32_t>(aos::starter::LastStopReason::SET_USR_ERR));
339 PLOG(FATAL) << "Could not set user for " << name_ << " to " << *user_;
340 }
341 }
342
James Kuszmauld42edb42022-01-07 18:00:16 -0800343 if (capture_stdout_) {
344 PCHECK(STDOUT_FILENO == dup2(stdout_pipes_.write->fd(), STDOUT_FILENO));
345 stdout_pipes_.write.reset();
346 }
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800347
James Kuszmauld42edb42022-01-07 18:00:16 -0800348 if (capture_stderr_) {
349 PCHECK(STDERR_FILENO == dup2(stderr_pipes_.write->fd(), STDERR_FILENO));
350 stderr_pipes_.write.reset();
351 }
352
Sanjay Narayanan01a228f2022-04-26 14:19:30 -0700353 if (run_as_sudo_) {
Sarah Newman6d1e53b2022-08-09 14:38:08 -0700354 // For sudo we must supply the actual path
355 args_.insert(args_.begin(), path_);
Sanjay Narayanan01a228f2022-04-26 14:19:30 -0700356 args_.insert(args_.begin(), kSudo);
Sarah Newman6d1e53b2022-08-09 14:38:08 -0700357 } else {
358 // argv[0] should be the program name
359 args_.insert(args_.begin(), name_);
Sanjay Narayanan01a228f2022-04-26 14:19:30 -0700360 }
James Kuszmauld42edb42022-01-07 18:00:16 -0800361
362 std::vector<char *> cargs = CArgs();
Philipp Schrader790cb542023-07-05 21:06:52 -0700363 const char *path = run_as_sudo_ ? kSudo : path_.c_str();
Sanjay Narayanan01a228f2022-04-26 14:19:30 -0700364 execvp(path, cargs.data());
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800365
366 // If we got here, something went wrong
James Kuszmauld42edb42022-01-07 18:00:16 -0800367 status_pipes_.write->Write(
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800368 static_cast<uint32_t>(aos::starter::LastStopReason::EXECV_ERR));
Philipp Schrader595979d2023-09-13 11:31:48 -0700369 PLOG_IF(WARNING, quiet_flag_ == QuietLogging::kNo ||
370 quiet_flag_ == QuietLogging::kNotForDebugging)
payton.rehl2841b1c2023-05-25 17:23:55 -0700371 << "Could not execute " << name_ << " (" << path_ << ')';
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800372
373 _exit(EXIT_FAILURE);
374}
375
James Kuszmaul8544c492023-07-31 15:00:38 -0700376void Application::ObserveTimingReport(
377 const aos::monotonic_clock::time_point send_time,
378 const aos::timing::Report *msg) {
379 if (msg->name()->string_view() == name_ && msg->pid() == pid_ &&
380 msg->has_version()) {
381 latest_timing_report_version_ = msg->version()->str();
382 last_timing_report_ = send_time;
383 }
384}
385
James Kuszmauld42edb42022-01-07 18:00:16 -0800386void Application::FetchOutputs() {
387 if (capture_stdout_) {
388 stdout_pipes_.read->Read(&stdout_);
389 }
390 if (capture_stderr_) {
391 stderr_pipes_.read->Read(&stderr_);
392 }
393}
394
395const std::string &Application::GetStdout() {
396 CHECK(capture_stdout_);
397 FetchOutputs();
398 return stdout_;
399}
400
401const std::string &Application::GetStderr() {
402 CHECK(capture_stderr_);
403 FetchOutputs();
404 return stderr_;
405}
406
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800407void Application::DoStop(bool restart) {
408 // If stop or restart received, the old state of these is no longer applicable
409 // so cancel both.
410 restart_timer_->Disable();
411 start_timer_->Disable();
412
James Kuszmauld42edb42022-01-07 18:00:16 -0800413 FetchOutputs();
414
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800415 switch (status_) {
416 case aos::starter::State::STARTING:
417 case aos::starter::State::RUNNING: {
Philipp Schrader595979d2023-09-13 11:31:48 -0700418 LOG_IF(INFO, quiet_flag_ == QuietLogging::kNo ||
419 quiet_flag_ == QuietLogging::kNotForDebugging)
payton.rehl2841b1c2023-05-25 17:23:55 -0700420 << "Stopping '" << name_ << "' pid: " << pid_ << " with signal "
421 << SIGINT;
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800422 status_ = aos::starter::State::STOPPING;
423
Philipp Schrader595979d2023-09-13 11:31:48 -0700424 if (kill(pid_, SIGINT) != 0) {
425 PLOG_IF(INFO, quiet_flag_ == QuietLogging::kNo ||
426 quiet_flag_ == QuietLogging::kNotForDebugging)
427 << "Failed to send signal " << SIGINT << " to '" << name_
428 << "' pid: " << pid_;
429 }
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800430
431 // Watchdog timer to SIGKILL application if it is still running 1 second
432 // after SIGINT
Philipp Schradera6712522023-07-05 20:25:11 -0700433 stop_timer_->Schedule(event_loop_->monotonic_now() +
434 std::chrono::seconds(1));
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800435 queue_restart_ = restart;
Austin Schuh1cea9032023-07-10 11:56:40 -0700436 OnChange();
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800437 break;
438 }
439 case aos::starter::State::WAITING: {
440 // If waiting to restart, and receives restart, skip the waiting period
441 // and restart immediately. If stop received, all we have to do is move
442 // to the STOPPED state.
443 if (restart) {
444 DoStart();
445 } else {
446 status_ = aos::starter::State::STOPPED;
Austin Schuh1cea9032023-07-10 11:56:40 -0700447 OnChange();
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800448 }
449 break;
450 }
451 case aos::starter::State::STOPPING: {
452 // If the application is already stopping, then we just need to update the
453 // restart flag to the most recent status.
454 queue_restart_ = restart;
455 break;
456 }
457 case aos::starter::State::STOPPED: {
458 // Restart immediately if the application is already stopped
459 if (restart) {
460 status_ = aos::starter::State::WAITING;
461 DoStart();
462 }
463 break;
464 }
465 }
466}
467
468void Application::QueueStart() {
469 status_ = aos::starter::State::WAITING;
470
payton.rehl2841b1c2023-05-25 17:23:55 -0700471 LOG_IF(INFO, quiet_flag_ == QuietLogging::kNo)
472 << "Restarting " << name_ << " in 3 seconds";
Philipp Schradera6712522023-07-05 20:25:11 -0700473 restart_timer_->Schedule(event_loop_->monotonic_now() +
474 std::chrono::seconds(3));
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800475 start_timer_->Disable();
476 stop_timer_->Disable();
Austin Schuh1cea9032023-07-10 11:56:40 -0700477 OnChange();
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800478}
479
James Kuszmauld42edb42022-01-07 18:00:16 -0800480std::vector<char *> Application::CArgs() {
481 std::vector<char *> cargs;
482 std::transform(args_.begin(), args_.end(), std::back_inserter(cargs),
483 [](std::string &str) { return str.data(); });
484 cargs.push_back(nullptr);
485 return cargs;
486}
487
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800488void Application::set_args(
489 const flatbuffers::Vector<flatbuffers::Offset<flatbuffers::String>> &v) {
490 args_.clear();
491 std::transform(v.begin(), v.end(), std::back_inserter(args_),
James Kuszmauld42edb42022-01-07 18:00:16 -0800492 [](const flatbuffers::String *str) { return str->str(); });
493}
494
495void Application::set_args(std::vector<std::string> args) {
496 args_ = std::move(args);
497}
498
499void Application::set_capture_stdout(bool capture) {
500 capture_stdout_ = capture;
501}
502
503void Application::set_capture_stderr(bool capture) {
504 capture_stderr_ = capture;
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800505}
506
507std::optional<uid_t> Application::FindUid(const char *name) {
508 // TODO(austin): Use the reentrant version. This should be safe.
509 struct passwd *user_data = getpwnam(name);
510 if (user_data != nullptr) {
511 return user_data->pw_uid;
512 } else {
513 LOG(FATAL) << "Could not find user " << name;
514 return std::nullopt;
515 }
516}
517
518std::optional<gid_t> Application::FindPrimaryGidForUser(const char *name) {
519 // TODO(austin): Use the reentrant version. This should be safe.
520 struct passwd *user_data = getpwnam(name);
521 if (user_data != nullptr) {
522 return user_data->pw_gid;
523 } else {
524 LOG(FATAL) << "Could not find user " << name;
525 return std::nullopt;
526 }
527}
528
529flatbuffers::Offset<aos::starter::ApplicationStatus>
James Kuszmaul6295a642022-03-22 15:23:59 -0700530Application::PopulateStatus(flatbuffers::FlatBufferBuilder *builder,
531 util::Top *top) {
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800532 CHECK_NOTNULL(builder);
533 auto name_fbs = builder->CreateString(name_);
534
James Kuszmaul6295a642022-03-22 15:23:59 -0700535 const bool valid_pid = pid_ > 0 && status_ != aos::starter::State::STOPPED;
536 const flatbuffers::Offset<util::ProcessInfo> process_info =
537 valid_pid ? top->InfoForProcess(builder, pid_)
538 : flatbuffers::Offset<util::ProcessInfo>();
539
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800540 aos::starter::ApplicationStatus::Builder status_builder(*builder);
541 status_builder.add_name(name_fbs);
542 status_builder.add_state(status_);
James Kuszmauld42edb42022-01-07 18:00:16 -0800543 if (exit_code_.has_value()) {
544 status_builder.add_last_exit_code(exit_code_.value());
545 }
James Kuszmaul8544c492023-07-31 15:00:38 -0700546 status_builder.add_has_active_timing_report(
547 last_timing_report_ +
548 // Leave a bit of margin on the timing report receipt time, to allow
549 // for timing errors.
550 3 * std::chrono::milliseconds(FLAGS_timing_report_ms) >
551 event_loop_->monotonic_now());
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800552 status_builder.add_last_stop_reason(stop_reason_);
553 if (pid_ != -1) {
554 status_builder.add_pid(pid_);
555 status_builder.add_id(id_);
556 }
James Kuszmaul6295a642022-03-22 15:23:59 -0700557 // Note that even if process_info is null, calling add_process_info is fine.
558 status_builder.add_process_info(process_info);
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800559 status_builder.add_last_start_time(start_time_.time_since_epoch().count());
560 return status_builder.Finish();
561}
562
563void Application::Terminate() {
564 stop_reason_ = aos::starter::LastStopReason::TERMINATE;
565 DoStop(false);
566 terminating_ = true;
567}
568
569void Application::HandleCommand(aos::starter::Command cmd) {
570 switch (cmd) {
571 case aos::starter::Command::START: {
572 switch (status_) {
573 case aos::starter::State::WAITING: {
574 restart_timer_->Disable();
575 DoStart();
576 break;
577 }
578 case aos::starter::State::STARTING: {
579 break;
580 }
581 case aos::starter::State::RUNNING: {
582 break;
583 }
584 case aos::starter::State::STOPPING: {
585 queue_restart_ = true;
586 break;
587 }
588 case aos::starter::State::STOPPED: {
589 status_ = aos::starter::State::WAITING;
590 DoStart();
591 break;
592 }
593 }
594 break;
595 }
596 case aos::starter::Command::STOP: {
597 stop_reason_ = aos::starter::LastStopReason::STOP_REQUESTED;
598 DoStop(false);
599 break;
600 }
601 case aos::starter::Command::RESTART: {
602 stop_reason_ = aos::starter::LastStopReason::RESTART_REQUESTED;
603 DoStop(true);
604 break;
605 }
606 }
607}
608
609bool Application::MaybeHandleSignal() {
610 int status;
611
Sarah Newman21c59202022-06-16 12:36:33 -0700612 if (status_ == aos::starter::State::WAITING ||
613 status_ == aos::starter::State::STOPPED) {
614 // We can't possibly have received a signal meant for this process.
615 return false;
616 }
617
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800618 // Check if the status of this process has changed
Sarah Newman21c59202022-06-16 12:36:33 -0700619 // The PID won't be -1 if this application has ever been run successfully
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800620 if (pid_ == -1 || waitpid(pid_, &status, WNOHANG) != pid_) {
621 return false;
622 }
623
624 // Check that the event was the process exiting
625 if (!WIFEXITED(status) && !WIFSIGNALED(status)) {
626 return false;
627 }
628
James Kuszmauld42edb42022-01-07 18:00:16 -0800629 start_timer_->Disable();
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800630 exit_time_ = event_loop_->monotonic_now();
631 exit_code_ = WIFEXITED(status) ? WEXITSTATUS(status) : WTERMSIG(status);
632
James Kuszmauld42edb42022-01-07 18:00:16 -0800633 if (auto read_result = status_pipes_.read->Read()) {
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800634 stop_reason_ = static_cast<aos::starter::LastStopReason>(*read_result);
635 }
636
637 switch (status_) {
638 case aos::starter::State::STARTING: {
James Kuszmauld42edb42022-01-07 18:00:16 -0800639 if (exit_code_.value() == 0) {
payton.rehl2841b1c2023-05-25 17:23:55 -0700640 LOG_IF(INFO, quiet_flag_ == QuietLogging::kNo)
641 << "Application '" << name_ << "' pid " << pid_
642 << " exited with status " << exit_code_.value();
James Kuszmauld42edb42022-01-07 18:00:16 -0800643 } else {
Philipp Schrader595979d2023-09-13 11:31:48 -0700644 LOG_IF(WARNING, quiet_flag_ == QuietLogging::kNo ||
645 quiet_flag_ == QuietLogging::kNotForDebugging)
payton.rehl2841b1c2023-05-25 17:23:55 -0700646 << "Failed to start '" << name_ << "' on pid " << pid_
647 << " : Exited with status " << exit_code_.value();
James Kuszmauld42edb42022-01-07 18:00:16 -0800648 }
James Kuszmaul6f10b382022-03-11 22:31:38 -0800649 if (autorestart()) {
650 QueueStart();
651 } else {
652 status_ = aos::starter::State::STOPPED;
Austin Schuh1cea9032023-07-10 11:56:40 -0700653 OnChange();
James Kuszmaul6f10b382022-03-11 22:31:38 -0800654 }
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800655 break;
656 }
657 case aos::starter::State::RUNNING: {
James Kuszmauld42edb42022-01-07 18:00:16 -0800658 if (exit_code_.value() == 0) {
payton.rehl2841b1c2023-05-25 17:23:55 -0700659 LOG_IF(INFO, quiet_flag_ == QuietLogging::kNo)
660 << "Application '" << name_ << "' pid " << pid_
661 << " exited with status " << exit_code_.value();
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800662 } else {
Philipp Schrader595979d2023-09-13 11:31:48 -0700663 if (quiet_flag_ == QuietLogging::kNo ||
664 quiet_flag_ == QuietLogging::kNotForDebugging) {
James Kuszmaul8544c492023-07-31 15:00:38 -0700665 std::string version_string =
666 latest_timing_report_version_.has_value()
667 ? absl::StrCat("'", latest_timing_report_version_.value(),
668 "'")
669 : "unknown";
670 LOG_IF(WARNING, quiet_flag_ == QuietLogging::kNo)
671 << "Application '" << name_ << "' pid " << pid_ << " version "
672 << version_string << " exited unexpectedly with status "
673 << exit_code_.value();
674 }
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800675 }
James Kuszmaul6f10b382022-03-11 22:31:38 -0800676 if (autorestart()) {
677 QueueStart();
678 } else {
679 status_ = aos::starter::State::STOPPED;
Austin Schuh1cea9032023-07-10 11:56:40 -0700680 OnChange();
James Kuszmaul6f10b382022-03-11 22:31:38 -0800681 }
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800682 break;
683 }
684 case aos::starter::State::STOPPING: {
payton.rehl2841b1c2023-05-25 17:23:55 -0700685 LOG_IF(INFO, quiet_flag_ == QuietLogging::kNo)
686 << "Successfully stopped '" << name_ << "' pid: " << pid_
687 << " with status " << exit_code_.value();
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800688 status_ = aos::starter::State::STOPPED;
689
690 // Disable force stop timer since the process already died
691 stop_timer_->Disable();
692
Austin Schuh1cea9032023-07-10 11:56:40 -0700693 OnChange();
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800694 if (terminating_) {
695 return true;
696 }
697
698 if (queue_restart_) {
699 queue_restart_ = false;
700 status_ = aos::starter::State::WAITING;
701 DoStart();
702 }
703 break;
704 }
705 case aos::starter::State::WAITING:
706 case aos::starter::State::STOPPED: {
Sarah Newman21c59202022-06-16 12:36:33 -0700707 __builtin_unreachable();
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800708 break;
709 }
710 }
711
712 return false;
713}
714
Austin Schuh1cea9032023-07-10 11:56:40 -0700715void Application::OnChange() {
716 for (auto &fn : on_change_) {
717 fn();
718 }
719}
720
Adam Snaider70deaf22023-08-11 13:58:34 -0700721Application::~Application() {
722 start_timer_->Disable();
723 restart_timer_->Disable();
724 stop_timer_->Disable();
725 pipe_timer_->Disable();
726 child_status_handler_->Disable();
727}
728
James Kuszmaul3224b8e2022-01-07 19:00:39 -0800729} // namespace aos::starter