James Kuszmaul | 3224b8e | 2022-01-07 19:00:39 -0800 | [diff] [blame] | 1 | #ifndef AOS_STARTER_SUBPROCESS_H_ |
| 2 | #define AOS_STARTER_SUBPROCESS_H_ |
| 3 | |
Stephan Pleines | f581a07 | 2024-05-23 20:59:27 -0700 | [diff] [blame^] | 4 | #include <stdint.h> |
| 5 | #include <sys/signalfd.h> |
| 6 | #include <sys/types.h> |
| 7 | |
| 8 | #include <algorithm> |
| 9 | #include <chrono> |
| 10 | #include <filesystem> // IWYU pragma: keep |
| 11 | #include <functional> |
| 12 | #include <initializer_list> |
James Kuszmaul | d42edb4 | 2022-01-07 18:00:16 -0800 | [diff] [blame] | 13 | #include <memory> |
Stephan Pleines | f581a07 | 2024-05-23 20:59:27 -0700 | [diff] [blame^] | 14 | #include <optional> |
James Kuszmaul | 3224b8e | 2022-01-07 19:00:39 -0800 | [diff] [blame] | 15 | #include <string> |
Stephan Pleines | f581a07 | 2024-05-23 20:59:27 -0700 | [diff] [blame^] | 16 | #include <string_view> |
| 17 | #include <utility> |
James Kuszmaul | 3224b8e | 2022-01-07 19:00:39 -0800 | [diff] [blame] | 18 | #include <vector> |
| 19 | |
Stephan Pleines | f581a07 | 2024-05-23 20:59:27 -0700 | [diff] [blame^] | 20 | #include "flatbuffers/buffer.h" |
| 21 | #include "flatbuffers/flatbuffer_builder.h" |
| 22 | #include "flatbuffers/string.h" |
| 23 | #include "flatbuffers/vector.h" |
| 24 | |
| 25 | #include "aos/configuration_generated.h" |
| 26 | #include "aos/events/epoll.h" |
James Kuszmaul | 3224b8e | 2022-01-07 19:00:39 -0800 | [diff] [blame] | 27 | #include "aos/events/event_loop.h" |
Stephan Pleines | f581a07 | 2024-05-23 20:59:27 -0700 | [diff] [blame^] | 28 | #include "aos/events/event_loop_generated.h" |
James Kuszmaul | 3224b8e | 2022-01-07 19:00:39 -0800 | [diff] [blame] | 29 | #include "aos/events/shm_event_loop.h" |
Stephan Pleines | f581a07 | 2024-05-23 20:59:27 -0700 | [diff] [blame^] | 30 | #include "aos/ipc_lib/signalfd.h" |
| 31 | #include "aos/macros.h" |
James Kuszmaul | 3224b8e | 2022-01-07 19:00:39 -0800 | [diff] [blame] | 32 | #include "aos/starter/starter_generated.h" |
| 33 | #include "aos/starter/starter_rpc_generated.h" |
Stephan Pleines | f581a07 | 2024-05-23 20:59:27 -0700 | [diff] [blame^] | 34 | #include "aos/time/time.h" |
James Kuszmaul | 3224b8e | 2022-01-07 19:00:39 -0800 | [diff] [blame] | 35 | #include "aos/util/scoped_pipe.h" |
James Kuszmaul | 6295a64 | 2022-03-22 15:23:59 -0700 | [diff] [blame] | 36 | #include "aos/util/top.h" |
James Kuszmaul | 3224b8e | 2022-01-07 19:00:39 -0800 | [diff] [blame] | 37 | |
| 38 | namespace aos::starter { |
| 39 | |
// Replicates the path resolution that will be attempted by the shell or
// commands like execvp.
//
// Doing this manually allows us to conveniently know what is actually being
// executed (rather than, e.g., querying /proc/$pid/exe after the execvp() call
// is executed). This is also useful when using the Application class below
// with sudo or bash scripts, because in those circumstances /proc/$pid/exe
// contains sudo or /bin/bash (or a similar binary), rather than the actual
// thing being executed.
//
// command is the command as it would be handed to the shell/execvp; the
// returned value is the path that the command resolves to.
std::filesystem::path ResolvePath(std::string_view command);
| 48 | |
James Kuszmaul | 3224b8e | 2022-01-07 19:00:39 -0800 | [diff] [blame] | 49 | // Registers a signalfd listener with the given event loop and calls callback |
| 50 | // whenever a signal is received. |
| 51 | class SignalListener { |
| 52 | public: |
| 53 | SignalListener(aos::ShmEventLoop *loop, |
| 54 | std::function<void(signalfd_siginfo)> callback); |
Austin Schuh | 1cea903 | 2023-07-10 11:56:40 -0700 | [diff] [blame] | 55 | SignalListener(aos::internal::EPoll *epoll, |
| 56 | std::function<void(signalfd_siginfo)> callback); |
James Kuszmaul | 3224b8e | 2022-01-07 19:00:39 -0800 | [diff] [blame] | 57 | SignalListener(aos::ShmEventLoop *loop, |
| 58 | std::function<void(signalfd_siginfo)> callback, |
| 59 | std::initializer_list<unsigned int> signals); |
Austin Schuh | 1cea903 | 2023-07-10 11:56:40 -0700 | [diff] [blame] | 60 | SignalListener(aos::internal::EPoll *epoll, |
| 61 | std::function<void(signalfd_siginfo)> callback, |
| 62 | std::initializer_list<unsigned int> signals); |
James Kuszmaul | 3224b8e | 2022-01-07 19:00:39 -0800 | [diff] [blame] | 63 | |
| 64 | ~SignalListener(); |
| 65 | |
| 66 | private: |
Austin Schuh | 1cea903 | 2023-07-10 11:56:40 -0700 | [diff] [blame] | 67 | aos::internal::EPoll *epoll_; |
James Kuszmaul | 3224b8e | 2022-01-07 19:00:39 -0800 | [diff] [blame] | 68 | std::function<void(signalfd_siginfo)> callback_; |
| 69 | aos::ipc_lib::SignalFd signalfd_; |
| 70 | |
| 71 | DISALLOW_COPY_AND_ASSIGN(SignalListener); |
| 72 | }; |
| 73 | |
// Class to use the V1 cgroup API to limit memory usage.
//
// Depending on the Create mode, an instance either creates the cgroup and
// removes it on its own, or assumes the cgroup already exists and just uses it.
// Because an instance may be responsible for removing its cgroup, it is not
// copyable: two copies would both try to manage the same cgroup.
class MemoryCGroup {
 public:
  // Enum to control if MemoryCGroup should create the cgroup and remove it on
  // its own, or if it should assume it already exists and just use it.
  enum class Create {
    kDoCreate,
    kDoNotCreate,
  };

  // name identifies the cgroup to manage. should_create selects whether this
  // object creates/removes the cgroup itself (the default) or uses an existing
  // one.
  MemoryCGroup(std::string_view name, Create should_create = Create::kDoCreate);
  ~MemoryCGroup();

  MemoryCGroup(const MemoryCGroup &) = delete;
  MemoryCGroup &operator=(const MemoryCGroup &) = delete;

  // Adds a thread ID to be managed by the cgroup.
  // NOTE(review): the default of 0 presumably means "the calling thread";
  // confirm against the implementation.
  void AddTid(pid_t pid = 0);

  // Sets the provided limit to the provided value. limit_name is the name of
  // the cgroup limit (e.g. "memory.limit_in_bytes") and limit_value is the
  // value to set it to.
  void SetLimit(std::string_view limit_name, uint64_t limit_value);

 private:
  // Identifies the managed cgroup (presumably its path; confirm in the
  // implementation).
  std::string cgroup_;
  // Whether this object is responsible for creating and removing the cgroup.
  Create should_create_;
};
| 97 | |
James Kuszmaul | 3224b8e | 2022-01-07 19:00:39 -0800 | [diff] [blame] | 98 | // Manages a running process, allowing starting and stopping, and restarting |
| 99 | // automatically. |
| 100 | class Application { |
| 101 | public: |
Philipp Schrader | 595979d | 2023-09-13 11:31:48 -0700 | [diff] [blame] | 102 | enum class QuietLogging { |
| 103 | kYes, |
| 104 | kNo, |
| 105 | // For debugging child processes not behaving as expected. When a child |
| 106 | // experiences an event such as exiting with an error code or dying to due a |
| 107 | // signal, this option will cause a log statement to be printed. |
| 108 | kNotForDebugging, |
| 109 | }; |
James Kuszmaul | d42edb4 | 2022-01-07 18:00:16 -0800 | [diff] [blame] | 110 | Application(const aos::Application *application, aos::EventLoop *event_loop, |
payton.rehl | 2841b1c | 2023-05-25 17:23:55 -0700 | [diff] [blame] | 111 | std::function<void()> on_change, |
| 112 | QuietLogging quiet_flag = QuietLogging::kNo); |
James Kuszmaul | d42edb4 | 2022-01-07 18:00:16 -0800 | [diff] [blame] | 113 | |
Sarah Newman | 2c1b121 | 2022-08-10 10:05:48 -0700 | [diff] [blame] | 114 | // executable_name is the actual executable path. |
| 115 | // When sudo is not used, name is used as argv[0] when exec'ing |
| 116 | // executable_name. When sudo is used it's not possible to pass in a |
| 117 | // distinct argv[0]. |
James Kuszmaul | d42edb4 | 2022-01-07 18:00:16 -0800 | [diff] [blame] | 118 | Application(std::string_view name, std::string_view executable_name, |
payton.rehl | 2841b1c | 2023-05-25 17:23:55 -0700 | [diff] [blame] | 119 | aos::EventLoop *event_loop, std::function<void()> on_change, |
| 120 | QuietLogging quiet_flag = QuietLogging::kNo); |
James Kuszmaul | 3224b8e | 2022-01-07 19:00:39 -0800 | [diff] [blame] | 121 | |
Adam Snaider | 70deaf2 | 2023-08-11 13:58:34 -0700 | [diff] [blame] | 122 | ~Application(); |
| 123 | |
James Kuszmaul | 3224b8e | 2022-01-07 19:00:39 -0800 | [diff] [blame] | 124 | flatbuffers::Offset<aos::starter::ApplicationStatus> PopulateStatus( |
James Kuszmaul | 6295a64 | 2022-03-22 15:23:59 -0700 | [diff] [blame] | 125 | flatbuffers::FlatBufferBuilder *builder, util::Top *top); |
James Kuszmaul | d42edb4 | 2022-01-07 18:00:16 -0800 | [diff] [blame] | 126 | aos::starter::State status() const { return status_; }; |
James Kuszmaul | 3224b8e | 2022-01-07 19:00:39 -0800 | [diff] [blame] | 127 | |
| 128 | // Returns the last pid of this process. -1 if not started yet. |
| 129 | pid_t get_pid() const { return pid_; } |
| 130 | |
| 131 | // Handles a SIGCHLD signal received by the parent. Does nothing if this |
| 132 | // process was not the target. Returns true if this Application should be |
| 133 | // removed. |
| 134 | bool MaybeHandleSignal(); |
James Kuszmaul | d42edb4 | 2022-01-07 18:00:16 -0800 | [diff] [blame] | 135 | void DisableChildDeathPolling() { child_status_handler_->Disable(); } |
James Kuszmaul | 3224b8e | 2022-01-07 19:00:39 -0800 | [diff] [blame] | 136 | |
| 137 | // Handles a command. May do nothing if application is already in the desired |
| 138 | // state. |
| 139 | void HandleCommand(aos::starter::Command cmd); |
| 140 | |
| 141 | void Start() { HandleCommand(aos::starter::Command::START); } |
| 142 | |
Sanjay Narayanan | 92fdc3d | 2023-08-25 14:42:56 -0700 | [diff] [blame] | 143 | // Stops the command by sending a SIGINT first, followed by a SIGKILL if it's |
| 144 | // still alive in 1s. |
James Kuszmaul | 3224b8e | 2022-01-07 19:00:39 -0800 | [diff] [blame] | 145 | void Stop() { HandleCommand(aos::starter::Command::STOP); } |
| 146 | |
Sanjay Narayanan | 92fdc3d | 2023-08-25 14:42:56 -0700 | [diff] [blame] | 147 | // Stops the command the same way as Stop() does, but updates internal state |
| 148 | // to reflect that the application was terminated. |
James Kuszmaul | 3224b8e | 2022-01-07 19:00:39 -0800 | [diff] [blame] | 149 | void Terminate(); |
| 150 | |
Austin Schuh | 1cea903 | 2023-07-10 11:56:40 -0700 | [diff] [blame] | 151 | // Adds a callback which gets notified when the application changes state. |
| 152 | // This is in addition to any existing callbacks and doesn't replace any of |
| 153 | // them. |
| 154 | void AddOnChange(std::function<void()> fn) { |
| 155 | on_change_.emplace_back(std::move(fn)); |
| 156 | } |
| 157 | |
James Kuszmaul | d42edb4 | 2022-01-07 18:00:16 -0800 | [diff] [blame] | 158 | void set_args(std::vector<std::string> args); |
| 159 | void set_capture_stdout(bool capture); |
| 160 | void set_capture_stderr(bool capture); |
Sanjay Narayanan | 01a228f | 2022-04-26 14:19:30 -0700 | [diff] [blame] | 161 | void set_run_as_sudo(bool value) { run_as_sudo_ = value; } |
James Kuszmaul | 3224b8e | 2022-01-07 19:00:39 -0800 | [diff] [blame] | 162 | |
Philipp Schrader | c8e779e | 2024-01-25 16:32:39 -0800 | [diff] [blame] | 163 | // Sets the time for a process to stop gracefully. If an application is asked |
| 164 | // to stop, but doesn't stop within the specified time limit, then it is |
| 165 | // forcefully killed. Defaults to 1 second unless overridden by the |
| 166 | // aos::Application instance in the constructor. |
| 167 | void set_stop_grace_period(std::chrono::nanoseconds stop_grace_period) { |
| 168 | stop_grace_period_ = stop_grace_period; |
| 169 | } |
| 170 | |
James Kuszmaul | 3224b8e | 2022-01-07 19:00:39 -0800 | [diff] [blame] | 171 | bool autostart() const { return autostart_; } |
| 172 | |
| 173 | bool autorestart() const { return autorestart_; } |
Adam Snaider | 70deaf2 | 2023-08-11 13:58:34 -0700 | [diff] [blame] | 174 | void set_autorestart(bool autorestart) { autorestart_ = autorestart; } |
James Kuszmaul | 3224b8e | 2022-01-07 19:00:39 -0800 | [diff] [blame] | 175 | |
Philipp Schrader | 595979d | 2023-09-13 11:31:48 -0700 | [diff] [blame] | 176 | LastStopReason stop_reason() const { return stop_reason_; } |
| 177 | |
James Kuszmaul | d42edb4 | 2022-01-07 18:00:16 -0800 | [diff] [blame] | 178 | const std::string &GetStdout(); |
| 179 | const std::string &GetStderr(); |
| 180 | std::optional<int> exit_code() const { return exit_code_; } |
| 181 | |
Austin Schuh | bbeb37e | 2022-08-17 16:19:27 -0700 | [diff] [blame] | 182 | // Sets the memory limit for the application to the provided limit. |
| 183 | void SetMemoryLimit(size_t limit) { |
| 184 | if (!memory_cgroup_) { |
| 185 | memory_cgroup_ = std::make_unique<MemoryCGroup>(name_); |
| 186 | } |
| 187 | memory_cgroup_->SetLimit("memory.limit_in_bytes", limit); |
| 188 | } |
| 189 | |
Austin Schuh | 77e20a3 | 2023-08-01 12:25:03 -0700 | [diff] [blame] | 190 | // Sets the cgroup and memory limit to a pre-existing cgroup which is |
| 191 | // externally managed. This lets us configure the cgroup of an application |
| 192 | // without root access. |
| 193 | void SetExistingCgroupMemoryLimit(std::string_view name, size_t limit) { |
| 194 | if (!memory_cgroup_) { |
| 195 | memory_cgroup_ = std::make_unique<MemoryCGroup>( |
| 196 | name, MemoryCGroup::Create::kDoNotCreate); |
| 197 | } |
| 198 | memory_cgroup_->SetLimit("memory.limit_in_bytes", limit); |
| 199 | } |
| 200 | |
James Kuszmaul | 8544c49 | 2023-07-31 15:00:38 -0700 | [diff] [blame] | 201 | // Observe a timing report message, and save it if it is relevant to us. |
| 202 | // It is the responsibility of the caller to manage this, because the lifetime |
| 203 | // of the Application itself is such that it cannot own Fetchers readily. |
| 204 | void ObserveTimingReport(const aos::monotonic_clock::time_point send_time, |
| 205 | const aos::timing::Report *msg); |
| 206 | |
James Kuszmaul | 37a56af | 2023-07-29 15:15:16 -0700 | [diff] [blame] | 207 | FileState UpdateFileState(); |
| 208 | |
James Kuszmaul | 3224b8e | 2022-01-07 19:00:39 -0800 | [diff] [blame] | 209 | private: |
James Kuszmaul | d42edb4 | 2022-01-07 18:00:16 -0800 | [diff] [blame] | 210 | typedef aos::util::ScopedPipe::PipePair PipePair; |
Sanjay Narayanan | 01a228f | 2022-04-26 14:19:30 -0700 | [diff] [blame] | 211 | |
Philipp Schrader | 790cb54 | 2023-07-05 21:06:52 -0700 | [diff] [blame] | 212 | static constexpr const char *const kSudo{"sudo"}; |
Sanjay Narayanan | 01a228f | 2022-04-26 14:19:30 -0700 | [diff] [blame] | 213 | |
James Kuszmaul | d42edb4 | 2022-01-07 18:00:16 -0800 | [diff] [blame] | 214 | void set_args( |
| 215 | const flatbuffers::Vector<flatbuffers::Offset<flatbuffers::String>> |
| 216 | &args); |
| 217 | |
James Kuszmaul | 3224b8e | 2022-01-07 19:00:39 -0800 | [diff] [blame] | 218 | void DoStart(); |
| 219 | |
| 220 | void DoStop(bool restart); |
| 221 | |
| 222 | void QueueStart(); |
| 223 | |
Austin Schuh | 1cea903 | 2023-07-10 11:56:40 -0700 | [diff] [blame] | 224 | void OnChange(); |
| 225 | |
James Kuszmaul | 3224b8e | 2022-01-07 19:00:39 -0800 | [diff] [blame] | 226 | // Copy flatbuffer vector of strings to vector of std::string. |
| 227 | static std::vector<std::string> FbsVectorToVector( |
| 228 | const flatbuffers::Vector<flatbuffers::Offset<flatbuffers::String>> &v); |
| 229 | |
| 230 | static std::optional<uid_t> FindUid(const char *name); |
| 231 | static std::optional<gid_t> FindPrimaryGidForUser(const char *name); |
| 232 | |
James Kuszmaul | d42edb4 | 2022-01-07 18:00:16 -0800 | [diff] [blame] | 233 | void FetchOutputs(); |
| 234 | |
| 235 | // Provides an std::vector of the args (such that CArgs().data() ends up being |
| 236 | // suitable to pass to execve()). |
| 237 | // The points are invalidated when args_ changes (e.g., due to a set_args |
| 238 | // call). |
| 239 | std::vector<char *> CArgs(); |
| 240 | |
James Kuszmaul | 3224b8e | 2022-01-07 19:00:39 -0800 | [diff] [blame] | 241 | // Next unique id for all applications |
| 242 | static inline uint64_t next_id_ = 0; |
| 243 | |
| 244 | std::string name_; |
James Kuszmaul | 37a56af | 2023-07-29 15:15:16 -0700 | [diff] [blame] | 245 | std::filesystem::path path_; |
| 246 | // Inode of path_ immediately prior to the most recent fork() call. |
| 247 | ino_t pre_fork_inode_; |
| 248 | FileState file_state_ = FileState::NOT_RUNNING; |
James Kuszmaul | d42edb4 | 2022-01-07 18:00:16 -0800 | [diff] [blame] | 249 | std::vector<std::string> args_; |
James Kuszmaul | 3224b8e | 2022-01-07 19:00:39 -0800 | [diff] [blame] | 250 | std::string user_name_; |
| 251 | std::optional<uid_t> user_; |
| 252 | std::optional<gid_t> group_; |
Sanjay Narayanan | 01a228f | 2022-04-26 14:19:30 -0700 | [diff] [blame] | 253 | bool run_as_sudo_ = false; |
Philipp Schrader | c8e779e | 2024-01-25 16:32:39 -0800 | [diff] [blame] | 254 | std::chrono::nanoseconds stop_grace_period_ = std::chrono::seconds(1); |
James Kuszmaul | 3224b8e | 2022-01-07 19:00:39 -0800 | [diff] [blame] | 255 | |
James Kuszmaul | d42edb4 | 2022-01-07 18:00:16 -0800 | [diff] [blame] | 256 | bool capture_stdout_ = false; |
| 257 | PipePair stdout_pipes_; |
| 258 | std::string stdout_; |
| 259 | bool capture_stderr_ = false; |
| 260 | PipePair stderr_pipes_; |
| 261 | std::string stderr_; |
| 262 | |
James Kuszmaul | 3224b8e | 2022-01-07 19:00:39 -0800 | [diff] [blame] | 263 | pid_t pid_ = -1; |
James Kuszmaul | d42edb4 | 2022-01-07 18:00:16 -0800 | [diff] [blame] | 264 | PipePair status_pipes_; |
| 265 | uint64_t id_ = 0; |
| 266 | std::optional<int> exit_code_; |
James Kuszmaul | 3224b8e | 2022-01-07 19:00:39 -0800 | [diff] [blame] | 267 | aos::monotonic_clock::time_point start_time_, exit_time_; |
| 268 | bool queue_restart_ = false; |
| 269 | bool terminating_ = false; |
James Kuszmaul | d42edb4 | 2022-01-07 18:00:16 -0800 | [diff] [blame] | 270 | bool autostart_ = false; |
| 271 | bool autorestart_ = false; |
James Kuszmaul | 3224b8e | 2022-01-07 19:00:39 -0800 | [diff] [blame] | 272 | |
| 273 | aos::starter::State status_ = aos::starter::State::STOPPED; |
| 274 | aos::starter::LastStopReason stop_reason_ = |
| 275 | aos::starter::LastStopReason::STOP_REQUESTED; |
| 276 | |
| 277 | aos::EventLoop *event_loop_; |
James Kuszmaul | d42edb4 | 2022-01-07 18:00:16 -0800 | [diff] [blame] | 278 | aos::TimerHandler *start_timer_, *restart_timer_, *stop_timer_, *pipe_timer_, |
| 279 | *child_status_handler_; |
James Kuszmaul | 3224b8e | 2022-01-07 19:00:39 -0800 | [diff] [blame] | 280 | |
James Kuszmaul | 8544c49 | 2023-07-31 15:00:38 -0700 | [diff] [blame] | 281 | // Version string from the most recent valid timing report for this |
| 282 | // application. Cleared when the application restarts. |
| 283 | std::optional<std::string> latest_timing_report_version_; |
| 284 | aos::monotonic_clock::time_point last_timing_report_ = |
| 285 | aos::monotonic_clock::min_time; |
| 286 | |
Austin Schuh | 1cea903 | 2023-07-10 11:56:40 -0700 | [diff] [blame] | 287 | std::vector<std::function<void()>> on_change_; |
James Kuszmaul | 3224b8e | 2022-01-07 19:00:39 -0800 | [diff] [blame] | 288 | |
Austin Schuh | bbeb37e | 2022-08-17 16:19:27 -0700 | [diff] [blame] | 289 | std::unique_ptr<MemoryCGroup> memory_cgroup_; |
| 290 | |
payton.rehl | 2841b1c | 2023-05-25 17:23:55 -0700 | [diff] [blame] | 291 | QuietLogging quiet_flag_ = QuietLogging::kNo; |
| 292 | |
James Kuszmaul | 3224b8e | 2022-01-07 19:00:39 -0800 | [diff] [blame] | 293 | DISALLOW_COPY_AND_ASSIGN(Application); |
| 294 | }; |
| 295 | |
| 296 | } // namespace aos::starter |
James Kuszmaul | d42edb4 | 2022-01-07 18:00:16 -0800 | [diff] [blame] | 297 | #endif // AOS_STARTER_SUBPROCESS_H_ |