James Kuszmaul | 3224b8e | 2022-01-07 19:00:39 -0800 | [diff] [blame] | 1 | #ifndef AOS_STARTER_SUBPROCESS_H_ |
| 2 | #define AOS_STARTER_SUBPROCESS_H_ |
| 3 | |
James Kuszmaul | 37a56af | 2023-07-29 15:15:16 -0700 | [diff] [blame] | 4 | #include <filesystem> |
James Kuszmaul | d42edb4 | 2022-01-07 18:00:16 -0800 | [diff] [blame] | 5 | #include <memory> |
James Kuszmaul | 3224b8e | 2022-01-07 19:00:39 -0800 | [diff] [blame] | 6 | #include <string> |
James Kuszmaul | d42edb4 | 2022-01-07 18:00:16 -0800 | [diff] [blame] | 7 | #include <tuple> |
James Kuszmaul | 3224b8e | 2022-01-07 19:00:39 -0800 | [diff] [blame] | 8 | #include <vector> |
| 9 | |
| 10 | #include "aos/events/event_loop.h" |
| 11 | #include "aos/events/shm_event_loop.h" |
| 12 | #include "aos/starter/starter_generated.h" |
| 13 | #include "aos/starter/starter_rpc_generated.h" |
| 14 | #include "aos/util/scoped_pipe.h" |
James Kuszmaul | 6295a64 | 2022-03-22 15:23:59 -0700 | [diff] [blame] | 15 | #include "aos/util/top.h" |
James Kuszmaul | 3224b8e | 2022-01-07 19:00:39 -0800 | [diff] [blame] | 16 | |
| 17 | namespace aos::starter { |
| 18 | |
// Performs, by hand, the same path resolution that the shell or commands like
// execvp would attempt for `command`, and returns the result.
//
// Resolving the path ourselves lets us conveniently know what is actually
// being executed, instead of, e.g., querying /proc/$pid/exe once the execvp()
// call has been made.
//
// It is also what makes the class below usable with sudo or bash scripts: in
// those circumstances /proc/$pid/exe contains sudo or /bin/bash (or a similar
// binary) rather than the actual thing being executed.
std::filesystem::path ResolvePath(std::string_view command);
| 27 | |
James Kuszmaul | 3224b8e | 2022-01-07 19:00:39 -0800 | [diff] [blame] | 28 | // Registers a signalfd listener with the given event loop and calls callback |
| 29 | // whenever a signal is received. |
| 30 | class SignalListener { |
| 31 | public: |
| 32 | SignalListener(aos::ShmEventLoop *loop, |
| 33 | std::function<void(signalfd_siginfo)> callback); |
Austin Schuh | 1cea903 | 2023-07-10 11:56:40 -0700 | [diff] [blame] | 34 | SignalListener(aos::internal::EPoll *epoll, |
| 35 | std::function<void(signalfd_siginfo)> callback); |
James Kuszmaul | 3224b8e | 2022-01-07 19:00:39 -0800 | [diff] [blame] | 36 | SignalListener(aos::ShmEventLoop *loop, |
| 37 | std::function<void(signalfd_siginfo)> callback, |
| 38 | std::initializer_list<unsigned int> signals); |
Austin Schuh | 1cea903 | 2023-07-10 11:56:40 -0700 | [diff] [blame] | 39 | SignalListener(aos::internal::EPoll *epoll, |
| 40 | std::function<void(signalfd_siginfo)> callback, |
| 41 | std::initializer_list<unsigned int> signals); |
James Kuszmaul | 3224b8e | 2022-01-07 19:00:39 -0800 | [diff] [blame] | 42 | |
| 43 | ~SignalListener(); |
| 44 | |
| 45 | private: |
Austin Schuh | 1cea903 | 2023-07-10 11:56:40 -0700 | [diff] [blame] | 46 | aos::internal::EPoll *epoll_; |
James Kuszmaul | 3224b8e | 2022-01-07 19:00:39 -0800 | [diff] [blame] | 47 | std::function<void(signalfd_siginfo)> callback_; |
| 48 | aos::ipc_lib::SignalFd signalfd_; |
| 49 | |
| 50 | DISALLOW_COPY_AND_ASSIGN(SignalListener); |
| 51 | }; |
| 52 | |
// Wrapper around the V1 cgroup API, used to limit memory usage.
//
// NOTE(review): this class declares a destructor but does not disable
// copying; confirm that instances are never copied.
class MemoryCGroup {
 public:
  // Selects who owns the lifetime of the underlying cgroup.
  enum class Create {
    // MemoryCGroup creates the cgroup and removes it on its own.
    kDoCreate,
    // The cgroup is assumed to already exist and is simply used.
    kDoNotCreate,
  };

  // `name` identifies the cgroup; `should_create` picks between creating it
  // here and using a pre-existing one (see Create).
  MemoryCGroup(std::string_view name, Create should_create = Create::kDoCreate);
  ~MemoryCGroup();

  // Adds a thread ID to be managed by the cgroup.
  // NOTE(review): the meaning of the default value 0 is defined by the
  // implementation -- presumably "the calling thread"; confirm.
  void AddTid(pid_t pid = 0);

  // Sets the limit called `limit_name` to `limit_value`.
  void SetLimit(std::string_view limit_name, uint64_t limit_value);

 private:
  std::string cgroup_;
  Create should_create_;
};
| 76 | |
James Kuszmaul | 3224b8e | 2022-01-07 19:00:39 -0800 | [diff] [blame] | 77 | // Manages a running process, allowing starting and stopping, and restarting |
| 78 | // automatically. |
| 79 | class Application { |
| 80 | public: |
Philipp Schrader | 595979d | 2023-09-13 11:31:48 -0700 | [diff] [blame] | 81 | enum class QuietLogging { |
| 82 | kYes, |
| 83 | kNo, |
| 84 | // For debugging child processes not behaving as expected. When a child |
| 85 | // experiences an event such as exiting with an error code or dying to due a |
| 86 | // signal, this option will cause a log statement to be printed. |
| 87 | kNotForDebugging, |
| 88 | }; |
James Kuszmaul | d42edb4 | 2022-01-07 18:00:16 -0800 | [diff] [blame] | 89 | Application(const aos::Application *application, aos::EventLoop *event_loop, |
payton.rehl | 2841b1c | 2023-05-25 17:23:55 -0700 | [diff] [blame] | 90 | std::function<void()> on_change, |
| 91 | QuietLogging quiet_flag = QuietLogging::kNo); |
James Kuszmaul | d42edb4 | 2022-01-07 18:00:16 -0800 | [diff] [blame] | 92 | |
Sarah Newman | 2c1b121 | 2022-08-10 10:05:48 -0700 | [diff] [blame] | 93 | // executable_name is the actual executable path. |
| 94 | // When sudo is not used, name is used as argv[0] when exec'ing |
| 95 | // executable_name. When sudo is used it's not possible to pass in a |
| 96 | // distinct argv[0]. |
James Kuszmaul | d42edb4 | 2022-01-07 18:00:16 -0800 | [diff] [blame] | 97 | Application(std::string_view name, std::string_view executable_name, |
payton.rehl | 2841b1c | 2023-05-25 17:23:55 -0700 | [diff] [blame] | 98 | aos::EventLoop *event_loop, std::function<void()> on_change, |
| 99 | QuietLogging quiet_flag = QuietLogging::kNo); |
James Kuszmaul | 3224b8e | 2022-01-07 19:00:39 -0800 | [diff] [blame] | 100 | |
Adam Snaider | 70deaf2 | 2023-08-11 13:58:34 -0700 | [diff] [blame] | 101 | ~Application(); |
| 102 | |
James Kuszmaul | 3224b8e | 2022-01-07 19:00:39 -0800 | [diff] [blame] | 103 | flatbuffers::Offset<aos::starter::ApplicationStatus> PopulateStatus( |
James Kuszmaul | 6295a64 | 2022-03-22 15:23:59 -0700 | [diff] [blame] | 104 | flatbuffers::FlatBufferBuilder *builder, util::Top *top); |
James Kuszmaul | d42edb4 | 2022-01-07 18:00:16 -0800 | [diff] [blame] | 105 | aos::starter::State status() const { return status_; }; |
James Kuszmaul | 3224b8e | 2022-01-07 19:00:39 -0800 | [diff] [blame] | 106 | |
| 107 | // Returns the last pid of this process. -1 if not started yet. |
| 108 | pid_t get_pid() const { return pid_; } |
| 109 | |
| 110 | // Handles a SIGCHLD signal received by the parent. Does nothing if this |
| 111 | // process was not the target. Returns true if this Application should be |
| 112 | // removed. |
| 113 | bool MaybeHandleSignal(); |
James Kuszmaul | d42edb4 | 2022-01-07 18:00:16 -0800 | [diff] [blame] | 114 | void DisableChildDeathPolling() { child_status_handler_->Disable(); } |
James Kuszmaul | 3224b8e | 2022-01-07 19:00:39 -0800 | [diff] [blame] | 115 | |
| 116 | // Handles a command. May do nothing if application is already in the desired |
| 117 | // state. |
| 118 | void HandleCommand(aos::starter::Command cmd); |
| 119 | |
| 120 | void Start() { HandleCommand(aos::starter::Command::START); } |
| 121 | |
Sanjay Narayanan | 92fdc3d | 2023-08-25 14:42:56 -0700 | [diff] [blame] | 122 | // Stops the command by sending a SIGINT first, followed by a SIGKILL if it's |
| 123 | // still alive in 1s. |
James Kuszmaul | 3224b8e | 2022-01-07 19:00:39 -0800 | [diff] [blame] | 124 | void Stop() { HandleCommand(aos::starter::Command::STOP); } |
| 125 | |
Sanjay Narayanan | 92fdc3d | 2023-08-25 14:42:56 -0700 | [diff] [blame] | 126 | // Stops the command the same way as Stop() does, but updates internal state |
| 127 | // to reflect that the application was terminated. |
James Kuszmaul | 3224b8e | 2022-01-07 19:00:39 -0800 | [diff] [blame] | 128 | void Terminate(); |
| 129 | |
Austin Schuh | 1cea903 | 2023-07-10 11:56:40 -0700 | [diff] [blame] | 130 | // Adds a callback which gets notified when the application changes state. |
| 131 | // This is in addition to any existing callbacks and doesn't replace any of |
| 132 | // them. |
| 133 | void AddOnChange(std::function<void()> fn) { |
| 134 | on_change_.emplace_back(std::move(fn)); |
| 135 | } |
| 136 | |
James Kuszmaul | d42edb4 | 2022-01-07 18:00:16 -0800 | [diff] [blame] | 137 | void set_args(std::vector<std::string> args); |
| 138 | void set_capture_stdout(bool capture); |
| 139 | void set_capture_stderr(bool capture); |
Sanjay Narayanan | 01a228f | 2022-04-26 14:19:30 -0700 | [diff] [blame] | 140 | void set_run_as_sudo(bool value) { run_as_sudo_ = value; } |
James Kuszmaul | 3224b8e | 2022-01-07 19:00:39 -0800 | [diff] [blame] | 141 | |
Philipp Schrader | c8e779e | 2024-01-25 16:32:39 -0800 | [diff] [blame^] | 142 | // Sets the time for a process to stop gracefully. If an application is asked |
| 143 | // to stop, but doesn't stop within the specified time limit, then it is |
| 144 | // forcefully killed. Defaults to 1 second unless overridden by the |
| 145 | // aos::Application instance in the constructor. |
| 146 | void set_stop_grace_period(std::chrono::nanoseconds stop_grace_period) { |
| 147 | stop_grace_period_ = stop_grace_period; |
| 148 | } |
| 149 | |
James Kuszmaul | 3224b8e | 2022-01-07 19:00:39 -0800 | [diff] [blame] | 150 | bool autostart() const { return autostart_; } |
| 151 | |
| 152 | bool autorestart() const { return autorestart_; } |
Adam Snaider | 70deaf2 | 2023-08-11 13:58:34 -0700 | [diff] [blame] | 153 | void set_autorestart(bool autorestart) { autorestart_ = autorestart; } |
James Kuszmaul | 3224b8e | 2022-01-07 19:00:39 -0800 | [diff] [blame] | 154 | |
Philipp Schrader | 595979d | 2023-09-13 11:31:48 -0700 | [diff] [blame] | 155 | LastStopReason stop_reason() const { return stop_reason_; } |
| 156 | |
James Kuszmaul | d42edb4 | 2022-01-07 18:00:16 -0800 | [diff] [blame] | 157 | const std::string &GetStdout(); |
| 158 | const std::string &GetStderr(); |
| 159 | std::optional<int> exit_code() const { return exit_code_; } |
| 160 | |
Austin Schuh | bbeb37e | 2022-08-17 16:19:27 -0700 | [diff] [blame] | 161 | // Sets the memory limit for the application to the provided limit. |
| 162 | void SetMemoryLimit(size_t limit) { |
| 163 | if (!memory_cgroup_) { |
| 164 | memory_cgroup_ = std::make_unique<MemoryCGroup>(name_); |
| 165 | } |
| 166 | memory_cgroup_->SetLimit("memory.limit_in_bytes", limit); |
| 167 | } |
| 168 | |
Austin Schuh | 77e20a3 | 2023-08-01 12:25:03 -0700 | [diff] [blame] | 169 | // Sets the cgroup and memory limit to a pre-existing cgroup which is |
| 170 | // externally managed. This lets us configure the cgroup of an application |
| 171 | // without root access. |
| 172 | void SetExistingCgroupMemoryLimit(std::string_view name, size_t limit) { |
| 173 | if (!memory_cgroup_) { |
| 174 | memory_cgroup_ = std::make_unique<MemoryCGroup>( |
| 175 | name, MemoryCGroup::Create::kDoNotCreate); |
| 176 | } |
| 177 | memory_cgroup_->SetLimit("memory.limit_in_bytes", limit); |
| 178 | } |
| 179 | |
James Kuszmaul | 8544c49 | 2023-07-31 15:00:38 -0700 | [diff] [blame] | 180 | // Observe a timing report message, and save it if it is relevant to us. |
| 181 | // It is the responsibility of the caller to manage this, because the lifetime |
| 182 | // of the Application itself is such that it cannot own Fetchers readily. |
| 183 | void ObserveTimingReport(const aos::monotonic_clock::time_point send_time, |
| 184 | const aos::timing::Report *msg); |
| 185 | |
James Kuszmaul | 37a56af | 2023-07-29 15:15:16 -0700 | [diff] [blame] | 186 | FileState UpdateFileState(); |
| 187 | |
James Kuszmaul | 3224b8e | 2022-01-07 19:00:39 -0800 | [diff] [blame] | 188 | private: |
James Kuszmaul | d42edb4 | 2022-01-07 18:00:16 -0800 | [diff] [blame] | 189 | typedef aos::util::ScopedPipe::PipePair PipePair; |
Sanjay Narayanan | 01a228f | 2022-04-26 14:19:30 -0700 | [diff] [blame] | 190 | |
Philipp Schrader | 790cb54 | 2023-07-05 21:06:52 -0700 | [diff] [blame] | 191 | static constexpr const char *const kSudo{"sudo"}; |
Sanjay Narayanan | 01a228f | 2022-04-26 14:19:30 -0700 | [diff] [blame] | 192 | |
James Kuszmaul | d42edb4 | 2022-01-07 18:00:16 -0800 | [diff] [blame] | 193 | void set_args( |
| 194 | const flatbuffers::Vector<flatbuffers::Offset<flatbuffers::String>> |
| 195 | &args); |
| 196 | |
James Kuszmaul | 3224b8e | 2022-01-07 19:00:39 -0800 | [diff] [blame] | 197 | void DoStart(); |
| 198 | |
| 199 | void DoStop(bool restart); |
| 200 | |
| 201 | void QueueStart(); |
| 202 | |
Austin Schuh | 1cea903 | 2023-07-10 11:56:40 -0700 | [diff] [blame] | 203 | void OnChange(); |
| 204 | |
James Kuszmaul | 3224b8e | 2022-01-07 19:00:39 -0800 | [diff] [blame] | 205 | // Copy flatbuffer vector of strings to vector of std::string. |
| 206 | static std::vector<std::string> FbsVectorToVector( |
| 207 | const flatbuffers::Vector<flatbuffers::Offset<flatbuffers::String>> &v); |
| 208 | |
| 209 | static std::optional<uid_t> FindUid(const char *name); |
| 210 | static std::optional<gid_t> FindPrimaryGidForUser(const char *name); |
| 211 | |
James Kuszmaul | d42edb4 | 2022-01-07 18:00:16 -0800 | [diff] [blame] | 212 | void FetchOutputs(); |
| 213 | |
| 214 | // Provides an std::vector of the args (such that CArgs().data() ends up being |
| 215 | // suitable to pass to execve()). |
| 216 | // The points are invalidated when args_ changes (e.g., due to a set_args |
| 217 | // call). |
| 218 | std::vector<char *> CArgs(); |
| 219 | |
James Kuszmaul | 3224b8e | 2022-01-07 19:00:39 -0800 | [diff] [blame] | 220 | // Next unique id for all applications |
| 221 | static inline uint64_t next_id_ = 0; |
| 222 | |
| 223 | std::string name_; |
James Kuszmaul | 37a56af | 2023-07-29 15:15:16 -0700 | [diff] [blame] | 224 | std::filesystem::path path_; |
| 225 | // Inode of path_ immediately prior to the most recent fork() call. |
| 226 | ino_t pre_fork_inode_; |
| 227 | FileState file_state_ = FileState::NOT_RUNNING; |
James Kuszmaul | d42edb4 | 2022-01-07 18:00:16 -0800 | [diff] [blame] | 228 | std::vector<std::string> args_; |
James Kuszmaul | 3224b8e | 2022-01-07 19:00:39 -0800 | [diff] [blame] | 229 | std::string user_name_; |
| 230 | std::optional<uid_t> user_; |
| 231 | std::optional<gid_t> group_; |
Sanjay Narayanan | 01a228f | 2022-04-26 14:19:30 -0700 | [diff] [blame] | 232 | bool run_as_sudo_ = false; |
Philipp Schrader | c8e779e | 2024-01-25 16:32:39 -0800 | [diff] [blame^] | 233 | std::chrono::nanoseconds stop_grace_period_ = std::chrono::seconds(1); |
James Kuszmaul | 3224b8e | 2022-01-07 19:00:39 -0800 | [diff] [blame] | 234 | |
James Kuszmaul | d42edb4 | 2022-01-07 18:00:16 -0800 | [diff] [blame] | 235 | bool capture_stdout_ = false; |
| 236 | PipePair stdout_pipes_; |
| 237 | std::string stdout_; |
| 238 | bool capture_stderr_ = false; |
| 239 | PipePair stderr_pipes_; |
| 240 | std::string stderr_; |
| 241 | |
James Kuszmaul | 3224b8e | 2022-01-07 19:00:39 -0800 | [diff] [blame] | 242 | pid_t pid_ = -1; |
James Kuszmaul | d42edb4 | 2022-01-07 18:00:16 -0800 | [diff] [blame] | 243 | PipePair status_pipes_; |
| 244 | uint64_t id_ = 0; |
| 245 | std::optional<int> exit_code_; |
James Kuszmaul | 3224b8e | 2022-01-07 19:00:39 -0800 | [diff] [blame] | 246 | aos::monotonic_clock::time_point start_time_, exit_time_; |
| 247 | bool queue_restart_ = false; |
| 248 | bool terminating_ = false; |
James Kuszmaul | d42edb4 | 2022-01-07 18:00:16 -0800 | [diff] [blame] | 249 | bool autostart_ = false; |
| 250 | bool autorestart_ = false; |
James Kuszmaul | 3224b8e | 2022-01-07 19:00:39 -0800 | [diff] [blame] | 251 | |
| 252 | aos::starter::State status_ = aos::starter::State::STOPPED; |
| 253 | aos::starter::LastStopReason stop_reason_ = |
| 254 | aos::starter::LastStopReason::STOP_REQUESTED; |
| 255 | |
| 256 | aos::EventLoop *event_loop_; |
James Kuszmaul | d42edb4 | 2022-01-07 18:00:16 -0800 | [diff] [blame] | 257 | aos::TimerHandler *start_timer_, *restart_timer_, *stop_timer_, *pipe_timer_, |
| 258 | *child_status_handler_; |
James Kuszmaul | 3224b8e | 2022-01-07 19:00:39 -0800 | [diff] [blame] | 259 | |
James Kuszmaul | 8544c49 | 2023-07-31 15:00:38 -0700 | [diff] [blame] | 260 | // Version string from the most recent valid timing report for this |
| 261 | // application. Cleared when the application restarts. |
| 262 | std::optional<std::string> latest_timing_report_version_; |
| 263 | aos::monotonic_clock::time_point last_timing_report_ = |
| 264 | aos::monotonic_clock::min_time; |
| 265 | |
Austin Schuh | 1cea903 | 2023-07-10 11:56:40 -0700 | [diff] [blame] | 266 | std::vector<std::function<void()>> on_change_; |
James Kuszmaul | 3224b8e | 2022-01-07 19:00:39 -0800 | [diff] [blame] | 267 | |
Austin Schuh | bbeb37e | 2022-08-17 16:19:27 -0700 | [diff] [blame] | 268 | std::unique_ptr<MemoryCGroup> memory_cgroup_; |
| 269 | |
payton.rehl | 2841b1c | 2023-05-25 17:23:55 -0700 | [diff] [blame] | 270 | QuietLogging quiet_flag_ = QuietLogging::kNo; |
| 271 | |
James Kuszmaul | 3224b8e | 2022-01-07 19:00:39 -0800 | [diff] [blame] | 272 | DISALLOW_COPY_AND_ASSIGN(Application); |
| 273 | }; |
| 274 | |
| 275 | } // namespace aos::starter |
James Kuszmaul | d42edb4 | 2022-01-07 18:00:16 -0800 | [diff] [blame] | 276 | #endif // AOS_STARTER_SUBPROCESS_H_ |