James Kuszmaul | 3224b8e | 2022-01-07 19:00:39 -0800 | [diff] [blame] | 1 | #ifndef AOS_STARTER_SUBPROCESS_H_ |
| 2 | #define AOS_STARTER_SUBPROCESS_H_ |
| 3 | |
James Kuszmaul | d42edb4 | 2022-01-07 18:00:16 -0800 | [diff] [blame] | 4 | #include <memory> |
James Kuszmaul | 3224b8e | 2022-01-07 19:00:39 -0800 | [diff] [blame] | 5 | #include <string> |
James Kuszmaul | d42edb4 | 2022-01-07 18:00:16 -0800 | [diff] [blame] | 6 | #include <tuple> |
James Kuszmaul | 3224b8e | 2022-01-07 19:00:39 -0800 | [diff] [blame] | 7 | #include <vector> |
| 8 | |
| 9 | #include "aos/events/event_loop.h" |
| 10 | #include "aos/events/shm_event_loop.h" |
| 11 | #include "aos/starter/starter_generated.h" |
| 12 | #include "aos/starter/starter_rpc_generated.h" |
| 13 | #include "aos/util/scoped_pipe.h" |
James Kuszmaul | 6295a64 | 2022-03-22 15:23:59 -0700 | [diff] [blame] | 14 | #include "aos/util/top.h" |
James Kuszmaul | 3224b8e | 2022-01-07 19:00:39 -0800 | [diff] [blame] | 15 | |
| 16 | namespace aos::starter { |
| 17 | |
| 18 | // Registers a signalfd listener with the given event loop and calls callback |
| 19 | // whenever a signal is received. |
| 20 | class SignalListener { |
| 21 | public: |
| 22 | SignalListener(aos::ShmEventLoop *loop, |
| 23 | std::function<void(signalfd_siginfo)> callback); |
Austin Schuh | 1cea903 | 2023-07-10 11:56:40 -0700 | [diff] [blame] | 24 | SignalListener(aos::internal::EPoll *epoll, |
| 25 | std::function<void(signalfd_siginfo)> callback); |
James Kuszmaul | 3224b8e | 2022-01-07 19:00:39 -0800 | [diff] [blame] | 26 | SignalListener(aos::ShmEventLoop *loop, |
| 27 | std::function<void(signalfd_siginfo)> callback, |
| 28 | std::initializer_list<unsigned int> signals); |
Austin Schuh | 1cea903 | 2023-07-10 11:56:40 -0700 | [diff] [blame] | 29 | SignalListener(aos::internal::EPoll *epoll, |
| 30 | std::function<void(signalfd_siginfo)> callback, |
| 31 | std::initializer_list<unsigned int> signals); |
James Kuszmaul | 3224b8e | 2022-01-07 19:00:39 -0800 | [diff] [blame] | 32 | |
| 33 | ~SignalListener(); |
| 34 | |
| 35 | private: |
Austin Schuh | 1cea903 | 2023-07-10 11:56:40 -0700 | [diff] [blame] | 36 | aos::internal::EPoll *epoll_; |
James Kuszmaul | 3224b8e | 2022-01-07 19:00:39 -0800 | [diff] [blame] | 37 | std::function<void(signalfd_siginfo)> callback_; |
| 38 | aos::ipc_lib::SignalFd signalfd_; |
| 39 | |
| 40 | DISALLOW_COPY_AND_ASSIGN(SignalListener); |
| 41 | }; |
| 42 | |
// Class to use the V1 cgroup API to limit memory usage.
//
// Not copyable: with Create::kDoCreate the object is documented to remove the
// cgroup it created (presumably in the destructor; the implementation is not
// visible in this header), so a copy would refer to the same cgroup and lead
// to it being removed a second time.
class MemoryCGroup {
 public:
  // Enum to control if MemoryCGroup should create the cgroup and remove it on
  // its own, or if it should assume it already exists and just use it.
  enum class Create {
    kDoCreate,
    kDoNotCreate,
  };

  // name identifies the cgroup to use; should_create selects whether this
  // object creates/removes the cgroup or uses a pre-existing one.
  MemoryCGroup(std::string_view name, Create should_create = Create::kDoCreate);
  ~MemoryCGroup();

  MemoryCGroup(const MemoryCGroup &) = delete;
  MemoryCGroup &operator=(const MemoryCGroup &) = delete;

  // Adds a thread ID to be managed by the cgroup. pid defaults to 0; how 0 is
  // interpreted is up to the implementation (not visible here).
  void AddTid(pid_t pid = 0);

  // Sets the provided limit to the provided value.
  void SetLimit(std::string_view limit_name, uint64_t limit_value);

 private:
  std::string cgroup_;
  Create should_create_;
};
| 66 | |
James Kuszmaul | 3224b8e | 2022-01-07 19:00:39 -0800 | [diff] [blame] | 67 | // Manages a running process, allowing starting and stopping, and restarting |
| 68 | // automatically. |
| 69 | class Application { |
| 70 | public: |
Philipp Schrader | 595979d | 2023-09-13 11:31:48 -0700 | [diff] [blame] | 71 | enum class QuietLogging { |
| 72 | kYes, |
| 73 | kNo, |
| 74 | // For debugging child processes not behaving as expected. When a child |
| 75 | // experiences an event such as exiting with an error code or dying to due a |
| 76 | // signal, this option will cause a log statement to be printed. |
| 77 | kNotForDebugging, |
| 78 | }; |
James Kuszmaul | d42edb4 | 2022-01-07 18:00:16 -0800 | [diff] [blame] | 79 | Application(const aos::Application *application, aos::EventLoop *event_loop, |
payton.rehl | 2841b1c | 2023-05-25 17:23:55 -0700 | [diff] [blame] | 80 | std::function<void()> on_change, |
| 81 | QuietLogging quiet_flag = QuietLogging::kNo); |
James Kuszmaul | d42edb4 | 2022-01-07 18:00:16 -0800 | [diff] [blame] | 82 | |
Sarah Newman | 2c1b121 | 2022-08-10 10:05:48 -0700 | [diff] [blame] | 83 | // executable_name is the actual executable path. |
| 84 | // When sudo is not used, name is used as argv[0] when exec'ing |
| 85 | // executable_name. When sudo is used it's not possible to pass in a |
| 86 | // distinct argv[0]. |
James Kuszmaul | d42edb4 | 2022-01-07 18:00:16 -0800 | [diff] [blame] | 87 | Application(std::string_view name, std::string_view executable_name, |
payton.rehl | 2841b1c | 2023-05-25 17:23:55 -0700 | [diff] [blame] | 88 | aos::EventLoop *event_loop, std::function<void()> on_change, |
| 89 | QuietLogging quiet_flag = QuietLogging::kNo); |
James Kuszmaul | 3224b8e | 2022-01-07 19:00:39 -0800 | [diff] [blame] | 90 | |
Adam Snaider | 70deaf2 | 2023-08-11 13:58:34 -0700 | [diff] [blame] | 91 | ~Application(); |
| 92 | |
James Kuszmaul | 3224b8e | 2022-01-07 19:00:39 -0800 | [diff] [blame] | 93 | flatbuffers::Offset<aos::starter::ApplicationStatus> PopulateStatus( |
James Kuszmaul | 6295a64 | 2022-03-22 15:23:59 -0700 | [diff] [blame] | 94 | flatbuffers::FlatBufferBuilder *builder, util::Top *top); |
James Kuszmaul | d42edb4 | 2022-01-07 18:00:16 -0800 | [diff] [blame] | 95 | aos::starter::State status() const { return status_; }; |
James Kuszmaul | 3224b8e | 2022-01-07 19:00:39 -0800 | [diff] [blame] | 96 | |
| 97 | // Returns the last pid of this process. -1 if not started yet. |
| 98 | pid_t get_pid() const { return pid_; } |
| 99 | |
| 100 | // Handles a SIGCHLD signal received by the parent. Does nothing if this |
| 101 | // process was not the target. Returns true if this Application should be |
| 102 | // removed. |
| 103 | bool MaybeHandleSignal(); |
James Kuszmaul | d42edb4 | 2022-01-07 18:00:16 -0800 | [diff] [blame] | 104 | void DisableChildDeathPolling() { child_status_handler_->Disable(); } |
James Kuszmaul | 3224b8e | 2022-01-07 19:00:39 -0800 | [diff] [blame] | 105 | |
| 106 | // Handles a command. May do nothing if application is already in the desired |
| 107 | // state. |
| 108 | void HandleCommand(aos::starter::Command cmd); |
| 109 | |
| 110 | void Start() { HandleCommand(aos::starter::Command::START); } |
| 111 | |
Sanjay Narayanan | 92fdc3d | 2023-08-25 14:42:56 -0700 | [diff] [blame] | 112 | // Stops the command by sending a SIGINT first, followed by a SIGKILL if it's |
| 113 | // still alive in 1s. |
James Kuszmaul | 3224b8e | 2022-01-07 19:00:39 -0800 | [diff] [blame] | 114 | void Stop() { HandleCommand(aos::starter::Command::STOP); } |
| 115 | |
Sanjay Narayanan | 92fdc3d | 2023-08-25 14:42:56 -0700 | [diff] [blame] | 116 | // Stops the command the same way as Stop() does, but updates internal state |
| 117 | // to reflect that the application was terminated. |
James Kuszmaul | 3224b8e | 2022-01-07 19:00:39 -0800 | [diff] [blame] | 118 | void Terminate(); |
| 119 | |
Austin Schuh | 1cea903 | 2023-07-10 11:56:40 -0700 | [diff] [blame] | 120 | // Adds a callback which gets notified when the application changes state. |
| 121 | // This is in addition to any existing callbacks and doesn't replace any of |
| 122 | // them. |
| 123 | void AddOnChange(std::function<void()> fn) { |
| 124 | on_change_.emplace_back(std::move(fn)); |
| 125 | } |
| 126 | |
James Kuszmaul | d42edb4 | 2022-01-07 18:00:16 -0800 | [diff] [blame] | 127 | void set_args(std::vector<std::string> args); |
| 128 | void set_capture_stdout(bool capture); |
| 129 | void set_capture_stderr(bool capture); |
Sanjay Narayanan | 01a228f | 2022-04-26 14:19:30 -0700 | [diff] [blame] | 130 | void set_run_as_sudo(bool value) { run_as_sudo_ = value; } |
James Kuszmaul | 3224b8e | 2022-01-07 19:00:39 -0800 | [diff] [blame] | 131 | |
| 132 | bool autostart() const { return autostart_; } |
| 133 | |
| 134 | bool autorestart() const { return autorestart_; } |
Adam Snaider | 70deaf2 | 2023-08-11 13:58:34 -0700 | [diff] [blame] | 135 | void set_autorestart(bool autorestart) { autorestart_ = autorestart; } |
James Kuszmaul | 3224b8e | 2022-01-07 19:00:39 -0800 | [diff] [blame] | 136 | |
Philipp Schrader | 595979d | 2023-09-13 11:31:48 -0700 | [diff] [blame] | 137 | LastStopReason stop_reason() const { return stop_reason_; } |
| 138 | |
James Kuszmaul | d42edb4 | 2022-01-07 18:00:16 -0800 | [diff] [blame] | 139 | const std::string &GetStdout(); |
| 140 | const std::string &GetStderr(); |
| 141 | std::optional<int> exit_code() const { return exit_code_; } |
| 142 | |
Austin Schuh | bbeb37e | 2022-08-17 16:19:27 -0700 | [diff] [blame] | 143 | // Sets the memory limit for the application to the provided limit. |
| 144 | void SetMemoryLimit(size_t limit) { |
| 145 | if (!memory_cgroup_) { |
| 146 | memory_cgroup_ = std::make_unique<MemoryCGroup>(name_); |
| 147 | } |
| 148 | memory_cgroup_->SetLimit("memory.limit_in_bytes", limit); |
| 149 | } |
| 150 | |
Austin Schuh | 77e20a3 | 2023-08-01 12:25:03 -0700 | [diff] [blame] | 151 | // Sets the cgroup and memory limit to a pre-existing cgroup which is |
| 152 | // externally managed. This lets us configure the cgroup of an application |
| 153 | // without root access. |
| 154 | void SetExistingCgroupMemoryLimit(std::string_view name, size_t limit) { |
| 155 | if (!memory_cgroup_) { |
| 156 | memory_cgroup_ = std::make_unique<MemoryCGroup>( |
| 157 | name, MemoryCGroup::Create::kDoNotCreate); |
| 158 | } |
| 159 | memory_cgroup_->SetLimit("memory.limit_in_bytes", limit); |
| 160 | } |
| 161 | |
James Kuszmaul | 8544c49 | 2023-07-31 15:00:38 -0700 | [diff] [blame] | 162 | // Observe a timing report message, and save it if it is relevant to us. |
| 163 | // It is the responsibility of the caller to manage this, because the lifetime |
| 164 | // of the Application itself is such that it cannot own Fetchers readily. |
| 165 | void ObserveTimingReport(const aos::monotonic_clock::time_point send_time, |
| 166 | const aos::timing::Report *msg); |
| 167 | |
James Kuszmaul | 3224b8e | 2022-01-07 19:00:39 -0800 | [diff] [blame] | 168 | private: |
James Kuszmaul | d42edb4 | 2022-01-07 18:00:16 -0800 | [diff] [blame] | 169 | typedef aos::util::ScopedPipe::PipePair PipePair; |
Sanjay Narayanan | 01a228f | 2022-04-26 14:19:30 -0700 | [diff] [blame] | 170 | |
Philipp Schrader | 790cb54 | 2023-07-05 21:06:52 -0700 | [diff] [blame] | 171 | static constexpr const char *const kSudo{"sudo"}; |
Sanjay Narayanan | 01a228f | 2022-04-26 14:19:30 -0700 | [diff] [blame] | 172 | |
James Kuszmaul | d42edb4 | 2022-01-07 18:00:16 -0800 | [diff] [blame] | 173 | void set_args( |
| 174 | const flatbuffers::Vector<flatbuffers::Offset<flatbuffers::String>> |
| 175 | &args); |
| 176 | |
James Kuszmaul | 3224b8e | 2022-01-07 19:00:39 -0800 | [diff] [blame] | 177 | void DoStart(); |
| 178 | |
| 179 | void DoStop(bool restart); |
| 180 | |
| 181 | void QueueStart(); |
| 182 | |
Austin Schuh | 1cea903 | 2023-07-10 11:56:40 -0700 | [diff] [blame] | 183 | void OnChange(); |
| 184 | |
James Kuszmaul | 3224b8e | 2022-01-07 19:00:39 -0800 | [diff] [blame] | 185 | // Copy flatbuffer vector of strings to vector of std::string. |
| 186 | static std::vector<std::string> FbsVectorToVector( |
| 187 | const flatbuffers::Vector<flatbuffers::Offset<flatbuffers::String>> &v); |
| 188 | |
| 189 | static std::optional<uid_t> FindUid(const char *name); |
| 190 | static std::optional<gid_t> FindPrimaryGidForUser(const char *name); |
| 191 | |
James Kuszmaul | d42edb4 | 2022-01-07 18:00:16 -0800 | [diff] [blame] | 192 | void FetchOutputs(); |
| 193 | |
| 194 | // Provides an std::vector of the args (such that CArgs().data() ends up being |
| 195 | // suitable to pass to execve()). |
| 196 | // The points are invalidated when args_ changes (e.g., due to a set_args |
| 197 | // call). |
| 198 | std::vector<char *> CArgs(); |
| 199 | |
James Kuszmaul | 3224b8e | 2022-01-07 19:00:39 -0800 | [diff] [blame] | 200 | // Next unique id for all applications |
| 201 | static inline uint64_t next_id_ = 0; |
| 202 | |
| 203 | std::string name_; |
| 204 | std::string path_; |
James Kuszmaul | d42edb4 | 2022-01-07 18:00:16 -0800 | [diff] [blame] | 205 | std::vector<std::string> args_; |
James Kuszmaul | 3224b8e | 2022-01-07 19:00:39 -0800 | [diff] [blame] | 206 | std::string user_name_; |
| 207 | std::optional<uid_t> user_; |
| 208 | std::optional<gid_t> group_; |
Sanjay Narayanan | 01a228f | 2022-04-26 14:19:30 -0700 | [diff] [blame] | 209 | bool run_as_sudo_ = false; |
James Kuszmaul | 3224b8e | 2022-01-07 19:00:39 -0800 | [diff] [blame] | 210 | |
James Kuszmaul | d42edb4 | 2022-01-07 18:00:16 -0800 | [diff] [blame] | 211 | bool capture_stdout_ = false; |
| 212 | PipePair stdout_pipes_; |
| 213 | std::string stdout_; |
| 214 | bool capture_stderr_ = false; |
| 215 | PipePair stderr_pipes_; |
| 216 | std::string stderr_; |
| 217 | |
James Kuszmaul | 3224b8e | 2022-01-07 19:00:39 -0800 | [diff] [blame] | 218 | pid_t pid_ = -1; |
James Kuszmaul | d42edb4 | 2022-01-07 18:00:16 -0800 | [diff] [blame] | 219 | PipePair status_pipes_; |
| 220 | uint64_t id_ = 0; |
| 221 | std::optional<int> exit_code_; |
James Kuszmaul | 3224b8e | 2022-01-07 19:00:39 -0800 | [diff] [blame] | 222 | aos::monotonic_clock::time_point start_time_, exit_time_; |
| 223 | bool queue_restart_ = false; |
| 224 | bool terminating_ = false; |
James Kuszmaul | d42edb4 | 2022-01-07 18:00:16 -0800 | [diff] [blame] | 225 | bool autostart_ = false; |
| 226 | bool autorestart_ = false; |
James Kuszmaul | 3224b8e | 2022-01-07 19:00:39 -0800 | [diff] [blame] | 227 | |
| 228 | aos::starter::State status_ = aos::starter::State::STOPPED; |
| 229 | aos::starter::LastStopReason stop_reason_ = |
| 230 | aos::starter::LastStopReason::STOP_REQUESTED; |
| 231 | |
| 232 | aos::EventLoop *event_loop_; |
James Kuszmaul | d42edb4 | 2022-01-07 18:00:16 -0800 | [diff] [blame] | 233 | aos::TimerHandler *start_timer_, *restart_timer_, *stop_timer_, *pipe_timer_, |
| 234 | *child_status_handler_; |
James Kuszmaul | 3224b8e | 2022-01-07 19:00:39 -0800 | [diff] [blame] | 235 | |
James Kuszmaul | 8544c49 | 2023-07-31 15:00:38 -0700 | [diff] [blame] | 236 | // Version string from the most recent valid timing report for this |
| 237 | // application. Cleared when the application restarts. |
| 238 | std::optional<std::string> latest_timing_report_version_; |
| 239 | aos::monotonic_clock::time_point last_timing_report_ = |
| 240 | aos::monotonic_clock::min_time; |
| 241 | |
Austin Schuh | 1cea903 | 2023-07-10 11:56:40 -0700 | [diff] [blame] | 242 | std::vector<std::function<void()>> on_change_; |
James Kuszmaul | 3224b8e | 2022-01-07 19:00:39 -0800 | [diff] [blame] | 243 | |
Austin Schuh | bbeb37e | 2022-08-17 16:19:27 -0700 | [diff] [blame] | 244 | std::unique_ptr<MemoryCGroup> memory_cgroup_; |
| 245 | |
payton.rehl | 2841b1c | 2023-05-25 17:23:55 -0700 | [diff] [blame] | 246 | QuietLogging quiet_flag_ = QuietLogging::kNo; |
| 247 | |
James Kuszmaul | 3224b8e | 2022-01-07 19:00:39 -0800 | [diff] [blame] | 248 | DISALLOW_COPY_AND_ASSIGN(Application); |
| 249 | }; |
| 250 | |
| 251 | } // namespace aos::starter |
James Kuszmaul | d42edb4 | 2022-01-07 18:00:16 -0800 | [diff] [blame] | 252 | #endif // AOS_STARTER_SUBPROCESS_H_ |