blob: 340bbebadd23a92d990228c4b5d76bc09a40869e [file] [log] [blame]
// This has to come before anybody drags in <stdlib.h> or else we end up with
// the wrong version of WIFEXITED etc (for one thing, they don't const-qualify
// their casts) (sometimes at least).
4#include <sys/wait.h>
5
Brian Silvermand169fcd2013-02-27 13:18:47 -08006#include <stdio.h>
7#include <stdlib.h>
8#include <sys/types.h>
9#include <fcntl.h>
10#include <sys/inotify.h>
11#include <sys/stat.h>
12#include <sys/ioctl.h>
Brian Silvermand169fcd2013-02-27 13:18:47 -080013#include <signal.h>
14#include <stdint.h>
15#include <errno.h>
16#include <string.h>
Brian Silvermand90b5fe2013-03-10 18:34:42 -070017#include <inttypes.h>
Brian Silvermand169fcd2013-02-27 13:18:47 -080018
19#include <map>
20#include <functional>
21#include <deque>
22#include <fstream>
23#include <queue>
24#include <list>
25#include <string>
26#include <vector>
27#include <memory>
Brian Silvermand94642c2014-03-27 18:21:41 -070028#include <set>
Brian Silvermand169fcd2013-02-27 13:18:47 -080029
Brian Silverman1463c092020-10-30 17:28:24 -070030#include "absl/base/call_once.h"
31#include "glog/logging.h"
Brian Silverman258b9172015-09-19 14:32:57 -040032#include "third_party/libevent/event.h"
Brian Silvermand169fcd2013-02-27 13:18:47 -080033
John Park33858a32018-09-28 23:05:48 -070034#include "aos/libc/aos_strsignal.h"
35#include "aos/logging/implementations.h"
36#include "aos/logging/logging.h"
37#include "aos/time/time.h"
38#include "aos/unique_malloc_ptr.h"
39#include "aos/util/run_command.h"
John Park398c74a2018-10-20 21:17:39 -070040#include "aos/init.h"
Brian Silvermand169fcd2013-02-27 13:18:47 -080041
// This is the main piece of code that starts all of the rest of the code and
// restarts it when the binaries are modified.
//
// Throughout, the code is not terribly concerned with thread safety because
// there is only 1 thread. It does some setup and then lets inotify run things
// when appropriate.
//
// NOTE: This program should never exit nicely. It catches all nice attempts to
// exit, forwards them to all of the children that it has started, waits for
// them to exit nicely, and then SIGKILLs anybody left (which will always
// include itself).
53
54using ::std::unique_ptr;
55
56namespace aos {
57namespace starter {
58
Austin Schuhf2a50ba2016-12-24 16:16:26 -080059namespace chrono = ::std::chrono;
60
Brian Silverman0eec9532013-02-27 20:24:16 -080061// TODO(brians): split out the c++ libevent wrapper stuff into its own file(s)
Brian Silvermand169fcd2013-02-27 13:18:47 -080062class EventBaseDeleter {
63 public:
64 void operator()(event_base *base) {
Brian Silverman8070a222013-02-28 15:01:36 -080065 if (base == NULL) return;
Brian Silvermand169fcd2013-02-27 13:18:47 -080066 event_base_free(base);
67 }
68};
69typedef unique_ptr<event_base, EventBaseDeleter> EventBaseUniquePtr;
Brian Silverman5cc661b2013-02-27 15:23:36 -080070EventBaseUniquePtr libevent_base;
Brian Silvermand169fcd2013-02-27 13:18:47 -080071
72class EventDeleter {
73 public:
74 void operator()(event *evt) {
Brian Silverman8070a222013-02-28 15:01:36 -080075 if (evt == NULL) return;
Brian Silvermand169fcd2013-02-27 13:18:47 -080076 if (event_del(evt) != 0) {
Austin Schuh70551b72020-02-22 14:52:23 -080077 LOG(WARNING) << "event_del(" << evt << ") failed";
Brian Silvermand169fcd2013-02-27 13:18:47 -080078 }
79 }
80};
81typedef unique_ptr<event, EventDeleter> EventUniquePtr;
82
Brian Silverman5cc661b2013-02-27 15:23:36 -080083// Watches a file path for modifications. Once created, keeps watching until
84// destroyed or RemoveWatch() is called.
Brian Silverman0eec9532013-02-27 20:24:16 -080085// TODO(brians): split this out into its own file + tests
Brian Silvermand169fcd2013-02-27 13:18:47 -080086class FileWatch {
87 public:
88 // Will call callback(value) when filename is modified.
89 // If value is NULL, then a pointer to this object will be passed instead.
Brian Silverman5cc661b2013-02-27 15:23:36 -080090 //
91 // Watching for file creations is slightly different. To do that, pass true
Brian Silverman8070a222013-02-28 15:01:36 -080092 // as create, the directory where the file will be created for filename, and
Brian Silverman5cc661b2013-02-27 15:23:36 -080093 // the name of the file (without directory name) for check_filename.
Brian Silvermand169fcd2013-02-27 13:18:47 -080094 FileWatch(std::string filename,
Brian Silverman8070a222013-02-28 15:01:36 -080095 std::function<void(void *)> callback,
96 void *value,
97 bool create = false,
98 std::string check_filename = "")
99 : filename_(filename),
100 callback_(callback),
101 value_(value),
Brian Silvermand90b5fe2013-03-10 18:34:42 -0700102 create_(create),
103 check_filename_(check_filename),
104 watch_(-1) {
John Park7bf05bf2019-12-02 21:33:19 -0800105 absl::call_once(once_, Init);
Brian Silverman5cc661b2013-02-27 15:23:36 -0800106
Brian Silvermand90b5fe2013-03-10 18:34:42 -0700107 CreateWatch();
Brian Silvermand169fcd2013-02-27 13:18:47 -0800108 }
John Park7bf05bf2019-12-02 21:33:19 -0800109
Brian Silvermand169fcd2013-02-27 13:18:47 -0800110 // Cleans up everything.
111 ~FileWatch() {
112 if (watch_ != -1) {
113 RemoveWatch();
114 }
115 }
116
117 // After calling this method, this object won't really be doing much of
Brian Silverman5cc661b2013-02-27 15:23:36 -0800118 // anything besides possibly running its callback or something.
Brian Silvermand169fcd2013-02-27 13:18:47 -0800119 void RemoveWatch() {
Austin Schuhf257f3c2019-10-27 21:00:43 -0700120 AOS_CHECK_NE(watch_, -1);
121 AOS_CHECK_EQ(watch_to_remove_, -1);
Brian Silverman5cc661b2013-02-27 15:23:36 -0800122
Brian Silvermand169fcd2013-02-27 13:18:47 -0800123 if (inotify_rm_watch(notify_fd, watch_) == -1) {
Austin Schuhf257f3c2019-10-27 21:00:43 -0700124 AOS_PLOG(WARNING, "inotify_rm_watch(%d, %d) failed", notify_fd, watch_);
Brian Silvermand169fcd2013-02-27 13:18:47 -0800125 }
Brian Silvermand94642c2014-03-27 18:21:41 -0700126 watch_to_remove_ = watch_;
127 watch_ = -1;
Brian Silvermand169fcd2013-02-27 13:18:47 -0800128 }
129
Brian Silverman5cc661b2013-02-27 15:23:36 -0800130 private:
John Park7bf05bf2019-12-02 21:33:19 -0800131 // Performs the static initialization. Called by from the constructor
132 static void Init() {
Brian Silverman5cc661b2013-02-27 15:23:36 -0800133 notify_fd = inotify_init1(IN_CLOEXEC);
134 EventUniquePtr notify_event(event_new(libevent_base.get(), notify_fd,
135 EV_READ | EV_PERSIST,
136 FileWatch::INotifyReadable, NULL));
137 event_add(notify_event.release(), NULL);
Brian Silvermand169fcd2013-02-27 13:18:47 -0800138 }
139
Brian Silvermand90b5fe2013-03-10 18:34:42 -0700140 void RemoveWatchFromMap() {
Brian Silvermand94642c2014-03-27 18:21:41 -0700141 int watch = watch_to_remove_;
142 if (watch == -1) {
Austin Schuh70551b72020-02-22 14:52:23 -0800143 CHECK_NE(watch_, -1);
Brian Silvermand94642c2014-03-27 18:21:41 -0700144 watch = watch_;
145 }
146 if (watchers[watch] != this) {
Austin Schuh70551b72020-02-22 14:52:23 -0800147 LOG(WARNING) << "watcher for " << filename_ << " (" << this
148 << ") didn't find itself in the map";
Brian Silvermand90b5fe2013-03-10 18:34:42 -0700149 } else {
Brian Silvermand94642c2014-03-27 18:21:41 -0700150 watchers.erase(watch);
Brian Silvermand90b5fe2013-03-10 18:34:42 -0700151 }
Austin Schuh70551b72020-02-22 14:52:23 -0800152 VLOG(1) << "removed watch ID " << watch;
Brian Silvermand94642c2014-03-27 18:21:41 -0700153 if (watch_to_remove_ == -1) {
154 watch_ = -1;
155 } else {
156 watch_to_remove_ = -1;
157 }
Brian Silvermand90b5fe2013-03-10 18:34:42 -0700158 }
159
160 void CreateWatch() {
Austin Schuh70551b72020-02-22 14:52:23 -0800161 CHECK_EQ(watch_, -1);
Brian Silvermand90b5fe2013-03-10 18:34:42 -0700162 watch_ = inotify_add_watch(notify_fd, filename_.c_str(),
163 create_ ? IN_CREATE : (IN_ATTRIB |
164 IN_MODIFY |
165 IN_DELETE_SELF |
166 IN_MOVE_SELF));
167 if (watch_ == -1) {
Austin Schuh70551b72020-02-22 14:52:23 -0800168 PLOG(FATAL) << "inotify_add_watch(" << notify_fd << ", " << filename_
169 << ", " << (create_ ? "true" : "false")
170 << " ? IN_CREATE : (IN_ATTRIB | IN_MODIFY)) failed";
Brian Silvermand90b5fe2013-03-10 18:34:42 -0700171 }
172 watchers[watch_] = this;
Austin Schuh70551b72020-02-22 14:52:23 -0800173 VLOG(1) << "watch for " << filename_ << " is " << watch_;
Brian Silvermand90b5fe2013-03-10 18:34:42 -0700174 }
175
Brian Silvermand169fcd2013-02-27 13:18:47 -0800176 // This gets set up as the callback for EV_READ on the inotify file
Brian Silverman5cc661b2013-02-27 15:23:36 -0800177 // descriptor. It calls FileNotified on the appropriate instance.
Brian Silvermand169fcd2013-02-27 13:18:47 -0800178 static void INotifyReadable(int /*fd*/, short /*events*/, void *) {
179 unsigned int to_read;
Brian Silverman5cc661b2013-02-27 15:23:36 -0800180 // Use FIONREAD to figure out how many bytes there are to read.
Brian Silvermand169fcd2013-02-27 13:18:47 -0800181 if (ioctl(notify_fd, FIONREAD, &to_read) < 0) {
Austin Schuh70551b72020-02-22 14:52:23 -0800182 PLOG(FATAL) << "FIONREAD(" << notify_fd << ", " << &to_read << ") failed";
Brian Silvermand169fcd2013-02-27 13:18:47 -0800183 }
184 inotify_event *notifyevt = static_cast<inotify_event *>(malloc(to_read));
185 const char *end = reinterpret_cast<char *>(notifyevt) + to_read;
186 aos::unique_c_ptr<inotify_event> freer(notifyevt);
187
188 ssize_t ret = read(notify_fd, notifyevt, to_read);
189 if (ret < 0) {
Austin Schuhf257f3c2019-10-27 21:00:43 -0700190 AOS_PLOG(FATAL, "read(%d, %p, %u) failed", notify_fd, notifyevt, to_read);
Brian Silvermand169fcd2013-02-27 13:18:47 -0800191 }
192 if (static_cast<size_t>(ret) != to_read) {
Austin Schuh70551b72020-02-22 14:52:23 -0800193 LOG(ERROR) << "read(" << notify_fd << ", " << notifyevt << ", " << to_read
194 << ") returned " << ret << " instead of " << to_read;
Brian Silvermand169fcd2013-02-27 13:18:47 -0800195 return;
196 }
197
Brian Silverman5cc661b2013-02-27 15:23:36 -0800198 // Keep looping through until we get to the end because inotify does return
199 // multiple events at once.
Brian Silvermanb1e4f6c2013-02-27 15:42:02 -0800200 while (reinterpret_cast<char *>(notifyevt) < end) {
Brian Silvermand169fcd2013-02-27 13:18:47 -0800201 if (watchers.count(notifyevt->wd) != 1) {
Austin Schuh70551b72020-02-22 14:52:23 -0800202 LOG(WARNING) << "couldn't find whose watch ID " << notifyevt->wd
203 << " is";
Brian Silvermanb1e4f6c2013-02-27 15:42:02 -0800204 } else {
Austin Schuh70551b72020-02-22 14:52:23 -0800205 VLOG(1) << "mask=" << notifyevt->mask;
Brian Silvermand94642c2014-03-27 18:21:41 -0700206 // If the watch was removed.
207 if (notifyevt->mask & IN_IGNORED) {
Brian Silvermand90b5fe2013-03-10 18:34:42 -0700208 watchers[notifyevt->wd]->WatchDeleted();
209 } else {
Brian Silvermand94642c2014-03-27 18:21:41 -0700210 watchers[notifyevt->wd]
211 ->FileNotified((notifyevt->len > 0) ? notifyevt->name : NULL);
Brian Silvermand90b5fe2013-03-10 18:34:42 -0700212 }
Brian Silvermand169fcd2013-02-27 13:18:47 -0800213 }
Brian Silvermand169fcd2013-02-27 13:18:47 -0800214
215 notifyevt = reinterpret_cast<inotify_event *>(
Brian Silvermandbdf1d02013-11-17 13:19:41 -0800216 __builtin_assume_aligned(reinterpret_cast<char *>(notifyevt) +
217 sizeof(*notifyevt) + notifyevt->len,
Brian Silvermanafc00a62014-04-21 17:51:23 -0700218 alignof(inotify_event)));
Brian Silvermand169fcd2013-02-27 13:18:47 -0800219 }
220 }
221
Brian Silvermand90b5fe2013-03-10 18:34:42 -0700222 // INotifyReadable calls this method whenever the watch for our file gets
223 // removed somehow.
224 void WatchDeleted() {
Austin Schuh70551b72020-02-22 14:52:23 -0800225 VLOG(1) << "watch for " << filename_ << " deleted";
Brian Silvermand90b5fe2013-03-10 18:34:42 -0700226 RemoveWatchFromMap();
227 CreateWatch();
228 }
229
Brian Silverman5cc661b2013-02-27 15:23:36 -0800230 // INotifyReadable calls this method whenever the watch for our file triggers.
231 void FileNotified(const char *filename) {
Austin Schuhf257f3c2019-10-27 21:00:43 -0700232 AOS_CHECK_NE(watch_, -1);
Austin Schuh70551b72020-02-22 14:52:23 -0800233 VLOG(1) << "got a notification for " << filename_;
Brian Silverman5cc661b2013-02-27 15:23:36 -0800234
235 if (!check_filename_.empty()) {
236 if (filename == NULL) {
237 return;
238 }
239 if (std::string(filename) != check_filename_) {
240 return;
241 }
242 }
243
244 callback_((value_ == NULL) ? this : value_);
Brian Silvermand169fcd2013-02-27 13:18:47 -0800245 }
246
John Park7bf05bf2019-12-02 21:33:19 -0800247 static absl::once_flag once_;
Brian Silvermand169fcd2013-02-27 13:18:47 -0800248 const std::string filename_;
249 const std::function<void(void *)> callback_;
250 void *const value_;
Brian Silvermand90b5fe2013-03-10 18:34:42 -0700251 const bool create_;
Brian Silvermand169fcd2013-02-27 13:18:47 -0800252 std::string check_filename_;
253
254 // The watch descriptor or -1 if we don't have one any more.
255 int watch_;
Brian Silvermand94642c2014-03-27 18:21:41 -0700256 // The watch that we still have to take out of the map once we get the
257 // IN_IGNORED or -1.
258 int watch_to_remove_ = -1;
Brian Silvermand169fcd2013-02-27 13:18:47 -0800259
Brian Silvermanbc4fc2f2013-02-27 19:33:42 -0800260 // Map from watch IDs to instances of this class.
261 // <https://patchwork.kernel.org/patch/73192/> ("inotify: do not reuse watch
262 // descriptors") says they won't get reused, but that shouldn't be counted on
263 // because we might have a modified/different version/whatever kernel.
Brian Silvermand169fcd2013-02-27 13:18:47 -0800264 static std::map<int, FileWatch *> watchers;
Brian Silverman5cc661b2013-02-27 15:23:36 -0800265 // The inotify(7) file descriptor.
Brian Silvermand169fcd2013-02-27 13:18:47 -0800266 static int notify_fd;
Brian Silvermanbc4fc2f2013-02-27 19:33:42 -0800267
268 DISALLOW_COPY_AND_ASSIGN(FileWatch);
Brian Silvermand169fcd2013-02-27 13:18:47 -0800269};
270std::map<int, FileWatch *> FileWatch::watchers;
271int FileWatch::notify_fd;
John Park7bf05bf2019-12-02 21:33:19 -0800272absl::once_flag FileWatch::once_;
Brian Silvermand169fcd2013-02-27 13:18:47 -0800273
Brian Silverman5cc661b2013-02-27 15:23:36 -0800274// Runs the given command and returns its first line of output (not including
275// the \n). LOG(FATAL)s if the command has an exit status other than 0 or does
276// not print out an entire line.
Brian Silvermand169fcd2013-02-27 13:18:47 -0800277std::string RunCommand(std::string command) {
Brian Silverman5cc661b2013-02-27 15:23:36 -0800278 // popen(3) might fail and not set it.
Brian Silvermand169fcd2013-02-27 13:18:47 -0800279 errno = 0;
Brian Silverman5cc661b2013-02-27 15:23:36 -0800280 FILE *pipe = popen(command.c_str(), "r");
281 if (pipe == NULL) {
Austin Schuhf257f3c2019-10-27 21:00:43 -0700282 AOS_PLOG(FATAL, "popen(\"%s\", \"r\") failed", command.c_str());
Brian Silvermand169fcd2013-02-27 13:18:47 -0800283 }
284
Brian Silverman5cc661b2013-02-27 15:23:36 -0800285 // result_size is how many bytes result is currently allocated to.
Brian Silvermand169fcd2013-02-27 13:18:47 -0800286 size_t result_size = 128, read = 0;
287 unique_c_ptr<char> result(static_cast<char *>(malloc(result_size)));
288 while (true) {
Brian Silverman5cc661b2013-02-27 15:23:36 -0800289 // If we filled up the buffer, then realloc(3) it bigger.
Brian Silvermand169fcd2013-02-27 13:18:47 -0800290 if (read == result_size) {
291 result_size *= 2;
292 void *new_result = realloc(result.get(), result_size);
293 if (new_result == NULL) {
Austin Schuhf257f3c2019-10-27 21:00:43 -0700294 AOS_PLOG(FATAL, "realloc(%p, %zd) failed", result.get(), result_size);
Brian Silvermand169fcd2013-02-27 13:18:47 -0800295 } else {
296 result.release();
297 result = unique_c_ptr<char>(static_cast<char *>(new_result));
298 }
299 }
300
Brian Silverman5cc661b2013-02-27 15:23:36 -0800301 size_t ret = fread(result.get() + read, 1, result_size - read, pipe);
302 // If the read didn't fill up the whole buffer, check to see if it was
303 // because of an error.
Brian Silvermand169fcd2013-02-27 13:18:47 -0800304 if (ret < result_size - read) {
Brian Silverman5cc661b2013-02-27 15:23:36 -0800305 if (ferror(pipe)) {
Austin Schuhf257f3c2019-10-27 21:00:43 -0700306 AOS_PLOG(FATAL, "couldn't finish reading output of \"%s\"\n",
307 command.c_str());
Brian Silvermand169fcd2013-02-27 13:18:47 -0800308 }
309 }
310 read += ret;
311 if (read > 0 && result.get()[read - 1] == '\n') {
312 break;
313 }
314
Brian Silverman5cc661b2013-02-27 15:23:36 -0800315 if (feof(pipe)) {
Austin Schuh70551b72020-02-22 14:52:23 -0800316 LOG(FATAL) << "`" << command << "` failed. didn't print a whole line";
Brian Silvermand169fcd2013-02-27 13:18:47 -0800317 }
318 }
319
Brian Silverman5cc661b2013-02-27 15:23:36 -0800320 // Get rid of the first \n and anything after it.
Brian Silvermand169fcd2013-02-27 13:18:47 -0800321 *strchrnul(result.get(), '\n') = '\0';
322
Brian Silverman5cc661b2013-02-27 15:23:36 -0800323 int child_status = pclose(pipe);
Brian Silvermand169fcd2013-02-27 13:18:47 -0800324 if (child_status == -1) {
Austin Schuhf257f3c2019-10-27 21:00:43 -0700325 AOS_PLOG(FATAL, "pclose(%p) failed", pipe);
Brian Silvermand169fcd2013-02-27 13:18:47 -0800326 }
327
328 if (child_status != 0) {
Austin Schuh70551b72020-02-22 14:52:23 -0800329 LOG(FATAL) << "`" << command << "` failed. return " << child_status;
Brian Silvermand169fcd2013-02-27 13:18:47 -0800330 }
331
332 return std::string(result.get());
333}
334
335// Will call callback(arg) after time.
Austin Schuhf2a50ba2016-12-24 16:16:26 -0800336void Timeout(monotonic_clock::duration time,
337 void (*callback)(int, short, void *), void *arg) {
Brian Silvermand169fcd2013-02-27 13:18:47 -0800338 EventUniquePtr timeout(evtimer_new(libevent_base.get(), callback, arg));
Austin Schuhf2a50ba2016-12-24 16:16:26 -0800339 struct timeval time_timeval;
340 {
341 ::std::chrono::seconds sec =
342 ::std::chrono::duration_cast<::std::chrono::seconds>(time);
343 ::std::chrono::microseconds usec =
344 ::std::chrono::duration_cast<::std::chrono::microseconds>(time - sec);
345 time_timeval.tv_sec = sec.count();
346 time_timeval.tv_usec = usec.count();
347 }
Brian Silvermand94642c2014-03-27 18:21:41 -0700348 if (evtimer_add(timeout.release(), &time_timeval) != 0) {
Austin Schuh1d4920f2020-02-22 14:56:37 -0800349 LOG(FATAL) << "evtimer_add(" << timeout.release() << ", " << &time_timeval
350 << ") failed";
Brian Silvermand94642c2014-03-27 18:21:41 -0700351 }
Brian Silvermand169fcd2013-02-27 13:18:47 -0800352}
353
Brian Silvermand94642c2014-03-27 18:21:41 -0700354class Child;
Austin Schuh1d4920f2020-02-22 14:56:37 -0800355// This is where all of the Child instances live.
Brian Silvermand94642c2014-03-27 18:21:41 -0700356std::vector<unique_ptr<Child>> children;
Brian Silvermand94642c2014-03-27 18:21:41 -0700357
Brian Silvermand169fcd2013-02-27 13:18:47 -0800358// Represents a child process. It will take care of restarting itself etc.
359class Child {
360 public:
Brian Silverman5cc661b2013-02-27 15:23:36 -0800361 // command is the (space-separated) command to run and its arguments.
362 Child(const std::string &command) : pid_(-1),
Brian Silvermanfe06fe12013-02-27 18:54:58 -0800363 stat_at_start_valid_(false) {
Brian Silvermand94642c2014-03-27 18:21:41 -0700364 if (!restart_timeout) {
365 restart_timeout = EventUniquePtr(
366 evtimer_new(libevent_base.get(), StaticDoRestart, nullptr));
367 }
Brian Silvermand169fcd2013-02-27 13:18:47 -0800368 const char *start, *end;
369 start = command.c_str();
370 while (true) {
371 end = strchrnul(start, ' ');
372 args_.push_back(std::string(start, end - start));
373 start = end + 1;
374 if (*end == '\0') {
375 break;
376 }
377 }
378
Brian Silverman5cc661b2013-02-27 15:23:36 -0800379 original_binary_ = RunCommand("which " + args_[0]);
380 binary_ = original_binary_ + ".stm";
Brian Silvermand169fcd2013-02-27 13:18:47 -0800381
382 watcher_ = unique_ptr<FileWatch>(
Brian Silverman5cc661b2013-02-27 15:23:36 -0800383 new FileWatch(original_binary_, StaticFileModified, this));
Brian Silvermand169fcd2013-02-27 13:18:47 -0800384
385 Start();
386 }
387
388 pid_t pid() { return pid_; }
389
390 // This gets called whenever the actual process dies and should (probably) be
391 // restarted.
392 void ProcessDied() {
393 pid_ = -1;
Austin Schuhf2a50ba2016-12-24 16:16:26 -0800394 restarts_.push(monotonic_clock::now());
Brian Silvermand169fcd2013-02-27 13:18:47 -0800395 if (restarts_.size() > kMaxRestartsNumber) {
Austin Schuhf2a50ba2016-12-24 16:16:26 -0800396 monotonic_clock::time_point oldest = restarts_.front();
Brian Silvermand169fcd2013-02-27 13:18:47 -0800397 restarts_.pop();
Austin Schuhf2a50ba2016-12-24 16:16:26 -0800398 if (monotonic_clock::now() <= kMaxRestartsTime + oldest) {
Austin Schuh70551b72020-02-22 14:52:23 -0800399 LOG(WARNING) << "process " << name() << " getting restarted too often";
Brian Silvermand169fcd2013-02-27 13:18:47 -0800400 Timeout(kResumeWait, StaticStart, this);
401 return;
402 }
403 }
404 Start();
405 }
406
407 // Returns a name for logging purposes.
408 const char *name() {
409 return args_[0].c_str();
410 }
411
412 private:
413 struct CheckDiedStatus {
414 Child *self;
415 pid_t old_pid;
416 };
417
418 // How long to wait for a child to die nicely.
Austin Schuhf2a50ba2016-12-24 16:16:26 -0800419 static constexpr chrono::nanoseconds kProcessDieTime = chrono::seconds(2);
Brian Silvermand169fcd2013-02-27 13:18:47 -0800420
421 // How long to wait after the file is modified to restart it.
422 // This is important because some programs like modifying the binaries by
423 // writing them in little bits, which results in attempting to start partial
424 // binaries without this.
Austin Schuhf2a50ba2016-12-24 16:16:26 -0800425 static constexpr chrono::nanoseconds kRestartWaitTime =
426 chrono::milliseconds(1500);
Brian Silvermand169fcd2013-02-27 13:18:47 -0800427
Brian Silverman5cc661b2013-02-27 15:23:36 -0800428 // Only kMaxRestartsNumber restarts will be allowed in kMaxRestartsTime.
Austin Schuhf2a50ba2016-12-24 16:16:26 -0800429 static constexpr chrono::nanoseconds kMaxRestartsTime = chrono::seconds(4);
Brian Silverman52aeeac2013-08-28 16:20:53 -0700430 static const size_t kMaxRestartsNumber = 3;
Brian Silvermand169fcd2013-02-27 13:18:47 -0800431 // How long to wait if it gets restarted too many times.
Austin Schuhf2a50ba2016-12-24 16:16:26 -0800432 static constexpr chrono::nanoseconds kResumeWait = chrono::seconds(5);
Brian Silverman5cc661b2013-02-27 15:23:36 -0800433
Brian Silvermand169fcd2013-02-27 13:18:47 -0800434 static void StaticFileModified(void *self) {
435 static_cast<Child *>(self)->FileModified();
436 }
Brian Silverman5cc661b2013-02-27 15:23:36 -0800437
Brian Silvermand169fcd2013-02-27 13:18:47 -0800438 void FileModified() {
Austin Schuh70551b72020-02-22 14:52:23 -0800439 LOG(INFO) << "file for " << name() << " modified";
Austin Schuhf2a50ba2016-12-24 16:16:26 -0800440 struct timeval restart_time_timeval;
441 {
442 ::std::chrono::seconds sec =
443 ::std::chrono::duration_cast<::std::chrono::seconds>(
444 kRestartWaitTime);
445 ::std::chrono::microseconds usec =
446 ::std::chrono::duration_cast<::std::chrono::microseconds>(
447 kRestartWaitTime - sec);
448 restart_time_timeval.tv_sec = sec.count();
449 restart_time_timeval.tv_usec = usec.count();
450 }
Brian Silvermand169fcd2013-02-27 13:18:47 -0800451 // This will reset the timeout again if it hasn't run yet.
Brian Silvermand94642c2014-03-27 18:21:41 -0700452 if (evtimer_add(restart_timeout.get(), &restart_time_timeval) != 0) {
Austin Schuh70551b72020-02-22 14:52:23 -0800453 LOG(FATAL) << "evtimer_add(" << restart_timeout.get() << ", "
454 << &restart_time_timeval << ") failed";
Brian Silvermand90b5fe2013-03-10 18:34:42 -0700455 }
Brian Silvermand94642c2014-03-27 18:21:41 -0700456 waiting_to_restart.insert(this);
Brian Silvermand169fcd2013-02-27 13:18:47 -0800457 }
458
Brian Silvermand94642c2014-03-27 18:21:41 -0700459 static void StaticDoRestart(int, short, void *) {
Austin Schuh70551b72020-02-22 14:52:23 -0800460 LOG(INFO) << "restarting everything that needs it";
Brian Silvermand94642c2014-03-27 18:21:41 -0700461 for (auto c : waiting_to_restart) {
462 c->DoRestart();
463 }
464 waiting_to_restart.clear();
Brian Silvermand169fcd2013-02-27 13:18:47 -0800465 }
466
Brian Silvermanb1e4f6c2013-02-27 15:42:02 -0800467 // Called after somebody else has finished modifying the file.
Brian Silvermand169fcd2013-02-27 13:18:47 -0800468 void DoRestart() {
Brian Silvermand94642c2014-03-27 18:21:41 -0700469 fprintf(stderr, "DoRestart(%s)\n", binary_.c_str());
Brian Silvermanfe06fe12013-02-27 18:54:58 -0800470 if (stat_at_start_valid_) {
471 struct stat current_stat;
472 if (stat(original_binary_.c_str(), &current_stat) == -1) {
Austin Schuhf257f3c2019-10-27 21:00:43 -0700473 AOS_PLOG(FATAL, "stat(%s, %p) failed", original_binary_.c_str(),
474 &current_stat);
Brian Silvermanfe06fe12013-02-27 18:54:58 -0800475 }
476 if (current_stat.st_mtime == stat_at_start_.st_mtime) {
Austin Schuh70551b72020-02-22 14:52:23 -0800477 LOG(INFO) << "ignoring trigger for " << name()
478 << " because mtime didn't change";
Brian Silvermanfe06fe12013-02-27 18:54:58 -0800479 return;
480 }
481 }
482
Brian Silvermand169fcd2013-02-27 13:18:47 -0800483 if (pid_ != -1) {
Austin Schuh70551b72020-02-22 14:52:23 -0800484 LOG(INFO) << "sending SIGTERM to child " << pid_ << " to restart it";
Brian Silvermand169fcd2013-02-27 13:18:47 -0800485 if (kill(pid_, SIGTERM) == -1) {
Austin Schuhf257f3c2019-10-27 21:00:43 -0700486 AOS_PLOG(WARNING, "kill(%d, SIGTERM) failed", pid_);
Brian Silvermand169fcd2013-02-27 13:18:47 -0800487 }
488 CheckDiedStatus *status = new CheckDiedStatus();
489 status->self = this;
490 status->old_pid = pid_;
491 Timeout(kProcessDieTime, StaticCheckDied, status);
Brian Silvermand90b5fe2013-03-10 18:34:42 -0700492 } else {
Austin Schuh70551b72020-02-22 14:52:23 -0800493 LOG(WARNING) << name() << " restart attempted but not running";
Brian Silvermand169fcd2013-02-27 13:18:47 -0800494 }
495 }
496
497 static void StaticCheckDied(int, short, void *status_in) {
498 CheckDiedStatus *status = static_cast<CheckDiedStatus *>(status_in);
499 status->self->CheckDied(status->old_pid);
500 delete status;
501 }
Brian Silverman5cc661b2013-02-27 15:23:36 -0800502
503 // Checks to see if the child using the PID old_pid is still running.
Brian Silvermand169fcd2013-02-27 13:18:47 -0800504 void CheckDied(pid_t old_pid) {
505 if (pid_ == old_pid) {
Austin Schuh70551b72020-02-22 14:52:23 -0800506 LOG(WARNING) << "child " << old_pid << " refused to die";
Brian Silvermand169fcd2013-02-27 13:18:47 -0800507 if (kill(old_pid, SIGKILL) == -1) {
Austin Schuh70551b72020-02-22 14:52:23 -0800508 LOG(WARNING) << "kill(" << old_pid << ", SIGKILL) failed";
Brian Silvermand169fcd2013-02-27 13:18:47 -0800509 }
510 }
511 }
512
513 static void StaticStart(int, short, void *self) {
514 static_cast<Child *>(self)->Start();
515 }
Brian Silverman5cc661b2013-02-27 15:23:36 -0800516
517 // Actually starts the child.
Brian Silvermand169fcd2013-02-27 13:18:47 -0800518 void Start() {
519 if (pid_ != -1) {
Austin Schuh70551b72020-02-22 14:52:23 -0800520 LOG(WARNING) << "calling Start() but already have child " << pid_
521 << " running";
Brian Silverman5cc661b2013-02-27 15:23:36 -0800522 if (kill(pid_, SIGKILL) == -1) {
Austin Schuhf257f3c2019-10-27 21:00:43 -0700523 AOS_PLOG(WARNING, "kill(%d, SIGKILL) failed", pid_);
Brian Silverman5cc661b2013-02-27 15:23:36 -0800524 return;
525 }
Brian Silvermand169fcd2013-02-27 13:18:47 -0800526 pid_ = -1;
527 }
Brian Silverman5cc661b2013-02-27 15:23:36 -0800528
529 // Remove the name that we run from (ie from a previous execution) and then
530 // hard link the real filename to it.
531 if (unlink(binary_.c_str()) != 0 && errno != ENOENT) {
Austin Schuhf257f3c2019-10-27 21:00:43 -0700532 AOS_PLOG(FATAL, "removing %s failed", binary_.c_str());
Brian Silverman5cc661b2013-02-27 15:23:36 -0800533 }
534 if (link(original_binary_.c_str(), binary_.c_str()) != 0) {
Austin Schuhf257f3c2019-10-27 21:00:43 -0700535 AOS_PLOG(FATAL, "link('%s', '%s') failed", original_binary_.c_str(),
536 binary_.c_str());
Brian Silverman5cc661b2013-02-27 15:23:36 -0800537 }
538
Brian Silvermanfe06fe12013-02-27 18:54:58 -0800539 if (stat(original_binary_.c_str(), &stat_at_start_) == -1) {
Austin Schuhf257f3c2019-10-27 21:00:43 -0700540 AOS_PLOG(FATAL, "stat(%s, %p) failed", original_binary_.c_str(),
541 &stat_at_start_);
Brian Silvermanfe06fe12013-02-27 18:54:58 -0800542 }
543 stat_at_start_valid_ = true;
544
Brian Silvermand169fcd2013-02-27 13:18:47 -0800545 if ((pid_ = fork()) == 0) {
546 ssize_t args_size = args_.size();
547 const char **argv = new const char *[args_size + 1];
548 for (int i = 0; i < args_size; ++i) {
549 argv[i] = args_[i].c_str();
550 }
551 argv[args_size] = NULL;
552 // The const_cast is safe because no code that might care if it gets
553 // modified can run afterwards.
554 execv(binary_.c_str(), const_cast<char **>(argv));
Austin Schuhf257f3c2019-10-27 21:00:43 -0700555 AOS_PLOG(FATAL, "execv(%s, %p) failed", binary_.c_str(), argv);
Brian Silvermand169fcd2013-02-27 13:18:47 -0800556 _exit(EXIT_FAILURE);
557 }
558 if (pid_ == -1) {
Austin Schuhf257f3c2019-10-27 21:00:43 -0700559 AOS_PLOG(FATAL, "forking to run \"%s\" failed", binary_.c_str());
Brian Silvermand169fcd2013-02-27 13:18:47 -0800560 }
Austin Schuh70551b72020-02-22 14:52:23 -0800561 LOG(INFO) << "started \"" << binary_ << "\" successfully";
Brian Silvermand169fcd2013-02-27 13:18:47 -0800562 }
Brian Silvermanbc4fc2f2013-02-27 19:33:42 -0800563
564 // A history of the times that this process has been restarted.
Austin Schuhf2a50ba2016-12-24 16:16:26 -0800565 std::queue<monotonic_clock::time_point,
566 std::list<monotonic_clock::time_point>> restarts_;
Brian Silvermanbc4fc2f2013-02-27 19:33:42 -0800567
568 // The currently running child's PID or NULL.
569 pid_t pid_;
570
571 // All of the arguments (including the name of the binary).
572 std::deque<std::string> args_;
573
574 // The name of the real binary that we were told to run.
575 std::string original_binary_;
576 // The name of the file that we're actually running.
577 std::string binary_;
578
579 // Watches original_binary_.
580 unique_ptr<FileWatch> watcher_;
581
Brian Silvermanbc4fc2f2013-02-27 19:33:42 -0800582 // Captured from the original file when we most recently started a new child
583 // process. Used to see if it actually changes or not.
584 struct stat stat_at_start_;
585 bool stat_at_start_valid_;
586
Brian Silvermand94642c2014-03-27 18:21:41 -0700587 // An event that restarts after kRestartWaitTime.
588 static EventUniquePtr restart_timeout;
589
590 // The set of children waiting to be restarted once all modifications stop.
591 static ::std::set<Child *> waiting_to_restart;
592
Brian Silvermanbc4fc2f2013-02-27 19:33:42 -0800593 DISALLOW_COPY_AND_ASSIGN(Child);
Brian Silvermand169fcd2013-02-27 13:18:47 -0800594};
Brian Silverman52aeeac2013-08-28 16:20:53 -0700595
Austin Schuhf2a50ba2016-12-24 16:16:26 -0800596constexpr chrono::nanoseconds Child::kProcessDieTime;
597constexpr chrono::nanoseconds Child::kRestartWaitTime;
598constexpr chrono::nanoseconds Child::kMaxRestartsTime;
599constexpr chrono::nanoseconds Child::kResumeWait;
Brian Silvermand169fcd2013-02-27 13:18:47 -0800600
Brian Silvermand94642c2014-03-27 18:21:41 -0700601EventUniquePtr Child::restart_timeout;
602::std::set<Child *> Child::waiting_to_restart;
Brian Silvermand169fcd2013-02-27 13:18:47 -0800603
Brian Silverman5cc661b2013-02-27 15:23:36 -0800604// Kills off the entire process group (including ourself).
605void KillChildren(bool try_nice) {
Brian Silvermand169fcd2013-02-27 13:18:47 -0800606 if (try_nice) {
Austin Schuhf2a50ba2016-12-24 16:16:26 -0800607 static constexpr int kNiceStopSignal = SIGTERM;
608 static constexpr auto kNiceWaitTime = chrono::seconds(1);
Brian Silvermand169fcd2013-02-27 13:18:47 -0800609
610 // Make sure that we don't just nicely stop ourself...
611 sigset_t mask;
612 sigemptyset(&mask);
613 sigaddset(&mask, kNiceStopSignal);
614 sigprocmask(SIG_BLOCK, &mask, NULL);
615
Brian Silverman5cc661b2013-02-27 15:23:36 -0800616 kill(-getpid(), kNiceStopSignal);
617
618 fflush(NULL);
Austin Schuhf2a50ba2016-12-24 16:16:26 -0800619 ::std::this_thread::sleep_for(kNiceWaitTime);
Brian Silvermand169fcd2013-02-27 13:18:47 -0800620 }
Brian Silverman5cc661b2013-02-27 15:23:36 -0800621
Brian Silvermand169fcd2013-02-27 13:18:47 -0800622 // Send SIGKILL to our whole process group, which will forcibly terminate any
623 // of them that are still running (us for sure, maybe more too).
Brian Silverman5cc661b2013-02-27 15:23:36 -0800624 kill(-getpid(), SIGKILL);
Brian Silvermand169fcd2013-02-27 13:18:47 -0800625}
626
Brian Silverman5cc661b2013-02-27 15:23:36 -0800627void ExitHandler() {
628 KillChildren(true);
Brian Silvermand169fcd2013-02-27 13:18:47 -0800629}
Brian Silverman5cc661b2013-02-27 15:23:36 -0800630
631void KillChildrenSignalHandler(int signum) {
632 // If we get SIGSEGV or some other random signal who knows what's happening
633 // and we should just kill everybody immediately.
634 // This is a list of all of the signals that mean some form of "nicely stop".
635 KillChildren(signum == SIGHUP || signum == SIGINT || signum == SIGQUIT ||
Brian Silverman0eec9532013-02-27 20:24:16 -0800636 signum == SIGABRT || signum == SIGPIPE || signum == SIGTERM ||
637 signum == SIGXCPU);
Brian Silvermand169fcd2013-02-27 13:18:47 -0800638}
639
Brian Silverman5cc661b2013-02-27 15:23:36 -0800640// Returns the currently running child with PID pid or an empty unique_ptr.
Brian Silvermand169fcd2013-02-27 13:18:47 -0800641const unique_ptr<Child> &FindChild(pid_t pid) {
642 for (auto it = children.begin(); it != children.end(); ++it) {
643 if (pid == (*it)->pid()) {
644 return *it;
645 }
646 }
647
Brian Silverman5cc661b2013-02-27 15:23:36 -0800648 static const unique_ptr<Child> kNothing;
Brian Silvermand169fcd2013-02-27 13:18:47 -0800649 return kNothing;
650}
651
Brian Silverman5cc661b2013-02-27 15:23:36 -0800652// Gets set up as a libevent handler for SIGCHLD.
653// Handles calling Child::ProcessDied() on the appropriate one.
654void SigCHLDReceived(int /*fd*/, short /*events*/, void *) {
Brian Silvermand169fcd2013-02-27 13:18:47 -0800655 // In a while loop in case we miss any SIGCHLDs.
656 while (true) {
657 siginfo_t infop;
658 infop.si_pid = 0;
659 if (waitid(P_ALL, 0, &infop, WEXITED | WSTOPPED | WNOHANG) != 0) {
Austin Schuhf257f3c2019-10-27 21:00:43 -0700660 AOS_PLOG(WARNING, "waitid failed");
Brian Silverman5cc661b2013-02-27 15:23:36 -0800661 continue;
Brian Silvermand169fcd2013-02-27 13:18:47 -0800662 }
Brian Silverman5cc661b2013-02-27 15:23:36 -0800663 // If there are no more child process deaths to process.
Brian Silvermand169fcd2013-02-27 13:18:47 -0800664 if (infop.si_pid == 0) {
Brian Silverman5cc661b2013-02-27 15:23:36 -0800665 return;
Brian Silvermand169fcd2013-02-27 13:18:47 -0800666 }
667
668 pid_t pid = infop.si_pid;
669 int status = infop.si_status;
670 const unique_ptr<Child> &child = FindChild(pid);
671 if (child) {
672 switch (infop.si_code) {
673 case CLD_EXITED:
Austin Schuh70551b72020-02-22 14:52:23 -0800674 LOG(WARNING) << "child " << pid << " (" << child->name()
675 << ") exited with status " << status;
Brian Silvermand169fcd2013-02-27 13:18:47 -0800676 break;
677 case CLD_DUMPED:
Austin Schuh70551b72020-02-22 14:52:23 -0800678 LOG(INFO) << "child " << pid
679 << " actually dumped core. falling through to killed by "
680 "signal case";
James Kuszmaul3ae42262019-11-08 12:33:41 -0800681 [[fallthrough]];
682 /* FALLTHRU */
Brian Silvermand169fcd2013-02-27 13:18:47 -0800683 case CLD_KILLED:
684 // If somebody (possibly us) sent it SIGTERM that means that they just
685 // want it to stop, so it stopping isn't a WARNING.
Austin Schuh70551b72020-02-22 14:52:23 -0800686 ((status == SIGTERM) ? LOG(INFO) : LOG(WARNING))
687 << "child " << pid << " (" << child->name()
688 << ") was killed by signal " << status << " ("
689 << aos_strsignal(status) << ")";
Brian Silvermand169fcd2013-02-27 13:18:47 -0800690 break;
691 case CLD_STOPPED:
Austin Schuh70551b72020-02-22 14:52:23 -0800692 LOG(WARNING) << "child " << pid << " (" << child->name()
693 << ") was stopped by signal " << status
694 << " (giving it a SIGCONT(" << SIGCONT << "))";
Brian Silvermand169fcd2013-02-27 13:18:47 -0800695 kill(pid, SIGCONT);
696 continue;
697 default:
Austin Schuh70551b72020-02-22 14:52:23 -0800698 LOG(WARNING) << "something happened to child " << pid << " ("
699 << child->name() << ") (killing it)";
Brian Silvermand169fcd2013-02-27 13:18:47 -0800700 kill(pid, SIGKILL);
701 continue;
702 }
703 } else {
Austin Schuh70551b72020-02-22 14:52:23 -0800704 LOG(WARNING) << "couldn't find a Child for pid " << pid;
Brian Silverman5cc661b2013-02-27 15:23:36 -0800705 return;
Brian Silvermand169fcd2013-02-27 13:18:47 -0800706 }
707
708 child->ProcessDied();
709 }
710}
711
// Name of the file listing the processes to start. main() sets this so that
// Run() can read it.
714const char *child_list_file;
715
Austin Schuh1d4920f2020-02-22 14:56:37 -0800716void Run();
Brian Silvermand169fcd2013-02-27 13:18:47 -0800717void Main() {
Comran Morshed7f6ba792016-02-21 16:54:05 +0000718 // Set UID to 0 so we can run things as root down below. Since the starter
719 // program on the roborio runs starter.sh under "lvuser", it will continuously
720 // fail due to lack of permissions if we do not manually set the UID to admin.
721#ifdef AOS_ARCHITECTURE_arm_frc
722 if (setuid(0) != 0) {
Austin Schuhf257f3c2019-10-27 21:00:43 -0700723 AOS_PLOG(FATAL, "setuid(0) failed");
Comran Morshed7f6ba792016-02-21 16:54:05 +0000724 }
725#endif
726
Brian Silverman5cc661b2013-02-27 15:23:36 -0800727 if (setpgid(0 /*self*/, 0 /*make PGID the same as PID*/) != 0) {
Austin Schuhf257f3c2019-10-27 21:00:43 -0700728 AOS_PLOG(FATAL, "setpgid(0, 0) failed");
Brian Silverman5cc661b2013-02-27 15:23:36 -0800729 }
Brian Silvermand169fcd2013-02-27 13:18:47 -0800730
731 // Make sure that we kill all children when we exit.
Brian Silverman5cc661b2013-02-27 15:23:36 -0800732 atexit(ExitHandler);
Brian Silvermand169fcd2013-02-27 13:18:47 -0800733 // Do it on some signals too (ones that we otherwise tend to receive and then
734 // leave all of our children going).
Brian Silverman5cc661b2013-02-27 15:23:36 -0800735 signal(SIGHUP, KillChildrenSignalHandler);
736 signal(SIGINT, KillChildrenSignalHandler);
737 signal(SIGQUIT, KillChildrenSignalHandler);
738 signal(SIGILL, KillChildrenSignalHandler);
739 signal(SIGABRT, KillChildrenSignalHandler);
740 signal(SIGFPE, KillChildrenSignalHandler);
741 signal(SIGSEGV, KillChildrenSignalHandler);
742 signal(SIGPIPE, KillChildrenSignalHandler);
743 signal(SIGTERM, KillChildrenSignalHandler);
744 signal(SIGBUS, KillChildrenSignalHandler);
745 signal(SIGXCPU, KillChildrenSignalHandler);
Brian Silverman35df22f2015-12-27 17:57:10 -0800746
747#ifdef AOS_ARCHITECTURE_arm_frc
748 // Just allow overcommit memory like usual. Various processes map memory they
749 // will never use, and the roboRIO doesn't have enough RAM to handle it.
750 // This is in here instead of starter.sh because starter.sh doesn't run with
751 // permissions on a roboRIO.
Austin Schuhf257f3c2019-10-27 21:00:43 -0700752 AOS_CHECK(system("echo 0 > /proc/sys/vm/overcommit_memory") == 0);
Austin Schuhd3c915c2020-10-24 21:26:48 -0700753
754 // Configure throttling so we reserve 5% of the CPU for non-rt work.
755 // This makes things significantly more stable when work explodes.
756 // This is in here instead of starter.sh for the same reasons, starter is suid
757 // and runs as admin, so this actually works.
758 AOS_CHECK(system("/sbin/sysctl -w kernel.sched_rt_period_us=1000000") == 0);
759 AOS_CHECK(system("/sbin/sysctl -w kernel.sched_rt_runtime_us=950000") == 0);
Brian Silverman35df22f2015-12-27 17:57:10 -0800760#endif
James Kuszmaul3ae42262019-11-08 12:33:41 -0800761
Brian Silvermand169fcd2013-02-27 13:18:47 -0800762 libevent_base = EventBaseUniquePtr(event_base_new());
763
Austin Schuh1d4920f2020-02-22 14:56:37 -0800764 Run();
Brian Silvermand169fcd2013-02-27 13:18:47 -0800765
766 event_base_dispatch(libevent_base.get());
Austin Schuh70551b72020-02-22 14:52:23 -0800767 LOG(FATAL) << "event_base_dispatch(" << libevent_base.get() << ") returned";
Brian Silvermand169fcd2013-02-27 13:18:47 -0800768}
769
Brian Silverman0eec9532013-02-27 20:24:16 -0800770// This is the callback for when core creates the file indicating that it has
771// started.
Austin Schuh1d4920f2020-02-22 14:56:37 -0800772void Run() {
Brian Silverman0eec9532013-02-27 20:24:16 -0800773 std::ifstream list_file(child_list_file);
James Kuszmaul3ae42262019-11-08 12:33:41 -0800774
Brian Silverman0eec9532013-02-27 20:24:16 -0800775 while (true) {
776 std::string child_name;
777 getline(list_file, child_name);
778 if ((list_file.rdstate() & std::ios_base::eofbit) != 0) {
779 break;
780 }
781 if (list_file.rdstate() != 0) {
Austin Schuh70551b72020-02-22 14:52:23 -0800782 LOG(FATAL) << "reading input file " << child_list_file << " failed";
Brian Silverman0eec9532013-02-27 20:24:16 -0800783 }
784 children.push_back(unique_ptr<Child>(new Child(child_name)));
785 }
786
787 EventUniquePtr sigchld(event_new(libevent_base.get(), SIGCHLD,
788 EV_SIGNAL | EV_PERSIST,
789 SigCHLDReceived, NULL));
790 event_add(sigchld.release(), NULL);
791}
792
// Usage text that PrintHelp() prints after the program name.
const char *kArgsHelp =
    "[OPTION]... START_LIST\n"
    "Start all of the robot code binaries in START_LIST.\n"
    "\n"
    "START_LIST is the file to read binaries (looked up on PATH) to run.\n"
    "  --help        display this help and exit\n";

// Writes the usage message (program name followed by kArgsHelp) to stderr.
void PrintHelp() {
  const char *const invoked_as = program_invocation_name;
  fprintf(stderr, "Usage: %s %s", invoked_as, kArgsHelp);
}
801
Brian Silvermand169fcd2013-02-27 13:18:47 -0800802} // namespace starter
803} // namespace aos
804
805int main(int argc, char *argv[]) {
Austin Schuh094d09b2020-11-20 23:26:52 -0800806 ::aos::InitGoogle(&argc, &argv);
807
Brian Silverman8070a222013-02-28 15:01:36 -0800808 if (argc != 2) {
809 aos::starter::PrintHelp();
Brian Silvermand169fcd2013-02-27 13:18:47 -0800810 exit(EXIT_FAILURE);
Brian Silvermand169fcd2013-02-27 13:18:47 -0800811 }
Brian Silverman8070a222013-02-28 15:01:36 -0800812 if (strcmp(argv[1], "--help") == 0) {
813 aos::starter::PrintHelp();
814 exit(EXIT_SUCCESS);
815 }
816
Brian Silvermand169fcd2013-02-27 13:18:47 -0800817 aos::starter::child_list_file = argv[1];
818
819 aos::starter::Main();
820}