blob: 949c8a7f2901d2a63fbaafc856db96a4baa6a932 [file] [log] [blame]
Philipp Schrader790cb542023-07-05 21:06:52 -07001#include "aos/starter/starterd_lib.h"
Tyler Chatowa79419d2020-08-12 20:12:11 -07002
Tyler Chatowa79419d2020-08-12 20:12:11 -07003#include <algorithm>
4#include <utility>
5
James Kuszmaul293b2172021-11-10 16:20:48 -08006#include "absl/strings/str_format.h"
Tyler Chatowa79419d2020-08-12 20:12:11 -07007#include "glog/logging.h"
8#include "glog/stl_logging.h"
9
Philipp Schrader790cb542023-07-05 21:06:52 -070010#include "aos/json_to_flatbuffer.h"
11
Austin Schuh4d275fc2022-09-16 15:42:45 -070012// FLAGS_shm_base is defined elsewhere, declare it here so it can be used
13// to override the shared memory folder for unit testing.
14DECLARE_string(shm_base);
15// FLAGS_permissions is defined elsewhere, declare it here so it can be used
16// to set the file permissions on the shared memory block.
17DECLARE_uint32(permissions);
18
Tyler Chatowa79419d2020-08-12 20:12:11 -070019namespace aos {
20namespace starter {
21
James Kuszmaul293b2172021-11-10 16:20:48 -080022const aos::Channel *StatusChannelForNode(const aos::Configuration *config,
23 const aos::Node *node) {
24 return configuration::GetChannel<Status>(config, "/aos", "", node);
25}
26const aos::Channel *StarterRpcChannelForNode(const aos::Configuration *config,
27 const aos::Node *node) {
28 return configuration::GetChannel<StarterRpc>(config, "/aos", "", node);
29}
30
Tyler Chatowa79419d2020-08-12 20:12:11 -070031Starter::Starter(const aos::Configuration *event_loop_config)
32 : config_msg_(event_loop_config),
33 event_loop_(event_loop_config),
34 status_sender_(event_loop_.MakeSender<aos::starter::Status>("/aos")),
Austin Schuhfc304942021-10-16 14:20:05 -070035 status_timer_(event_loop_.AddTimer([this] {
36 SendStatus();
37 status_count_ = 0;
38 })),
Austin Schuh59398d32023-05-03 08:10:55 -070039 cleanup_timer_(event_loop_.AddTimer([this] {
40 event_loop_.Exit();
41 LOG(INFO) << "Starter event loop exit finished.";
42 })),
Austin Schuhfc304942021-10-16 14:20:05 -070043 max_status_count_(
44 event_loop_.GetChannel<aos::starter::Status>("/aos")->frequency() -
45 1),
Austin Schuh4d275fc2022-09-16 15:42:45 -070046 shm_base_(FLAGS_shm_base),
Tyler Chatowa79419d2020-08-12 20:12:11 -070047 listener_(&event_loop_,
James Kuszmaul6295a642022-03-22 15:23:59 -070048 [this](signalfd_siginfo signal) { OnSignal(signal); }),
49 top_(&event_loop_) {
Tyler Chatowa79419d2020-08-12 20:12:11 -070050 event_loop_.SkipAosLog();
51
52 event_loop_.OnRun([this] {
53 status_timer_->Setup(event_loop_.monotonic_now(),
Austin Schuhfc304942021-10-16 14:20:05 -070054 std::chrono::milliseconds(1000));
Tyler Chatowa79419d2020-08-12 20:12:11 -070055 });
56
James Kuszmaul293b2172021-11-10 16:20:48 -080057 if (!aos::configuration::MultiNode(config_msg_)) {
58 event_loop_.MakeWatcher(
59 "/aos",
60 [this](const aos::starter::StarterRpc &cmd) { HandleStarterRpc(cmd); });
61 } else {
62 for (const aos::Node *node : aos::configuration::GetNodes(config_msg_)) {
63 const Channel *channel = StarterRpcChannelForNode(config_msg_, node);
64 CHECK(channel != nullptr) << ": Failed to find channel /aos for "
65 << StarterRpc::GetFullyQualifiedName() << " on "
66 << node->name()->string_view();
67 if (!aos::configuration::ChannelIsReadableOnNode(channel,
68 event_loop_.node())) {
69 LOG(INFO) << "StarterRpc channel "
70 << aos::configuration::StrippedChannelToString(channel)
71 << " is not readable on "
72 << event_loop_.node()->name()->string_view();
73 } else {
74 event_loop_.MakeWatcher(channel->name()->string_view(),
75 [this](const aos::starter::StarterRpc &cmd) {
76 HandleStarterRpc(cmd);
77 });
78 }
Tyler Chatowa79419d2020-08-12 20:12:11 -070079 }
James Kuszmaul293b2172021-11-10 16:20:48 -080080 }
Tyler Chatowa79419d2020-08-12 20:12:11 -070081
Austin Schuh4d275fc2022-09-16 15:42:45 -070082 // Catalogue all the applications for this node, so we can keep an eye on
83 // them.
Tyler Chatowa79419d2020-08-12 20:12:11 -070084 if (config_msg_->has_applications()) {
85 const flatbuffers::Vector<flatbuffers::Offset<aos::Application>>
86 *applications = config_msg_->applications();
Ravago Jones7e2dd322020-11-21 15:58:58 -080087
88 if (aos::configuration::MultiNode(config_msg_)) {
89 std::string_view current_node = event_loop_.node()->name()->string_view();
90 for (const aos::Application *application : *applications) {
Austin Schuh228609b2023-03-21 15:43:11 -070091 CHECK(application->has_nodes())
92 << ": Missing nodes on " << aos::FlatbufferToJson(application);
Ravago Jones7e2dd322020-11-21 15:58:58 -080093 for (const flatbuffers::String *node : *application->nodes()) {
94 if (node->string_view() == current_node) {
95 AddApplication(application);
96 break;
97 }
98 }
99 }
100 } else {
101 for (const aos::Application *application : *applications) {
102 AddApplication(application);
103 }
Tyler Chatowa79419d2020-08-12 20:12:11 -0700104 }
105 }
Austin Schuh4d275fc2022-09-16 15:42:45 -0700106
107 // Catalogue all the intranode channels for this node, and create
108 // MemoryMappedQueues for each one to allocate the shared memory before
109 // spawning any shasta process.
110 if (config_msg_->has_channels()) {
111 const aos::Node *this_node = event_loop_.node();
112 std::vector<const aos::Channel *> intranode_channels;
113 for (const aos::Channel *channel : *config_msg_->channels()) {
114 if (aos::configuration::ChannelIsReadableOnNode(channel, this_node)) {
115 AddChannel(channel);
116 }
117 }
118 }
Tyler Chatowa79419d2020-08-12 20:12:11 -0700119}
120
James Kuszmaul293b2172021-11-10 16:20:48 -0800121void Starter::HandleStarterRpc(const StarterRpc &command) {
122 if (!command.has_command() || !command.has_name() || exiting_) {
123 return;
124 }
125
126 LOG(INFO) << "Received " << aos::FlatbufferToJson(&command);
127
128 if (command.has_nodes()) {
129 CHECK(aos::configuration::MultiNode(config_msg_));
130 bool relevant_to_this_node = false;
131 for (const flatbuffers::String *node : *command.nodes()) {
132 if (node->string_view() == event_loop_.node()->name()->string_view()) {
133 relevant_to_this_node = true;
134 }
135 }
136 if (!relevant_to_this_node) {
137 return;
138 }
139 }
140 // If not populated, restart regardless of node.
141
142 auto search = applications_.find(command.name()->str());
143 if (search != applications_.end()) {
144 // If an applicatione exists by the given name, dispatch the command
145 search->second.HandleCommand(command.command());
146 }
147}
148
James Kuszmaul6295a642022-03-22 15:23:59 -0700149void Starter::HandleStateChange() {
150 std::set<pid_t> all_pids;
151 for (const auto &pair : applications_) {
152 if (pair.second.get_pid() > 0 &&
153 pair.second.status() != aos::starter::State::STOPPED) {
154 all_pids.insert(pair.second.get_pid());
155 }
156 }
157 top_.set_track_pids(all_pids);
158
Austin Schuhfc304942021-10-16 14:20:05 -0700159 if (status_count_ < max_status_count_) {
160 SendStatus();
161 ++status_count_;
162 } else {
163 VLOG(1) << "That's enough " << status_count_ << " " << max_status_count_;
164 }
165}
166
Tyler Chatowa79419d2020-08-12 20:12:11 -0700167void Starter::Cleanup() {
168 if (exiting_) {
169 return;
170 }
171 exiting_ = true;
172 for (auto &application : applications_) {
173 application.second.Terminate();
174 }
175 cleanup_timer_->Setup(event_loop_.monotonic_now() +
176 std::chrono::milliseconds(1500));
177}
178
179void Starter::OnSignal(signalfd_siginfo info) {
Tyler Chatowa79419d2020-08-12 20:12:11 -0700180 if (info.ssi_signo == SIGCHLD) {
181 // SIGCHLD messages can be collapsed if multiple are received, so all
182 // applications must check their status.
183 for (auto iter = applications_.begin(); iter != applications_.end();) {
184 if (iter->second.MaybeHandleSignal()) {
185 iter = applications_.erase(iter);
186 } else {
187 ++iter;
188 }
189 }
190
191 if (exiting_ && applications_.empty()) {
192 event_loop_.Exit();
193 }
Austin Schuh3204b332021-10-16 14:20:10 -0700194 } else {
195 LOG(INFO) << "Received signal '" << strsignal(info.ssi_signo) << "'";
196
197 if (std::find(kStarterDeath.begin(), kStarterDeath.end(), info.ssi_signo) !=
198 kStarterDeath.end()) {
199 LOG(WARNING) << "Starter shutting down";
200 Cleanup();
201 }
Tyler Chatowa79419d2020-08-12 20:12:11 -0700202 }
203}
204
205Application *Starter::AddApplication(const aos::Application *application) {
James Kuszmaul6295a642022-03-22 15:23:59 -0700206 auto [iter, success] = applications_.try_emplace(
207 application->name()->str(), application, &event_loop_,
208 [this]() { HandleStateChange(); });
Tyler Chatowa79419d2020-08-12 20:12:11 -0700209 if (success) {
James Kuszmauld42edb42022-01-07 18:00:16 -0800210 // We should be catching and handling SIGCHLD correctly in the starter, so
211 // don't leave in the crutch for polling for the child process status (this
212 // is less about efficiency, and more about making sure bit rot doesn't
213 // result in the signal handling breaking).
214 iter->second.DisableChildDeathPolling();
Tyler Chatowa79419d2020-08-12 20:12:11 -0700215 return &(iter->second);
216 }
217 return nullptr;
218}
219
220void Starter::Run() {
Tyler Chatow03fdb2a2020-12-26 18:39:36 -0800221#ifdef AOS_ARCHITECTURE_arm_frc
222 PCHECK(setuid(0) == 0) << "Failed to change user to root";
223#endif
224
Tyler Chatowa79419d2020-08-12 20:12:11 -0700225 for (auto &application : applications_) {
Austin Schuh5f79a5a2021-10-12 17:46:50 -0700226 if (application.second.autostart()) {
227 application.second.Start();
228 }
Tyler Chatowa79419d2020-08-12 20:12:11 -0700229 }
230
231 event_loop_.Run();
232}
233
234void Starter::SendStatus() {
235 aos::Sender<aos::starter::Status>::Builder builder =
236 status_sender_.MakeBuilder();
237
238 std::vector<flatbuffers::Offset<aos::starter::ApplicationStatus>> statuses;
239
240 for (auto &application : applications_) {
James Kuszmaul6295a642022-03-22 15:23:59 -0700241 statuses.push_back(application.second.PopulateStatus(builder.fbb(), &top_));
Tyler Chatowa79419d2020-08-12 20:12:11 -0700242 }
243
244 auto statuses_fbs = builder.fbb()->CreateVector(statuses);
245
246 aos::starter::Status::Builder status_builder(*builder.fbb());
247 status_builder.add_statuses(statuses_fbs);
milind1f1dca32021-07-03 13:50:07 -0700248 builder.CheckOk(builder.Send(status_builder.Finish()));
Tyler Chatowa79419d2020-08-12 20:12:11 -0700249}
250
Austin Schuh4d275fc2022-09-16 15:42:45 -0700251void Starter::AddChannel(const aos::Channel *channel) {
252 CHECK_NOTNULL(channel);
253 shm_queues_.emplace_back(std::make_unique<aos::ipc_lib::MemoryMappedQueue>(
254 shm_base_, FLAGS_permissions, event_loop_.configuration(), channel));
255 VLOG(1) << "Created MemoryMappedQueue for "
256 << aos::configuration::StrippedChannelToString(channel) << " under "
257 << shm_base_;
258}
259
Tyler Chatowa79419d2020-08-12 20:12:11 -0700260} // namespace starter
261} // namespace aos