blob: b8b734374b0b3573f92d7d23462e68176dcf5007 [file] [log] [blame]
Tyler Chatowa79419d2020-08-12 20:12:11 -07001#include "starterd_lib.h"
2
Tyler Chatowa79419d2020-08-12 20:12:11 -07003#include <algorithm>
4#include <utility>
5
James Kuszmaul293b2172021-11-10 16:20:48 -08006#include "absl/strings/str_format.h"
7#include "aos/json_to_flatbuffer.h"
Tyler Chatowa79419d2020-08-12 20:12:11 -07008#include "glog/logging.h"
9#include "glog/stl_logging.h"
10
// FLAGS_shm_base is defined elsewhere; declare it here so it can be used
// to override the shared memory folder for unit testing.
13DECLARE_string(shm_base);
// FLAGS_permissions is defined elsewhere; declare it here so it can be used
// to set the file permissions on the shared memory block.
16DECLARE_uint32(permissions);
17
Tyler Chatowa79419d2020-08-12 20:12:11 -070018namespace aos {
19namespace starter {
20
James Kuszmaul293b2172021-11-10 16:20:48 -080021const aos::Channel *StatusChannelForNode(const aos::Configuration *config,
22 const aos::Node *node) {
23 return configuration::GetChannel<Status>(config, "/aos", "", node);
24}
25const aos::Channel *StarterRpcChannelForNode(const aos::Configuration *config,
26 const aos::Node *node) {
27 return configuration::GetChannel<StarterRpc>(config, "/aos", "", node);
28}
29
Tyler Chatowa79419d2020-08-12 20:12:11 -070030Starter::Starter(const aos::Configuration *event_loop_config)
31 : config_msg_(event_loop_config),
32 event_loop_(event_loop_config),
33 status_sender_(event_loop_.MakeSender<aos::starter::Status>("/aos")),
Austin Schuhfc304942021-10-16 14:20:05 -070034 status_timer_(event_loop_.AddTimer([this] {
35 SendStatus();
36 status_count_ = 0;
37 })),
Tyler Chatowa79419d2020-08-12 20:12:11 -070038 cleanup_timer_(event_loop_.AddTimer([this] { event_loop_.Exit(); })),
Austin Schuhfc304942021-10-16 14:20:05 -070039 max_status_count_(
40 event_loop_.GetChannel<aos::starter::Status>("/aos")->frequency() -
41 1),
Austin Schuh4d275fc2022-09-16 15:42:45 -070042 shm_base_(FLAGS_shm_base),
Tyler Chatowa79419d2020-08-12 20:12:11 -070043 listener_(&event_loop_,
James Kuszmaul6295a642022-03-22 15:23:59 -070044 [this](signalfd_siginfo signal) { OnSignal(signal); }),
45 top_(&event_loop_) {
Tyler Chatowa79419d2020-08-12 20:12:11 -070046 event_loop_.SkipAosLog();
47
48 event_loop_.OnRun([this] {
49 status_timer_->Setup(event_loop_.monotonic_now(),
Austin Schuhfc304942021-10-16 14:20:05 -070050 std::chrono::milliseconds(1000));
Tyler Chatowa79419d2020-08-12 20:12:11 -070051 });
52
James Kuszmaul293b2172021-11-10 16:20:48 -080053 if (!aos::configuration::MultiNode(config_msg_)) {
54 event_loop_.MakeWatcher(
55 "/aos",
56 [this](const aos::starter::StarterRpc &cmd) { HandleStarterRpc(cmd); });
57 } else {
58 for (const aos::Node *node : aos::configuration::GetNodes(config_msg_)) {
59 const Channel *channel = StarterRpcChannelForNode(config_msg_, node);
60 CHECK(channel != nullptr) << ": Failed to find channel /aos for "
61 << StarterRpc::GetFullyQualifiedName() << " on "
62 << node->name()->string_view();
63 if (!aos::configuration::ChannelIsReadableOnNode(channel,
64 event_loop_.node())) {
65 LOG(INFO) << "StarterRpc channel "
66 << aos::configuration::StrippedChannelToString(channel)
67 << " is not readable on "
68 << event_loop_.node()->name()->string_view();
69 } else {
70 event_loop_.MakeWatcher(channel->name()->string_view(),
71 [this](const aos::starter::StarterRpc &cmd) {
72 HandleStarterRpc(cmd);
73 });
74 }
Tyler Chatowa79419d2020-08-12 20:12:11 -070075 }
James Kuszmaul293b2172021-11-10 16:20:48 -080076 }
Tyler Chatowa79419d2020-08-12 20:12:11 -070077
Austin Schuh4d275fc2022-09-16 15:42:45 -070078 // Catalogue all the applications for this node, so we can keep an eye on
79 // them.
Tyler Chatowa79419d2020-08-12 20:12:11 -070080 if (config_msg_->has_applications()) {
81 const flatbuffers::Vector<flatbuffers::Offset<aos::Application>>
82 *applications = config_msg_->applications();
Ravago Jones7e2dd322020-11-21 15:58:58 -080083
84 if (aos::configuration::MultiNode(config_msg_)) {
85 std::string_view current_node = event_loop_.node()->name()->string_view();
86 for (const aos::Application *application : *applications) {
Austin Schuh228609b2023-03-21 15:43:11 -070087 CHECK(application->has_nodes())
88 << ": Missing nodes on " << aos::FlatbufferToJson(application);
Ravago Jones7e2dd322020-11-21 15:58:58 -080089 for (const flatbuffers::String *node : *application->nodes()) {
90 if (node->string_view() == current_node) {
91 AddApplication(application);
92 break;
93 }
94 }
95 }
96 } else {
97 for (const aos::Application *application : *applications) {
98 AddApplication(application);
99 }
Tyler Chatowa79419d2020-08-12 20:12:11 -0700100 }
101 }
Austin Schuh4d275fc2022-09-16 15:42:45 -0700102
103 // Catalogue all the intranode channels for this node, and create
104 // MemoryMappedQueues for each one to allocate the shared memory before
105 // spawning any shasta process.
106 if (config_msg_->has_channels()) {
107 const aos::Node *this_node = event_loop_.node();
108 std::vector<const aos::Channel *> intranode_channels;
109 for (const aos::Channel *channel : *config_msg_->channels()) {
110 if (aos::configuration::ChannelIsReadableOnNode(channel, this_node)) {
111 AddChannel(channel);
112 }
113 }
114 }
Tyler Chatowa79419d2020-08-12 20:12:11 -0700115}
116
James Kuszmaul293b2172021-11-10 16:20:48 -0800117void Starter::HandleStarterRpc(const StarterRpc &command) {
118 if (!command.has_command() || !command.has_name() || exiting_) {
119 return;
120 }
121
122 LOG(INFO) << "Received " << aos::FlatbufferToJson(&command);
123
124 if (command.has_nodes()) {
125 CHECK(aos::configuration::MultiNode(config_msg_));
126 bool relevant_to_this_node = false;
127 for (const flatbuffers::String *node : *command.nodes()) {
128 if (node->string_view() == event_loop_.node()->name()->string_view()) {
129 relevant_to_this_node = true;
130 }
131 }
132 if (!relevant_to_this_node) {
133 return;
134 }
135 }
136 // If not populated, restart regardless of node.
137
138 auto search = applications_.find(command.name()->str());
139 if (search != applications_.end()) {
140 // If an applicatione exists by the given name, dispatch the command
141 search->second.HandleCommand(command.command());
142 }
143}
144
James Kuszmaul6295a642022-03-22 15:23:59 -0700145void Starter::HandleStateChange() {
146 std::set<pid_t> all_pids;
147 for (const auto &pair : applications_) {
148 if (pair.second.get_pid() > 0 &&
149 pair.second.status() != aos::starter::State::STOPPED) {
150 all_pids.insert(pair.second.get_pid());
151 }
152 }
153 top_.set_track_pids(all_pids);
154
Austin Schuhfc304942021-10-16 14:20:05 -0700155 if (status_count_ < max_status_count_) {
156 SendStatus();
157 ++status_count_;
158 } else {
159 VLOG(1) << "That's enough " << status_count_ << " " << max_status_count_;
160 }
161}
162
Tyler Chatowa79419d2020-08-12 20:12:11 -0700163void Starter::Cleanup() {
164 if (exiting_) {
165 return;
166 }
167 exiting_ = true;
168 for (auto &application : applications_) {
169 application.second.Terminate();
170 }
171 cleanup_timer_->Setup(event_loop_.monotonic_now() +
172 std::chrono::milliseconds(1500));
173}
174
175void Starter::OnSignal(signalfd_siginfo info) {
Tyler Chatowa79419d2020-08-12 20:12:11 -0700176 if (info.ssi_signo == SIGCHLD) {
177 // SIGCHLD messages can be collapsed if multiple are received, so all
178 // applications must check their status.
179 for (auto iter = applications_.begin(); iter != applications_.end();) {
180 if (iter->second.MaybeHandleSignal()) {
181 iter = applications_.erase(iter);
182 } else {
183 ++iter;
184 }
185 }
186
187 if (exiting_ && applications_.empty()) {
188 event_loop_.Exit();
189 }
Austin Schuh3204b332021-10-16 14:20:10 -0700190 } else {
191 LOG(INFO) << "Received signal '" << strsignal(info.ssi_signo) << "'";
192
193 if (std::find(kStarterDeath.begin(), kStarterDeath.end(), info.ssi_signo) !=
194 kStarterDeath.end()) {
195 LOG(WARNING) << "Starter shutting down";
196 Cleanup();
197 }
Tyler Chatowa79419d2020-08-12 20:12:11 -0700198 }
199}
200
201Application *Starter::AddApplication(const aos::Application *application) {
James Kuszmaul6295a642022-03-22 15:23:59 -0700202 auto [iter, success] = applications_.try_emplace(
203 application->name()->str(), application, &event_loop_,
204 [this]() { HandleStateChange(); });
Tyler Chatowa79419d2020-08-12 20:12:11 -0700205 if (success) {
James Kuszmauld42edb42022-01-07 18:00:16 -0800206 // We should be catching and handling SIGCHLD correctly in the starter, so
207 // don't leave in the crutch for polling for the child process status (this
208 // is less about efficiency, and more about making sure bit rot doesn't
209 // result in the signal handling breaking).
210 iter->second.DisableChildDeathPolling();
Tyler Chatowa79419d2020-08-12 20:12:11 -0700211 return &(iter->second);
212 }
213 return nullptr;
214}
215
216void Starter::Run() {
Tyler Chatow03fdb2a2020-12-26 18:39:36 -0800217#ifdef AOS_ARCHITECTURE_arm_frc
218 PCHECK(setuid(0) == 0) << "Failed to change user to root";
219#endif
220
Tyler Chatowa79419d2020-08-12 20:12:11 -0700221 for (auto &application : applications_) {
Austin Schuh5f79a5a2021-10-12 17:46:50 -0700222 if (application.second.autostart()) {
223 application.second.Start();
224 }
Tyler Chatowa79419d2020-08-12 20:12:11 -0700225 }
226
227 event_loop_.Run();
228}
229
230void Starter::SendStatus() {
231 aos::Sender<aos::starter::Status>::Builder builder =
232 status_sender_.MakeBuilder();
233
234 std::vector<flatbuffers::Offset<aos::starter::ApplicationStatus>> statuses;
235
236 for (auto &application : applications_) {
James Kuszmaul6295a642022-03-22 15:23:59 -0700237 statuses.push_back(application.second.PopulateStatus(builder.fbb(), &top_));
Tyler Chatowa79419d2020-08-12 20:12:11 -0700238 }
239
240 auto statuses_fbs = builder.fbb()->CreateVector(statuses);
241
242 aos::starter::Status::Builder status_builder(*builder.fbb());
243 status_builder.add_statuses(statuses_fbs);
milind1f1dca32021-07-03 13:50:07 -0700244 builder.CheckOk(builder.Send(status_builder.Finish()));
Tyler Chatowa79419d2020-08-12 20:12:11 -0700245}
246
Austin Schuh4d275fc2022-09-16 15:42:45 -0700247void Starter::AddChannel(const aos::Channel *channel) {
248 CHECK_NOTNULL(channel);
249 shm_queues_.emplace_back(std::make_unique<aos::ipc_lib::MemoryMappedQueue>(
250 shm_base_, FLAGS_permissions, event_loop_.configuration(), channel));
251 VLOG(1) << "Created MemoryMappedQueue for "
252 << aos::configuration::StrippedChannelToString(channel) << " under "
253 << shm_base_;
254}
255
Tyler Chatowa79419d2020-08-12 20:12:11 -0700256} // namespace starter
257} // namespace aos