blob: 0db7291cff625201d54bd3a079e57944bee52895 [file] [log] [blame]
Alex Perrycb7da4b2019-08-28 19:35:56 -07001#include "aos/events/shm_event_loop.h"
2
3#include <sys/mman.h>
4#include <sys/stat.h>
Austin Schuh39788ff2019-12-01 18:22:57 -08005#include <sys/syscall.h>
Alex Perrycb7da4b2019-08-28 19:35:56 -07006#include <sys/types.h>
7#include <unistd.h>
8#include <algorithm>
9#include <atomic>
10#include <chrono>
Austin Schuh39788ff2019-12-01 18:22:57 -080011#include <iterator>
Alex Perrycb7da4b2019-08-28 19:35:56 -070012#include <stdexcept>
13
14#include "aos/events/epoll.h"
Austin Schuh39788ff2019-12-01 18:22:57 -080015#include "aos/events/event_loop_generated.h"
16#include "aos/events/timing_statistics.h"
Alex Perrycb7da4b2019-08-28 19:35:56 -070017#include "aos/ipc_lib/lockless_queue.h"
Austin Schuh39788ff2019-12-01 18:22:57 -080018#include "aos/ipc_lib/signalfd.h"
Alex Perrycb7da4b2019-08-28 19:35:56 -070019#include "aos/realtime.h"
Austin Schuh32fd5a72019-12-01 22:20:26 -080020#include "aos/stl_mutex/stl_mutex.h"
Alex Perrycb7da4b2019-08-28 19:35:56 -070021#include "aos/util/phased_loop.h"
Austin Schuh39788ff2019-12-01 18:22:57 -080022#include "glog/logging.h"
Alex Perrycb7da4b2019-08-28 19:35:56 -070023
Austin Schuhe84c3ed2019-12-14 15:29:48 -080024namespace {
25
// Returns a pointer to whatever follows the final '/' in path, or path itself
// when it contains no '/'.  The result points into path, so path has to be
// null terminated and has to outlive the returned pointer.
const char *Filename(const char *path) {
  const std::string_view whole_path(path);
  const auto slash_index = whole_path.rfind('/');

  if (slash_index == std::string_view::npos) {
    return path;
  }
  return path + slash_index + 1;
}
35
36} // namespace
37
Alex Perrycb7da4b2019-08-28 19:35:56 -070038DEFINE_string(shm_base, "/dev/shm/aos",
39 "Directory to place queue backing mmaped files in.");
40DEFINE_uint32(permissions, 0770,
41 "Permissions to make shared memory files and folders.");
Austin Schuhe84c3ed2019-12-14 15:29:48 -080042DEFINE_string(application_name, Filename(program_invocation_name),
43 "The application name");
Alex Perrycb7da4b2019-08-28 19:35:56 -070044
45namespace aos {
46
Austin Schuhcdab6192019-12-29 17:47:46 -080047void SetShmBase(const std::string_view base) {
48 FLAGS_shm_base = std::string(base) + "/dev/shm/aos";
49}
50
Alex Perrycb7da4b2019-08-28 19:35:56 -070051std::string ShmFolder(const Channel *channel) {
52 CHECK(channel->has_name());
53 CHECK_EQ(channel->name()->string_view()[0], '/');
54 return FLAGS_shm_base + channel->name()->str() + "/";
55}
56std::string ShmPath(const Channel *channel) {
57 CHECK(channel->has_type());
Austin Schuhad154822019-12-27 15:45:13 -080058 return ShmFolder(channel) + channel->type()->str() + ".v1";
Alex Perrycb7da4b2019-08-28 19:35:56 -070059}
60
61class MMapedQueue {
62 public:
Austin Schuhaa79e4e2019-12-29 20:43:32 -080063 MMapedQueue(const Channel *channel,
64 const std::chrono::seconds channel_storage_duration) {
Alex Perrycb7da4b2019-08-28 19:35:56 -070065 std::string path = ShmPath(channel);
66
Austin Schuh80c7fce2019-12-05 20:48:43 -080067 config_.num_watchers = channel->num_watchers();
68 config_.num_senders = channel->num_senders();
Austin Schuhaa79e4e2019-12-29 20:43:32 -080069 config_.queue_size =
70 channel_storage_duration.count() * channel->frequency();
Alex Perrycb7da4b2019-08-28 19:35:56 -070071 config_.message_data_size = channel->max_size();
72
73 size_ = ipc_lib::LocklessQueueMemorySize(config_);
74
75 MkdirP(path);
76
77 // There are 2 cases. Either the file already exists, or it does not
78 // already exist and we need to create it. Start by trying to create it. If
79 // that fails, the file has already been created and we can open it
80 // normally.. Once the file has been created it wil never be deleted.
81 fd_ = open(path.c_str(), O_RDWR | O_CREAT | O_EXCL,
82 O_CLOEXEC | FLAGS_permissions);
83 if (fd_ == -1 && errno == EEXIST) {
84 VLOG(1) << path << " already created.";
85 // File already exists.
86 fd_ = open(path.c_str(), O_RDWR, O_CLOEXEC);
87 PCHECK(fd_ != -1) << ": Failed to open " << path;
88 while (true) {
89 struct stat st;
90 PCHECK(fstat(fd_, &st) == 0);
91 if (st.st_size != 0) {
92 CHECK_EQ(static_cast<size_t>(st.st_size), size_)
93 << ": Size of " << path
94 << " doesn't match expected size of backing queue file. Did the "
95 "queue definition change?";
96 break;
97 } else {
98 // The creating process didn't get around to it yet. Give it a bit.
99 std::this_thread::sleep_for(std::chrono::milliseconds(10));
100 VLOG(1) << path << " is zero size, waiting";
101 }
102 }
103 } else {
104 VLOG(1) << "Created " << path;
105 PCHECK(fd_ != -1) << ": Failed to open " << path;
106 PCHECK(ftruncate(fd_, size_) == 0);
107 }
108
109 data_ = mmap(NULL, size_, PROT_READ | PROT_WRITE, MAP_SHARED, fd_, 0);
110 PCHECK(data_ != MAP_FAILED);
111
112 ipc_lib::InitializeLocklessQueueMemory(memory(), config_);
113 }
114
115 ~MMapedQueue() {
116 PCHECK(munmap(data_, size_) == 0);
117 PCHECK(close(fd_) == 0);
118 }
119
120 ipc_lib::LocklessQueueMemory *memory() const {
121 return reinterpret_cast<ipc_lib::LocklessQueueMemory *>(data_);
122 }
123
Austin Schuh39788ff2019-12-01 18:22:57 -0800124 const ipc_lib::LocklessQueueConfiguration &config() const { return config_; }
Alex Perrycb7da4b2019-08-28 19:35:56 -0700125
126 private:
James Kuszmaul3ae42262019-11-08 12:33:41 -0800127 void MkdirP(std::string_view path) {
Alex Perrycb7da4b2019-08-28 19:35:56 -0700128 auto last_slash_pos = path.find_last_of("/");
129
James Kuszmaul3ae42262019-11-08 12:33:41 -0800130 std::string folder(last_slash_pos == std::string_view::npos
131 ? std::string_view("")
Alex Perrycb7da4b2019-08-28 19:35:56 -0700132 : path.substr(0, last_slash_pos));
Austin Schuh8ec76182019-12-23 16:28:00 -0800133 if (folder.empty()) return;
134 MkdirP(folder);
135 VLOG(1) << "Creating " << folder;
136 const int result = mkdir(folder.c_str(), FLAGS_permissions);
137 if (result == -1 && errno == EEXIST) {
138 VLOG(1) << "Already exists";
139 return;
Alex Perrycb7da4b2019-08-28 19:35:56 -0700140 }
Austin Schuh8ec76182019-12-23 16:28:00 -0800141 PCHECK(result == 0) << ": Error creating " << folder;
Alex Perrycb7da4b2019-08-28 19:35:56 -0700142 }
143
144 ipc_lib::LocklessQueueConfiguration config_;
145
146 int fd_;
147
148 size_t size_;
149 void *data_;
150};
151
Austin Schuh217a9782019-12-21 23:02:50 -0800152namespace {
153
Austin Schuh217a9782019-12-21 23:02:50 -0800154const Node *MaybeMyNode(const Configuration *configuration) {
155 if (!configuration->has_nodes()) {
156 return nullptr;
157 }
Alex Perrycb7da4b2019-08-28 19:35:56 -0700158
Austin Schuh217a9782019-12-21 23:02:50 -0800159 return configuration::GetMyNode(configuration);
160}
Alex Perrycb7da4b2019-08-28 19:35:56 -0700161
162namespace chrono = ::std::chrono;
163
Austin Schuh39788ff2019-12-01 18:22:57 -0800164} // namespace
165
Austin Schuh217a9782019-12-21 23:02:50 -0800166ShmEventLoop::ShmEventLoop(const Configuration *configuration)
167 : EventLoop(configuration),
Austin Schuhe84c3ed2019-12-14 15:29:48 -0800168 name_(FLAGS_application_name),
Austin Schuh15649d62019-12-28 16:36:38 -0800169 node_(MaybeMyNode(configuration)) {
170 if (configuration->has_nodes()) {
171 CHECK(node_ != nullptr) << ": Couldn't find node in config.";
172 }
173}
Austin Schuh217a9782019-12-21 23:02:50 -0800174
Austin Schuh39788ff2019-12-01 18:22:57 -0800175namespace internal {
176
177class SimpleShmFetcher {
Alex Perrycb7da4b2019-08-28 19:35:56 -0700178 public:
Austin Schuhaa79e4e2019-12-29 20:43:32 -0800179 explicit SimpleShmFetcher(EventLoop *event_loop, const Channel *channel)
Austin Schuhf5652592019-12-29 16:26:15 -0800180 : channel_(channel),
Austin Schuhaa79e4e2019-12-29 20:43:32 -0800181 lockless_queue_memory_(
182 channel,
Brian Silverman587da252020-01-01 17:00:47 -0800183 chrono::ceil<chrono::seconds>(chrono::nanoseconds(
Austin Schuhaa79e4e2019-12-29 20:43:32 -0800184 event_loop->configuration()->channel_storage_duration()))),
Alex Perrycb7da4b2019-08-28 19:35:56 -0700185 lockless_queue_(lockless_queue_memory_.memory(),
186 lockless_queue_memory_.config()),
Brian Silvermana1652f32020-01-29 20:41:44 -0800187 data_storage_(static_cast<char *>(malloc(channel->max_size() +
188 kChannelDataAlignment - 1)),
Alex Perrycb7da4b2019-08-28 19:35:56 -0700189 &free) {
190 context_.data = nullptr;
191 // Point the queue index at the next index to read starting now. This
192 // makes it such that FetchNext will read the next message sent after
193 // the fetcher is created.
194 PointAtNextQueueIndex();
195 }
196
Austin Schuh39788ff2019-12-01 18:22:57 -0800197 ~SimpleShmFetcher() {}
Alex Perrycb7da4b2019-08-28 19:35:56 -0700198
199 // Points the next message to fetch at the queue index which will be
200 // populated next.
201 void PointAtNextQueueIndex() {
202 actual_queue_index_ = lockless_queue_.LatestQueueIndex();
203 if (!actual_queue_index_.valid()) {
204 // Nothing in the queue. The next element will show up at the 0th
205 // index in the queue.
206 actual_queue_index_ =
207 ipc_lib::QueueIndex::Zero(lockless_queue_.queue_size());
208 } else {
209 actual_queue_index_ = actual_queue_index_.Increment();
210 }
211 }
212
Austin Schuh39788ff2019-12-01 18:22:57 -0800213 bool FetchNext() {
Alex Perrycb7da4b2019-08-28 19:35:56 -0700214 // TODO(austin): Get behind and make sure it dies both here and with
215 // Fetch.
216 ipc_lib::LocklessQueue::ReadResult read_result = lockless_queue_.Read(
Austin Schuhad154822019-12-27 15:45:13 -0800217 actual_queue_index_.index(), &context_.monotonic_event_time,
218 &context_.realtime_event_time, &context_.monotonic_remote_time,
219 &context_.realtime_remote_time, &context_.remote_queue_index,
Brian Silvermana1652f32020-01-29 20:41:44 -0800220 &context_.size, data_storage_start());
Alex Perrycb7da4b2019-08-28 19:35:56 -0700221 if (read_result == ipc_lib::LocklessQueue::ReadResult::GOOD) {
222 context_.queue_index = actual_queue_index_.index();
Austin Schuhad154822019-12-27 15:45:13 -0800223 if (context_.remote_queue_index == 0xffffffffu) {
224 context_.remote_queue_index = context_.queue_index;
225 }
226 if (context_.monotonic_remote_time == aos::monotonic_clock::min_time) {
227 context_.monotonic_remote_time = context_.monotonic_event_time;
228 }
229 if (context_.realtime_remote_time == aos::realtime_clock::min_time) {
230 context_.realtime_remote_time = context_.realtime_event_time;
231 }
Brian Silvermana1652f32020-01-29 20:41:44 -0800232 context_.data = data_storage_start() +
Austin Schuh39788ff2019-12-01 18:22:57 -0800233 lockless_queue_.message_data_size() - context_.size;
Alex Perrycb7da4b2019-08-28 19:35:56 -0700234 actual_queue_index_ = actual_queue_index_.Increment();
235 }
236
237 // Make sure the data wasn't modified while we were reading it. This
238 // can only happen if you are reading the last message *while* it is
239 // being written to, which means you are pretty far behind.
240 CHECK(read_result != ipc_lib::LocklessQueue::ReadResult::OVERWROTE)
241 << ": Got behind while reading and the last message was modified "
Austin Schuhf5652592019-12-29 16:26:15 -0800242 "out from under us while we were reading it. Don't get so far "
243 "behind. "
244 << configuration::CleanedChannelToString(channel_);
Alex Perrycb7da4b2019-08-28 19:35:56 -0700245
246 CHECK(read_result != ipc_lib::LocklessQueue::ReadResult::TOO_OLD)
Austin Schuhf5652592019-12-29 16:26:15 -0800247 << ": The next message is no longer available. "
248 << configuration::CleanedChannelToString(channel_);
Alex Perrycb7da4b2019-08-28 19:35:56 -0700249 return read_result == ipc_lib::LocklessQueue::ReadResult::GOOD;
250 }
251
Austin Schuh39788ff2019-12-01 18:22:57 -0800252 bool Fetch() {
Alex Perrycb7da4b2019-08-28 19:35:56 -0700253 const ipc_lib::QueueIndex queue_index = lockless_queue_.LatestQueueIndex();
254 // actual_queue_index_ is only meaningful if it was set by Fetch or
255 // FetchNext. This happens when valid_data_ has been set. So, only
256 // skip checking if valid_data_ is true.
257 //
258 // Also, if the latest queue index is invalid, we are empty. So there
259 // is nothing to fetch.
Austin Schuh39788ff2019-12-01 18:22:57 -0800260 if ((context_.data != nullptr &&
Alex Perrycb7da4b2019-08-28 19:35:56 -0700261 queue_index == actual_queue_index_.DecrementBy(1u)) ||
262 !queue_index.valid()) {
263 return false;
264 }
265
Austin Schuhad154822019-12-27 15:45:13 -0800266 ipc_lib::LocklessQueue::ReadResult read_result = lockless_queue_.Read(
267 queue_index.index(), &context_.monotonic_event_time,
268 &context_.realtime_event_time, &context_.monotonic_remote_time,
269 &context_.realtime_remote_time, &context_.remote_queue_index,
Brian Silvermana1652f32020-01-29 20:41:44 -0800270 &context_.size, data_storage_start());
Alex Perrycb7da4b2019-08-28 19:35:56 -0700271 if (read_result == ipc_lib::LocklessQueue::ReadResult::GOOD) {
272 context_.queue_index = queue_index.index();
Austin Schuhad154822019-12-27 15:45:13 -0800273 if (context_.remote_queue_index == 0xffffffffu) {
274 context_.remote_queue_index = context_.queue_index;
275 }
276 if (context_.monotonic_remote_time == aos::monotonic_clock::min_time) {
277 context_.monotonic_remote_time = context_.monotonic_event_time;
278 }
279 if (context_.realtime_remote_time == aos::realtime_clock::min_time) {
280 context_.realtime_remote_time = context_.realtime_event_time;
281 }
Brian Silvermana1652f32020-01-29 20:41:44 -0800282 context_.data = data_storage_start() +
Austin Schuh39788ff2019-12-01 18:22:57 -0800283 lockless_queue_.message_data_size() - context_.size;
Alex Perrycb7da4b2019-08-28 19:35:56 -0700284 actual_queue_index_ = queue_index.Increment();
285 }
286
287 // Make sure the data wasn't modified while we were reading it. This
288 // can only happen if you are reading the last message *while* it is
289 // being written to, which means you are pretty far behind.
290 CHECK(read_result != ipc_lib::LocklessQueue::ReadResult::OVERWROTE)
291 << ": Got behind while reading and the last message was modified "
Austin Schuhf5652592019-12-29 16:26:15 -0800292 "out from under us while we were reading it. Don't get so far "
293 "behind."
294 << configuration::CleanedChannelToString(channel_);
Alex Perrycb7da4b2019-08-28 19:35:56 -0700295
296 CHECK(read_result != ipc_lib::LocklessQueue::ReadResult::NOTHING_NEW)
Austin Schuhf5652592019-12-29 16:26:15 -0800297 << ": Queue index went backwards. This should never happen. "
298 << configuration::CleanedChannelToString(channel_);
Alex Perrycb7da4b2019-08-28 19:35:56 -0700299
300 // We fell behind between when we read the index and read the value.
301 // This isn't worth recovering from since this means we went to sleep
302 // for a long time in the middle of this function.
303 CHECK(read_result != ipc_lib::LocklessQueue::ReadResult::TOO_OLD)
Austin Schuhf5652592019-12-29 16:26:15 -0800304 << ": The next message is no longer available. "
305 << configuration::CleanedChannelToString(channel_);
Alex Perrycb7da4b2019-08-28 19:35:56 -0700306 return read_result == ipc_lib::LocklessQueue::ReadResult::GOOD;
307 }
308
Austin Schuh39788ff2019-12-01 18:22:57 -0800309 Context context() const { return context_; }
310
Alex Perrycb7da4b2019-08-28 19:35:56 -0700311 bool RegisterWakeup(int priority) {
312 return lockless_queue_.RegisterWakeup(priority);
313 }
314
315 void UnregisterWakeup() { lockless_queue_.UnregisterWakeup(); }
316
317 private:
Brian Silvermana1652f32020-01-29 20:41:44 -0800318 char *data_storage_start() {
319 return RoundChannelData(data_storage_.get(), channel_->max_size());
320 }
321
Austin Schuhf5652592019-12-29 16:26:15 -0800322 const Channel *const channel_;
Alex Perrycb7da4b2019-08-28 19:35:56 -0700323 MMapedQueue lockless_queue_memory_;
324 ipc_lib::LocklessQueue lockless_queue_;
325
326 ipc_lib::QueueIndex actual_queue_index_ =
327 ipc_lib::LocklessQueue::empty_queue_index();
328
Brian Silvermana1652f32020-01-29 20:41:44 -0800329 std::unique_ptr<char, decltype(&free)> data_storage_;
Austin Schuh39788ff2019-12-01 18:22:57 -0800330
331 Context context_;
332};
333
334class ShmFetcher : public RawFetcher {
335 public:
336 explicit ShmFetcher(EventLoop *event_loop, const Channel *channel)
Austin Schuhaa79e4e2019-12-29 20:43:32 -0800337 : RawFetcher(event_loop, channel),
338 simple_shm_fetcher_(event_loop, channel) {}
Austin Schuh39788ff2019-12-01 18:22:57 -0800339
340 ~ShmFetcher() { context_.data = nullptr; }
341
342 std::pair<bool, monotonic_clock::time_point> DoFetchNext() override {
343 if (simple_shm_fetcher_.FetchNext()) {
344 context_ = simple_shm_fetcher_.context();
345 return std::make_pair(true, monotonic_clock::now());
346 }
347 return std::make_pair(false, monotonic_clock::min_time);
348 }
349
350 std::pair<bool, monotonic_clock::time_point> DoFetch() override {
351 if (simple_shm_fetcher_.Fetch()) {
352 context_ = simple_shm_fetcher_.context();
353 return std::make_pair(true, monotonic_clock::now());
354 }
355 return std::make_pair(false, monotonic_clock::min_time);
356 }
357
358 private:
359 SimpleShmFetcher simple_shm_fetcher_;
Alex Perrycb7da4b2019-08-28 19:35:56 -0700360};
361
362class ShmSender : public RawSender {
363 public:
Austin Schuh39788ff2019-12-01 18:22:57 -0800364 explicit ShmSender(EventLoop *event_loop, const Channel *channel)
365 : RawSender(event_loop, channel),
Austin Schuhaa79e4e2019-12-29 20:43:32 -0800366 lockless_queue_memory_(
367 channel,
Brian Silverman587da252020-01-01 17:00:47 -0800368 chrono::ceil<chrono::seconds>(chrono::nanoseconds(
Austin Schuhaa79e4e2019-12-29 20:43:32 -0800369 event_loop->configuration()->channel_storage_duration()))),
Alex Perrycb7da4b2019-08-28 19:35:56 -0700370 lockless_queue_(lockless_queue_memory_.memory(),
371 lockless_queue_memory_.config()),
372 lockless_queue_sender_(lockless_queue_.MakeSender()) {}
373
Austin Schuh39788ff2019-12-01 18:22:57 -0800374 ~ShmSender() override {}
375
Alex Perrycb7da4b2019-08-28 19:35:56 -0700376 void *data() override { return lockless_queue_sender_.Data(); }
377 size_t size() override { return lockless_queue_sender_.size(); }
Austin Schuhad154822019-12-27 15:45:13 -0800378 bool DoSend(size_t length,
379 aos::monotonic_clock::time_point monotonic_remote_time,
380 aos::realtime_clock::time_point realtime_remote_time,
381 uint32_t remote_queue_index) override {
382 lockless_queue_sender_.Send(
383 length, monotonic_remote_time, realtime_remote_time, remote_queue_index,
384 &monotonic_sent_time_, &realtime_sent_time_, &sent_queue_index_);
Austin Schuh39788ff2019-12-01 18:22:57 -0800385 lockless_queue_.Wakeup(event_loop()->priority());
Alex Perrycb7da4b2019-08-28 19:35:56 -0700386 return true;
387 }
388
Austin Schuhad154822019-12-27 15:45:13 -0800389 bool DoSend(const void *msg, size_t length,
390 aos::monotonic_clock::time_point monotonic_remote_time,
391 aos::realtime_clock::time_point realtime_remote_time,
392 uint32_t remote_queue_index) override {
393 lockless_queue_sender_.Send(reinterpret_cast<const char *>(msg), length,
394 monotonic_remote_time, realtime_remote_time,
395 remote_queue_index, &monotonic_sent_time_,
396 &realtime_sent_time_, &sent_queue_index_);
Austin Schuh39788ff2019-12-01 18:22:57 -0800397 lockless_queue_.Wakeup(event_loop()->priority());
Alex Perrycb7da4b2019-08-28 19:35:56 -0700398 // TODO(austin): Return an error if we send too fast.
399 return true;
400 }
401
Alex Perrycb7da4b2019-08-28 19:35:56 -0700402 private:
Alex Perrycb7da4b2019-08-28 19:35:56 -0700403 MMapedQueue lockless_queue_memory_;
404 ipc_lib::LocklessQueue lockless_queue_;
405 ipc_lib::LocklessQueue::Sender lockless_queue_sender_;
406};
407
Alex Perrycb7da4b2019-08-28 19:35:56 -0700408// Class to manage the state for a Watcher.
Austin Schuh39788ff2019-12-01 18:22:57 -0800409class WatcherState : public aos::WatcherState {
Alex Perrycb7da4b2019-08-28 19:35:56 -0700410 public:
411 WatcherState(
Austin Schuh7d87b672019-12-01 20:23:49 -0800412 ShmEventLoop *event_loop, const Channel *channel,
Austin Schuh39788ff2019-12-01 18:22:57 -0800413 std::function<void(const Context &context, const void *message)> fn)
414 : aos::WatcherState(event_loop, channel, std::move(fn)),
Austin Schuh7d87b672019-12-01 20:23:49 -0800415 event_loop_(event_loop),
416 event_(this),
Austin Schuhaa79e4e2019-12-29 20:43:32 -0800417 simple_shm_fetcher_(event_loop, channel) {}
Alex Perrycb7da4b2019-08-28 19:35:56 -0700418
Austin Schuh7d87b672019-12-01 20:23:49 -0800419 ~WatcherState() override { event_loop_->RemoveEvent(&event_); }
Austin Schuh39788ff2019-12-01 18:22:57 -0800420
421 void Startup(EventLoop *event_loop) override {
Austin Schuh7d87b672019-12-01 20:23:49 -0800422 simple_shm_fetcher_.PointAtNextQueueIndex();
Austin Schuh39788ff2019-12-01 18:22:57 -0800423 CHECK(RegisterWakeup(event_loop->priority()));
424 }
Alex Perrycb7da4b2019-08-28 19:35:56 -0700425
Alex Perrycb7da4b2019-08-28 19:35:56 -0700426 // Returns true if there is new data available.
Austin Schuh7d87b672019-12-01 20:23:49 -0800427 bool CheckForNewData() {
Alex Perrycb7da4b2019-08-28 19:35:56 -0700428 if (!has_new_data_) {
Austin Schuh39788ff2019-12-01 18:22:57 -0800429 has_new_data_ = simple_shm_fetcher_.FetchNext();
Austin Schuh7d87b672019-12-01 20:23:49 -0800430
431 if (has_new_data_) {
432 event_.set_event_time(
Austin Schuhad154822019-12-27 15:45:13 -0800433 simple_shm_fetcher_.context().monotonic_event_time);
Austin Schuh7d87b672019-12-01 20:23:49 -0800434 event_loop_->AddEvent(&event_);
435 }
Alex Perrycb7da4b2019-08-28 19:35:56 -0700436 }
437
438 return has_new_data_;
439 }
440
Alex Perrycb7da4b2019-08-28 19:35:56 -0700441 // Consumes the data by calling the callback.
Austin Schuh7d87b672019-12-01 20:23:49 -0800442 void HandleEvent() {
Alex Perrycb7da4b2019-08-28 19:35:56 -0700443 CHECK(has_new_data_);
Austin Schuh39788ff2019-12-01 18:22:57 -0800444 DoCallCallback(monotonic_clock::now, simple_shm_fetcher_.context());
Alex Perrycb7da4b2019-08-28 19:35:56 -0700445 has_new_data_ = false;
Austin Schuh7d87b672019-12-01 20:23:49 -0800446 CheckForNewData();
Alex Perrycb7da4b2019-08-28 19:35:56 -0700447 }
448
Austin Schuh39788ff2019-12-01 18:22:57 -0800449 // Registers us to receive a signal on event reception.
Alex Perrycb7da4b2019-08-28 19:35:56 -0700450 bool RegisterWakeup(int priority) {
Austin Schuh39788ff2019-12-01 18:22:57 -0800451 return simple_shm_fetcher_.RegisterWakeup(priority);
Alex Perrycb7da4b2019-08-28 19:35:56 -0700452 }
453
Austin Schuh39788ff2019-12-01 18:22:57 -0800454 void UnregisterWakeup() { return simple_shm_fetcher_.UnregisterWakeup(); }
Alex Perrycb7da4b2019-08-28 19:35:56 -0700455
456 private:
457 bool has_new_data_ = false;
458
Austin Schuh7d87b672019-12-01 20:23:49 -0800459 ShmEventLoop *event_loop_;
460 EventHandler<WatcherState> event_;
Austin Schuh39788ff2019-12-01 18:22:57 -0800461 SimpleShmFetcher simple_shm_fetcher_;
Alex Perrycb7da4b2019-08-28 19:35:56 -0700462};
463
464// Adapter class to adapt a timerfd to a TimerHandler.
Austin Schuh7d87b672019-12-01 20:23:49 -0800465class TimerHandlerState final : public TimerHandler {
Alex Perrycb7da4b2019-08-28 19:35:56 -0700466 public:
467 TimerHandlerState(ShmEventLoop *shm_event_loop, ::std::function<void()> fn)
Austin Schuh39788ff2019-12-01 18:22:57 -0800468 : TimerHandler(shm_event_loop, std::move(fn)),
Austin Schuh7d87b672019-12-01 20:23:49 -0800469 shm_event_loop_(shm_event_loop),
470 event_(this) {
471 shm_event_loop_->epoll_.OnReadable(
472 timerfd_.fd(), [this]() { shm_event_loop_->HandleEvent(); });
Alex Perrycb7da4b2019-08-28 19:35:56 -0700473 }
474
Austin Schuh7d87b672019-12-01 20:23:49 -0800475 ~TimerHandlerState() {
476 Disable();
477 shm_event_loop_->epoll_.DeleteFd(timerfd_.fd());
478 }
479
480 void HandleEvent() {
481 uint64_t elapsed_cycles = timerfd_.Read();
482 if (elapsed_cycles == 0u) {
483 // We got called before the timer interrupt could happen, but because we
484 // are checking the time, we got called on time. Push the timer out by 1
485 // cycle.
486 elapsed_cycles = 1u;
487 timerfd_.SetTime(base_ + repeat_offset_, repeat_offset_);
488 }
489
490 Call(monotonic_clock::now, base_);
491
492 base_ += repeat_offset_ * elapsed_cycles;
493
494 if (repeat_offset_ != chrono::seconds(0)) {
495 event_.set_event_time(base_);
496 shm_event_loop_->AddEvent(&event_);
497 }
498 }
Alex Perrycb7da4b2019-08-28 19:35:56 -0700499
500 void Setup(monotonic_clock::time_point base,
501 monotonic_clock::duration repeat_offset) override {
Austin Schuh7d87b672019-12-01 20:23:49 -0800502 if (event_.valid()) {
503 shm_event_loop_->RemoveEvent(&event_);
504 }
505
Alex Perrycb7da4b2019-08-28 19:35:56 -0700506 timerfd_.SetTime(base, repeat_offset);
Austin Schuhde8a8ff2019-11-30 15:25:36 -0800507 base_ = base;
508 repeat_offset_ = repeat_offset;
Austin Schuh7d87b672019-12-01 20:23:49 -0800509 event_.set_event_time(base_);
510 shm_event_loop_->AddEvent(&event_);
Alex Perrycb7da4b2019-08-28 19:35:56 -0700511 }
512
Austin Schuh7d87b672019-12-01 20:23:49 -0800513 void Disable() override {
514 shm_event_loop_->RemoveEvent(&event_);
515 timerfd_.Disable();
516 }
Alex Perrycb7da4b2019-08-28 19:35:56 -0700517
518 private:
519 ShmEventLoop *shm_event_loop_;
Austin Schuh7d87b672019-12-01 20:23:49 -0800520 EventHandler<TimerHandlerState> event_;
Alex Perrycb7da4b2019-08-28 19:35:56 -0700521
522 TimerFd timerfd_;
523
Austin Schuhde8a8ff2019-11-30 15:25:36 -0800524 monotonic_clock::time_point base_;
525 monotonic_clock::duration repeat_offset_;
Alex Perrycb7da4b2019-08-28 19:35:56 -0700526};
527
528// Adapter class to the timerfd and PhasedLoop.
Austin Schuh7d87b672019-12-01 20:23:49 -0800529class PhasedLoopHandler final : public ::aos::PhasedLoopHandler {
Alex Perrycb7da4b2019-08-28 19:35:56 -0700530 public:
531 PhasedLoopHandler(ShmEventLoop *shm_event_loop, ::std::function<void(int)> fn,
532 const monotonic_clock::duration interval,
533 const monotonic_clock::duration offset)
Austin Schuh39788ff2019-12-01 18:22:57 -0800534 : aos::PhasedLoopHandler(shm_event_loop, std::move(fn), interval, offset),
Austin Schuh7d87b672019-12-01 20:23:49 -0800535 shm_event_loop_(shm_event_loop),
536 event_(this) {
537 shm_event_loop_->epoll_.OnReadable(
538 timerfd_.fd(), [this]() { shm_event_loop_->HandleEvent(); });
539 }
540
541 void HandleEvent() {
542 // The return value for read is the number of cycles that have elapsed.
543 // Because we check to see when this event *should* have happened, there are
544 // cases where Read() will return 0, when 1 cycle has actually happened.
545 // This occurs when the timer interrupt hasn't triggered yet. Therefore,
546 // ignore it. Call handles rescheduling and calculating elapsed cycles
547 // without any extra help.
548 timerfd_.Read();
549 event_.Invalidate();
550
551 Call(monotonic_clock::now, [this](monotonic_clock::time_point sleep_time) {
552 Schedule(sleep_time);
Alex Perrycb7da4b2019-08-28 19:35:56 -0700553 });
554 }
555
Austin Schuh39788ff2019-12-01 18:22:57 -0800556 ~PhasedLoopHandler() override {
557 shm_event_loop_->epoll_.DeleteFd(timerfd_.fd());
Austin Schuh7d87b672019-12-01 20:23:49 -0800558 shm_event_loop_->RemoveEvent(&event_);
Alex Perrycb7da4b2019-08-28 19:35:56 -0700559 }
560
561 private:
Austin Schuhde8a8ff2019-11-30 15:25:36 -0800562 // Reschedules the timer.
Austin Schuh39788ff2019-12-01 18:22:57 -0800563 void Schedule(monotonic_clock::time_point sleep_time) override {
Austin Schuh7d87b672019-12-01 20:23:49 -0800564 if (event_.valid()) {
565 shm_event_loop_->RemoveEvent(&event_);
566 }
567
Austin Schuh39788ff2019-12-01 18:22:57 -0800568 timerfd_.SetTime(sleep_time, ::aos::monotonic_clock::zero());
Austin Schuh7d87b672019-12-01 20:23:49 -0800569 event_.set_event_time(sleep_time);
570 shm_event_loop_->AddEvent(&event_);
Alex Perrycb7da4b2019-08-28 19:35:56 -0700571 }
572
573 ShmEventLoop *shm_event_loop_;
Austin Schuh7d87b672019-12-01 20:23:49 -0800574 EventHandler<PhasedLoopHandler> event_;
Alex Perrycb7da4b2019-08-28 19:35:56 -0700575
576 TimerFd timerfd_;
Alex Perrycb7da4b2019-08-28 19:35:56 -0700577};
578} // namespace internal
579
580::std::unique_ptr<RawFetcher> ShmEventLoop::MakeRawFetcher(
581 const Channel *channel) {
Austin Schuhca4828c2019-12-28 14:21:35 -0800582 if (!configuration::ChannelIsReadableOnNode(channel, node())) {
583 LOG(FATAL) << "Channel { \"name\": \"" << channel->name()->string_view()
584 << "\", \"type\": \"" << channel->type()->string_view()
585 << "\" } is not able to be fetched on this node. Check your "
586 "configuration.";
Austin Schuh217a9782019-12-21 23:02:50 -0800587 }
588
Austin Schuh39788ff2019-12-01 18:22:57 -0800589 return ::std::unique_ptr<RawFetcher>(new internal::ShmFetcher(this, channel));
Alex Perrycb7da4b2019-08-28 19:35:56 -0700590}
591
592::std::unique_ptr<RawSender> ShmEventLoop::MakeRawSender(
593 const Channel *channel) {
Brian Silverman0fc69932020-01-24 21:54:02 -0800594 TakeSender(channel);
Austin Schuh39788ff2019-12-01 18:22:57 -0800595
596 return ::std::unique_ptr<RawSender>(new internal::ShmSender(this, channel));
Alex Perrycb7da4b2019-08-28 19:35:56 -0700597}
598
599void ShmEventLoop::MakeRawWatcher(
600 const Channel *channel,
601 std::function<void(const Context &context, const void *message)> watcher) {
Brian Silverman0fc69932020-01-24 21:54:02 -0800602 TakeWatcher(channel);
Austin Schuh217a9782019-12-21 23:02:50 -0800603
Austin Schuh39788ff2019-12-01 18:22:57 -0800604 NewWatcher(::std::unique_ptr<WatcherState>(
605 new internal::WatcherState(this, channel, std::move(watcher))));
Alex Perrycb7da4b2019-08-28 19:35:56 -0700606}
607
608TimerHandler *ShmEventLoop::AddTimer(::std::function<void()> callback) {
Austin Schuh39788ff2019-12-01 18:22:57 -0800609 return NewTimer(::std::unique_ptr<TimerHandler>(
610 new internal::TimerHandlerState(this, ::std::move(callback))));
Alex Perrycb7da4b2019-08-28 19:35:56 -0700611}
612
613PhasedLoopHandler *ShmEventLoop::AddPhasedLoop(
614 ::std::function<void(int)> callback,
615 const monotonic_clock::duration interval,
616 const monotonic_clock::duration offset) {
Austin Schuh39788ff2019-12-01 18:22:57 -0800617 return NewPhasedLoop(
618 ::std::unique_ptr<PhasedLoopHandler>(new internal::PhasedLoopHandler(
619 this, ::std::move(callback), interval, offset)));
Alex Perrycb7da4b2019-08-28 19:35:56 -0700620}
621
622void ShmEventLoop::OnRun(::std::function<void()> on_run) {
623 on_run_.push_back(::std::move(on_run));
624}
625
Austin Schuh7d87b672019-12-01 20:23:49 -0800626void ShmEventLoop::HandleEvent() {
627 // Update all the times for handlers.
628 for (::std::unique_ptr<WatcherState> &base_watcher : watchers_) {
629 internal::WatcherState *watcher =
630 reinterpret_cast<internal::WatcherState *>(base_watcher.get());
631
632 watcher->CheckForNewData();
633 }
634
Austin Schuh39788ff2019-12-01 18:22:57 -0800635 while (true) {
Austin Schuh7d87b672019-12-01 20:23:49 -0800636 if (EventCount() == 0 ||
637 PeekEvent()->event_time() > monotonic_clock::now()) {
Austin Schuh39788ff2019-12-01 18:22:57 -0800638 break;
639 }
640
Austin Schuh7d87b672019-12-01 20:23:49 -0800641 EventLoopEvent *event = PopEvent();
642 event->HandleEvent();
Austin Schuh39788ff2019-12-01 18:22:57 -0800643 }
644}
645
Austin Schuh32fd5a72019-12-01 22:20:26 -0800646// RAII class to mask signals.
647class ScopedSignalMask {
648 public:
649 ScopedSignalMask(std::initializer_list<int> signals) {
650 sigset_t sigset;
651 PCHECK(sigemptyset(&sigset) == 0);
652 for (int signal : signals) {
653 PCHECK(sigaddset(&sigset, signal) == 0);
654 }
655
656 PCHECK(sigprocmask(SIG_BLOCK, &sigset, &old_) == 0);
657 }
658
659 ~ScopedSignalMask() { PCHECK(sigprocmask(SIG_SETMASK, &old_, nullptr) == 0); }
660
661 private:
662 sigset_t old_;
663};
664
665// Class to manage the static state associated with killing multiple event
666// loops.
667class SignalHandler {
668 public:
669 // Gets the singleton.
670 static SignalHandler *global() {
671 static SignalHandler loop;
672 return &loop;
673 }
674
675 // Handles the signal with the singleton.
676 static void HandleSignal(int) { global()->DoHandleSignal(); }
677
678 // Registers an event loop to receive Exit() calls.
679 void Register(ShmEventLoop *event_loop) {
680 // Block signals while we have the mutex so we never race with the signal
681 // handler.
682 ScopedSignalMask mask({SIGINT, SIGHUP, SIGTERM});
683 std::unique_lock<stl_mutex> locker(mutex_);
684 if (event_loops_.size() == 0) {
685 // The first caller registers the signal handler.
686 struct sigaction new_action;
687 sigemptyset(&new_action.sa_mask);
688 // This makes it so that 2 control c's to a stuck process will kill it by
689 // restoring the original signal handler.
690 new_action.sa_flags = SA_RESETHAND;
691 new_action.sa_handler = &HandleSignal;
692
693 PCHECK(sigaction(SIGINT, &new_action, &old_action_int_) == 0);
694 PCHECK(sigaction(SIGHUP, &new_action, &old_action_hup_) == 0);
695 PCHECK(sigaction(SIGTERM, &new_action, &old_action_term_) == 0);
696 }
697
698 event_loops_.push_back(event_loop);
699 }
700
701 // Unregisters an event loop to receive Exit() calls.
702 void Unregister(ShmEventLoop *event_loop) {
703 // Block signals while we have the mutex so we never race with the signal
704 // handler.
705 ScopedSignalMask mask({SIGINT, SIGHUP, SIGTERM});
706 std::unique_lock<stl_mutex> locker(mutex_);
707
708 event_loops_.erase(std::find(event_loops_.begin(), event_loops_.end(), event_loop));
709
710 if (event_loops_.size() == 0u) {
711 // The last caller restores the original signal handlers.
712 PCHECK(sigaction(SIGINT, &old_action_int_, nullptr) == 0);
713 PCHECK(sigaction(SIGHUP, &old_action_hup_, nullptr) == 0);
714 PCHECK(sigaction(SIGTERM, &old_action_term_, nullptr) == 0);
715 }
716 }
717
718 private:
719 void DoHandleSignal() {
720 // We block signals while grabbing the lock, so there should never be a
721 // race. Confirm that this is true using trylock.
722 CHECK(mutex_.try_lock()) << ": sigprocmask failed to block signals while "
723 "modifing the event loop list.";
724 for (ShmEventLoop *event_loop : event_loops_) {
725 event_loop->Exit();
726 }
727 mutex_.unlock();
728 }
729
730 // Mutex to protect all state.
731 stl_mutex mutex_;
732 std::vector<ShmEventLoop *> event_loops_;
733 struct sigaction old_action_int_;
734 struct sigaction old_action_hup_;
735 struct sigaction old_action_term_;
736};
737
Alex Perrycb7da4b2019-08-28 19:35:56 -0700738void ShmEventLoop::Run() {
Austin Schuh32fd5a72019-12-01 22:20:26 -0800739 SignalHandler::global()->Register(this);
Austin Schuh39788ff2019-12-01 18:22:57 -0800740
Alex Perrycb7da4b2019-08-28 19:35:56 -0700741 std::unique_ptr<ipc_lib::SignalFd> signalfd;
742
743 if (watchers_.size() > 0) {
744 signalfd.reset(new ipc_lib::SignalFd({ipc_lib::kWakeupSignal}));
745
746 epoll_.OnReadable(signalfd->fd(), [signalfd_ptr = signalfd.get(), this]() {
747 signalfd_siginfo result = signalfd_ptr->Read();
748 CHECK_EQ(result.ssi_signo, ipc_lib::kWakeupSignal);
749
750 // TODO(austin): We should really be checking *everything*, not just
751 // watchers, and calling the oldest thing first. That will improve
752 // determinism a lot.
753
Austin Schuh7d87b672019-12-01 20:23:49 -0800754 HandleEvent();
Alex Perrycb7da4b2019-08-28 19:35:56 -0700755 });
756 }
757
Austin Schuh39788ff2019-12-01 18:22:57 -0800758 MaybeScheduleTimingReports();
759
Austin Schuh7d87b672019-12-01 20:23:49 -0800760 ReserveEvents();
761
James Kuszmaul57c2baa2020-01-19 14:52:52 -0800762 aos::SetCurrentThreadName(name_.substr(0, 16));
Austin Schuh39788ff2019-12-01 18:22:57 -0800763 // Now, all the callbacks are setup. Lock everything into memory and go RT.
Alex Perrycb7da4b2019-08-28 19:35:56 -0700764 if (priority_ != 0) {
765 ::aos::InitRT();
766
767 LOG(INFO) << "Setting priority to " << priority_;
768 ::aos::SetCurrentThreadRealtimePriority(priority_);
769 }
770
771 set_is_running(true);
772
773 // Now that we are realtime (but before the OnRun handlers run), snap the
774 // queue index.
Austin Schuh39788ff2019-12-01 18:22:57 -0800775 for (::std::unique_ptr<WatcherState> &watcher : watchers_) {
776 watcher->Startup(this);
Alex Perrycb7da4b2019-08-28 19:35:56 -0700777 }
778
779 // Now that we are RT, run all the OnRun handlers.
780 for (const auto &run : on_run_) {
781 run();
782 }
783
Alex Perrycb7da4b2019-08-28 19:35:56 -0700784 // And start our main event loop which runs all the timers and handles Quit.
785 epoll_.Run();
786
787 // Once epoll exits, there is no useful nonrt work left to do.
788 set_is_running(false);
789
790 // Nothing time or synchronization critical needs to happen after this point.
791 // Drop RT priority.
792 ::aos::UnsetCurrentThreadRealtimePriority();
793
Austin Schuh39788ff2019-12-01 18:22:57 -0800794 for (::std::unique_ptr<WatcherState> &base_watcher : watchers_) {
795 internal::WatcherState *watcher =
796 reinterpret_cast<internal::WatcherState *>(base_watcher.get());
Alex Perrycb7da4b2019-08-28 19:35:56 -0700797 watcher->UnregisterWakeup();
798 }
799
800 if (watchers_.size() > 0) {
801 epoll_.DeleteFd(signalfd->fd());
802 signalfd.reset();
803 }
Austin Schuh32fd5a72019-12-01 22:20:26 -0800804
805 SignalHandler::global()->Unregister(this);
Austin Schuhe84c3ed2019-12-14 15:29:48 -0800806
807 // Trigger any remaining senders or fetchers to be cleared before destroying
808 // the event loop so the book keeping matches. Do this in the thread that
809 // created the timing reporter.
810 timing_report_sender_.reset();
Alex Perrycb7da4b2019-08-28 19:35:56 -0700811}
812
813void ShmEventLoop::Exit() { epoll_.Quit(); }
814
815ShmEventLoop::~ShmEventLoop() {
Austin Schuh39788ff2019-12-01 18:22:57 -0800816 // Force everything with a registered fd with epoll to be destroyed now.
817 timers_.clear();
818 phased_loops_.clear();
819 watchers_.clear();
820
Alex Perrycb7da4b2019-08-28 19:35:56 -0700821 CHECK(!is_running()) << ": ShmEventLoop destroyed while running";
822}
823
Alex Perrycb7da4b2019-08-28 19:35:56 -0700824void ShmEventLoop::SetRuntimeRealtimePriority(int priority) {
825 if (is_running()) {
826 LOG(FATAL) << "Cannot set realtime priority while running.";
827 }
828 priority_ = priority;
829}
830
James Kuszmaul57c2baa2020-01-19 14:52:52 -0800831void ShmEventLoop::set_name(const std::string_view name) {
832 name_ = std::string(name);
833 UpdateTimingReport();
834}
835
Austin Schuh39788ff2019-12-01 18:22:57 -0800836pid_t ShmEventLoop::GetTid() { return syscall(SYS_gettid); }
837
Alex Perrycb7da4b2019-08-28 19:35:56 -0700838} // namespace aos