#include "glog/logging.h"

#include "aos/events/shm_event_loop.h"

#include <fcntl.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/timerfd.h>
#include <sys/types.h>
#include <unistd.h>

#include <algorithm>
#include <atomic>
#include <cerrno>
#include <chrono>
#include <stdexcept>
#include <thread>

#include "absl/strings/string_view.h"
#include "aos/events/epoll.h"
#include "aos/ipc_lib/lockless_queue.h"
#include "aos/realtime.h"
#include "aos/util/phased_loop.h"
DEFINE_string(shm_base, "/dev/shm/aos",
              "Directory to place queue backing mmaped files in.");
DEFINE_uint32(permissions, 0770,
              "Permissions to make shared memory files and folders.");

namespace aos {

std::string ShmFolder(const Channel *channel) {
  CHECK(channel->has_name());
  CHECK_EQ(channel->name()->string_view()[0], '/');
  return FLAGS_shm_base + channel->name()->str() + "/";
}
std::string ShmPath(const Channel *channel) {
  CHECK(channel->has_type());
  return ShmFolder(channel) + channel->type()->str() + ".v0";
}

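// Manages the mmaped shared memory region which backs a channel's lockless
// queue. The first process to reference a channel creates and sizes the
// backing file; later processes open the same file and wait for the creator
// to size it before mapping it in.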
class MMapedQueue {
 public:
  explicit MMapedQueue(const Channel *channel) {
    std::string path = ShmPath(channel);

    // TODO(austin): Pull these out into the config if there is a need.
    config_.num_watchers = 10;
    config_.num_senders = 10;
    config_.queue_size = 2 * channel->frequency();
    config_.message_data_size = channel->max_size();

    size_ = ipc_lib::LocklessQueueMemorySize(config_);

    MkdirP(path);

    // There are 2 cases. Either the file already exists, or it does not and
    // we need to create it. Start by trying to create it. If that fails, the
    // file has already been created and we can open it normally. Once the
    // file has been created, it will never be deleted.
    // Note: O_CLOEXEC is an open() flag, not a permission bit, so it belongs
    // in the flags argument rather than the mode argument.
    fd_ = open(path.c_str(), O_RDWR | O_CREAT | O_EXCL | O_CLOEXEC,
               FLAGS_permissions);
    if (fd_ == -1 && errno == EEXIST) {
      VLOG(1) << path << " already created.";
      // File already exists.
      fd_ = open(path.c_str(), O_RDWR | O_CLOEXEC);
      PCHECK(fd_ != -1) << ": Failed to open " << path;
      while (true) {
        struct stat st;
        PCHECK(fstat(fd_, &st) == 0);
        if (st.st_size != 0) {
          CHECK_EQ(static_cast<size_t>(st.st_size), size_)
              << ": Size of " << path
              << " doesn't match expected size of backing queue file. Did the "
                 "queue definition change?";
          break;
        } else {
          // The creating process didn't get around to it yet. Give it a bit.
          std::this_thread::sleep_for(std::chrono::milliseconds(10));
          VLOG(1) << path << " is zero size, waiting";
        }
      }
    } else {
      VLOG(1) << "Created " << path;
      PCHECK(fd_ != -1) << ": Failed to open " << path;
      PCHECK(ftruncate(fd_, size_) == 0);
    }

    data_ = mmap(NULL, size_, PROT_READ | PROT_WRITE, MAP_SHARED, fd_, 0);
    PCHECK(data_ != MAP_FAILED);

    ipc_lib::InitializeLocklessQueueMemory(memory(), config_);
  }

  ~MMapedQueue() {
    PCHECK(munmap(data_, size_) == 0);
    PCHECK(close(fd_) == 0);
  }

  ipc_lib::LocklessQueueMemory *memory() const {
    return reinterpret_cast<ipc_lib::LocklessQueueMemory *>(data_);
  }

  const ipc_lib::LocklessQueueConfiguration &config() const {
    return config_;
  }

 private:
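  // Recursively creates the directory portion of path (everything before the
  // last '/'), like `mkdir -p` on the parent directory.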
  void MkdirP(absl::string_view path) {
    struct stat st;
    auto last_slash_pos = path.find_last_of("/");

    std::string folder(last_slash_pos == absl::string_view::npos
                           ? absl::string_view("")
                           : path.substr(0, last_slash_pos));
    if (stat(folder.c_str(), &st) == -1) {
      PCHECK(errno == ENOENT);
      CHECK_NE(folder, "") << ": Base path doesn't exist";
      MkdirP(folder);
      VLOG(1) << "Creating " << folder;
      PCHECK(mkdir(folder.c_str(), FLAGS_permissions) == 0);
    }
  }

  ipc_lib::LocklessQueueConfiguration config_;

  int fd_;

  size_t size_;
  void *data_;
};

// Returns the portion of the path after the last /.
absl::string_view Filename(absl::string_view path) {
  auto last_slash_pos = path.find_last_of("/");

  return last_slash_pos == absl::string_view::npos
             ? path
             : path.substr(last_slash_pos + 1, path.size());
}

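// Rough usage sketch (hypothetical configuration and channel variables; the
// typed convenience wrappers live on the EventLoop base class):
//
//   aos::ShmEventLoop loop(&configuration);
//   loop.MakeRawWatcher(channel,
//                       [](const Context &context, const void *message) {
//                         // React to the new message.
//                       });
//   loop.Run();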
ShmEventLoop::ShmEventLoop(const Configuration *configuration)
    : EventLoop(configuration), name_(Filename(program_invocation_name)) {}

namespace {

namespace chrono = ::std::chrono;

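// RawFetcher implementation which reads out of a channel's shared-memory
// lockless queue. Each fetcher owns its own mapping of the queue and a
// buffer big enough for the channel's largest message.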
class ShmFetcher : public RawFetcher {
 public:
  explicit ShmFetcher(const Channel *channel)
      : lockless_queue_memory_(channel),
        lockless_queue_(lockless_queue_memory_.memory(),
                        lockless_queue_memory_.config()),
        data_storage_(static_cast<AlignedChar *>(aligned_alloc(
                          alignof(AlignedChar), channel->max_size())),
                      &free) {
    context_.data = nullptr;
    // Point the queue index at the next index to read starting now. This
    // makes it such that FetchNext will read the next message sent after
    // the fetcher is created.
    PointAtNextQueueIndex();
  }

  ~ShmFetcher() { data_ = nullptr; }

  // Points the next message to fetch at the queue index which will be
  // populated next.
  void PointAtNextQueueIndex() {
    actual_queue_index_ = lockless_queue_.LatestQueueIndex();
    if (!actual_queue_index_.valid()) {
      // Nothing in the queue. The next element will show up at the 0th
      // index in the queue.
      actual_queue_index_ =
          ipc_lib::QueueIndex::Zero(lockless_queue_.queue_size());
    } else {
      actual_queue_index_ = actual_queue_index_.Increment();
    }
  }

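  // Fetches the message after the last one fetched, returning true if a new
  // message was available. Dies if we have fallen so far behind that the
  // message was already overwritten.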
  bool FetchNext() override {
    // TODO(austin): Write a test which starts with nothing in the queue,
    // and then calls FetchNext() after something is sent.
    // TODO(austin): Get behind and make sure it dies both here and with
    // Fetch.
    ipc_lib::LocklessQueue::ReadResult read_result = lockless_queue_.Read(
        actual_queue_index_.index(), &context_.monotonic_sent_time,
        &context_.realtime_sent_time, &context_.size,
        reinterpret_cast<char *>(data_storage_.get()));
    if (read_result == ipc_lib::LocklessQueue::ReadResult::GOOD) {
      context_.queue_index = actual_queue_index_.index();
      data_ = reinterpret_cast<char *>(data_storage_.get()) +
              lockless_queue_.message_data_size() - context_.size;
      context_.data = data_;
      actual_queue_index_ = actual_queue_index_.Increment();
    }

    // Make sure the data wasn't modified while we were reading it. This
    // can only happen if you are reading the last message *while* it is
    // being written to, which means you are pretty far behind.
    CHECK(read_result != ipc_lib::LocklessQueue::ReadResult::OVERWROTE)
        << ": Got behind while reading and the last message was modified "
           "out from under us while we were reading it. Don't get so far "
           "behind.";

    CHECK(read_result != ipc_lib::LocklessQueue::ReadResult::TOO_OLD)
        << ": The next message is no longer available.";
    return read_result == ipc_lib::LocklessQueue::ReadResult::GOOD;
  }

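  // Fetches the latest message in the queue, returning true if it is newer
  // than the last one we fetched.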
  bool Fetch() override {
    const ipc_lib::QueueIndex queue_index = lockless_queue_.LatestQueueIndex();
    // actual_queue_index_ is only meaningful if it was set by Fetch or
    // FetchNext, which is also when data_ gets set. So, only skip the
    // up-to-date check if data_ is non-null.
    //
    // Also, if the latest queue index is invalid, we are empty. So there
    // is nothing to fetch.
    if ((data_ != nullptr &&
         queue_index == actual_queue_index_.DecrementBy(1u)) ||
        !queue_index.valid()) {
      return false;
    }

    ipc_lib::LocklessQueue::ReadResult read_result =
        lockless_queue_.Read(queue_index.index(), &context_.monotonic_sent_time,
                             &context_.realtime_sent_time, &context_.size,
                             reinterpret_cast<char *>(data_storage_.get()));
    if (read_result == ipc_lib::LocklessQueue::ReadResult::GOOD) {
      context_.queue_index = queue_index.index();
      data_ = reinterpret_cast<char *>(data_storage_.get()) +
              lockless_queue_.message_data_size() - context_.size;
      context_.data = data_;
      actual_queue_index_ = queue_index.Increment();
    }

    // Make sure the data wasn't modified while we were reading it. This
    // can only happen if you are reading the last message *while* it is
    // being written to, which means you are pretty far behind.
    CHECK(read_result != ipc_lib::LocklessQueue::ReadResult::OVERWROTE)
        << ": Got behind while reading and the last message was modified "
           "out from under us while we were reading it. Don't get so far "
           "behind.";

    CHECK(read_result != ipc_lib::LocklessQueue::ReadResult::NOTHING_NEW)
        << ": Queue index went backwards. This should never happen.";

    // We fell behind between when we read the index and read the value.
    // This isn't worth recovering from since this means we went to sleep
    // for a long time in the middle of this function.
    CHECK(read_result != ipc_lib::LocklessQueue::ReadResult::TOO_OLD)
        << ": The next message is no longer available.";
    return read_result == ipc_lib::LocklessQueue::ReadResult::GOOD;
  }

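  // Registers (and unregisters) this fetcher's queue to deliver a wakeup
  // signal at the given priority when a new message is sent.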
  bool RegisterWakeup(int priority) {
    return lockless_queue_.RegisterWakeup(priority);
  }

  void UnregisterWakeup() { lockless_queue_.UnregisterWakeup(); }

 private:
  MMapedQueue lockless_queue_memory_;
  ipc_lib::LocklessQueue lockless_queue_;

  ipc_lib::QueueIndex actual_queue_index_ =
      ipc_lib::LocklessQueue::empty_queue_index();

  struct AlignedChar {
    alignas(32) char data;
  };

  std::unique_ptr<AlignedChar, decltype(&free)> data_storage_;
};

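// RawSender implementation which writes into a channel's shared-memory
// lockless queue and then wakes up any registered watchers.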
class ShmSender : public RawSender {
 public:
  explicit ShmSender(const Channel *channel, const ShmEventLoop *shm_event_loop)
      : RawSender(),
        shm_event_loop_(shm_event_loop),
        name_(channel->name()->str()),
        lockless_queue_memory_(channel),
        lockless_queue_(lockless_queue_memory_.memory(),
                        lockless_queue_memory_.config()),
        lockless_queue_sender_(lockless_queue_.MakeSender()) {}

  void *data() override { return lockless_queue_sender_.Data(); }
  size_t size() override { return lockless_queue_sender_.size(); }
  bool Send(size_t size) override {
    lockless_queue_sender_.Send(size);
    lockless_queue_.Wakeup(shm_event_loop_->priority());
    return true;
  }

  bool Send(void *msg, size_t length) override {
    lockless_queue_sender_.Send(reinterpret_cast<char *>(msg), length);
    lockless_queue_.Wakeup(shm_event_loop_->priority());
    // TODO(austin): Return an error if we send too fast.
    return true;
  }

  const absl::string_view name() const override { return name_; }

 private:
  const ShmEventLoop *shm_event_loop_;
  std::string name_;
  MMapedQueue lockless_queue_memory_;
  ipc_lib::LocklessQueue lockless_queue_;
  ipc_lib::LocklessQueue::Sender lockless_queue_sender_;
};


}  // namespace

namespace internal {

// Class to manage the state for a Watcher.
class WatcherState {
 public:
  WatcherState(
      const Channel *channel,
      std::function<void(const Context &context, const void *message)> watcher)
      : shm_fetcher_(channel), watcher_(watcher) {}

  ~WatcherState() {}

  // Points the next message to fetch at the queue index which will be
  // populated next.
  void PointAtNextQueueIndex() { shm_fetcher_.PointAtNextQueueIndex(); }

  // Returns true if there is new data available.
  bool HasNewData() {
    if (!has_new_data_) {
      has_new_data_ = shm_fetcher_.FetchNext();
    }

    return has_new_data_;
  }

  // Returns the time of the current data sample.
  aos::monotonic_clock::time_point event_time() const {
    return shm_fetcher_.context().monotonic_sent_time;
  }

  // Consumes the data by calling the callback.
  void CallCallback() {
    CHECK(has_new_data_);
    watcher_(shm_fetcher_.context(), shm_fetcher_.most_recent_data());
    has_new_data_ = false;
  }

  // Registers the underlying fetcher to be woken up at the given priority
  // when new messages arrive.
  bool RegisterWakeup(int priority) {
    return shm_fetcher_.RegisterWakeup(priority);
  }

  void UnregisterWakeup() { return shm_fetcher_.UnregisterWakeup(); }

 private:
  bool has_new_data_ = false;

  ShmFetcher shm_fetcher_;

  std::function<void(const Context &context, const void *message)> watcher_;
};

// Adapter class to adapt a timerfd to a TimerHandler.
// The part of the API which is accessed by the TimerHandler interface needs
// to be threadsafe. This means Setup and Disable.
class TimerHandlerState : public TimerHandler {
 public:
  TimerHandlerState(ShmEventLoop *shm_event_loop, ::std::function<void()> fn)
      : shm_event_loop_(shm_event_loop), fn_(::std::move(fn)) {
    shm_event_loop_->epoll_.OnReadable(timerfd_.fd(), [this]() {
      timerfd_.Read();
      fn_();
    });
  }

  ~TimerHandlerState() { shm_event_loop_->epoll_.DeleteFd(timerfd_.fd()); }

  void Setup(monotonic_clock::time_point base,
             monotonic_clock::duration repeat_offset) override {
    // SetTime is threadsafe already.
    timerfd_.SetTime(base, repeat_offset);
  }

  void Disable() override {
    // Disable is also threadsafe already.
    timerfd_.Disable();
  }

 private:
  ShmEventLoop *shm_event_loop_;

  TimerFd timerfd_;

  // Function to be run when the timer expires.
  ::std::function<void()> fn_;
};

// Adapter class to the timerfd and PhasedLoop.
// The part of the API which is accessed by the PhasedLoopHandler interface
// needs to be threadsafe. This means set_interval_and_offset.
class PhasedLoopHandler : public ::aos::PhasedLoopHandler {
 public:
  PhasedLoopHandler(ShmEventLoop *shm_event_loop, ::std::function<void(int)> fn,
                    const monotonic_clock::duration interval,
                    const monotonic_clock::duration offset)
      : shm_event_loop_(shm_event_loop),
        phased_loop_(interval, shm_event_loop_->monotonic_now(), offset),
        fn_(::std::move(fn)) {
    shm_event_loop_->epoll_.OnReadable(timerfd_.fd(), [this]() {
      timerfd_.Read();
      // Call the function with the number of cycles that have elapsed, then
      // reschedule the timer for the next cycle.
      fn_(cycles_elapsed_);
      Reschedule();
    });
  }

  ~PhasedLoopHandler() { shm_event_loop_->epoll_.DeleteFd(timerfd_.fd()); }

  void set_interval_and_offset(
      const monotonic_clock::duration interval,
      const monotonic_clock::duration offset) override {
    phased_loop_.set_interval_and_offset(interval, offset);
  }

  void Startup() {
    phased_loop_.Reset(shm_event_loop_->monotonic_now());
    Reschedule();
  }

 private:
  // Reschedules the timer to fire at the phased loop's next wakeup time.
  void Reschedule() {
    cycles_elapsed_ = phased_loop_.Iterate(shm_event_loop_->monotonic_now());
    timerfd_.SetTime(phased_loop_.sleep_time(), ::aos::monotonic_clock::zero());
  }

  ShmEventLoop *shm_event_loop_;

  TimerFd timerfd_;
  time::PhasedLoop phased_loop_;

  int cycles_elapsed_ = 1;

  // Function to be run each cycle, passed the number of cycles elapsed.
  const ::std::function<void(int)> fn_;
};
}  // namespace internal

::std::unique_ptr<RawFetcher> ShmEventLoop::MakeRawFetcher(
    const Channel *channel) {
  return ::std::unique_ptr<RawFetcher>(new ShmFetcher(channel));
}

::std::unique_ptr<RawSender> ShmEventLoop::MakeRawSender(
    const Channel *channel) {
  Take(channel);
  return ::std::unique_ptr<RawSender>(new ShmSender(channel, this));
}

void ShmEventLoop::MakeRawWatcher(
    const Channel *channel,
    std::function<void(const Context &context, const void *message)> watcher) {
  Take(channel);

  ::std::unique_ptr<internal::WatcherState> state(
      new internal::WatcherState(channel, std::move(watcher)));
  watchers_.push_back(::std::move(state));
}

TimerHandler *ShmEventLoop::AddTimer(::std::function<void()> callback) {
  ::std::unique_ptr<internal::TimerHandlerState> timer(
      new internal::TimerHandlerState(this, ::std::move(callback)));

  timers_.push_back(::std::move(timer));

  return timers_.back().get();
}

PhasedLoopHandler *ShmEventLoop::AddPhasedLoop(
    ::std::function<void(int)> callback,
    const monotonic_clock::duration interval,
    const monotonic_clock::duration offset) {
  ::std::unique_ptr<internal::PhasedLoopHandler> phased_loop(
      new internal::PhasedLoopHandler(this, ::std::move(callback), interval,
                                      offset));

  phased_loops_.push_back(::std::move(phased_loop));

  return phased_loops_.back().get();
}

void ShmEventLoop::OnRun(::std::function<void()> on_run) {
  on_run_.push_back(::std::move(on_run));
}

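// Runs the event loop: wires up a signalfd to dispatch watcher wakeups, goes
// realtime if a nonzero priority was set, snaps each watcher's queue index,
// runs the OnRun handlers, and then services epoll until Exit() is called.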
void ShmEventLoop::Run() {
  std::unique_ptr<ipc_lib::SignalFd> signalfd;

  if (watchers_.size() > 0) {
    signalfd.reset(new ipc_lib::SignalFd({ipc_lib::kWakeupSignal}));

    epoll_.OnReadable(signalfd->fd(), [signalfd_ptr = signalfd.get(), this]() {
      signalfd_siginfo result = signalfd_ptr->Read();
      CHECK_EQ(result.ssi_signo, ipc_lib::kWakeupSignal);

      // TODO(austin): We should really be checking *everything*, not just
      // watchers, and calling the oldest thing first. That will improve
      // determinism a lot.

      while (true) {
        // Call the handlers in time order of their messages.
        aos::monotonic_clock::time_point min_event_time =
            aos::monotonic_clock::max_time;
        size_t min_watcher_index = -1;
        size_t watcher_index = 0;
        for (::std::unique_ptr<internal::WatcherState> &watcher : watchers_) {
          if (watcher->HasNewData()) {
            if (watcher->event_time() < min_event_time) {
              min_watcher_index = watcher_index;
              min_event_time = watcher->event_time();
            }
          }
          ++watcher_index;
        }

        if (min_event_time == aos::monotonic_clock::max_time) {
          break;
        }

        watchers_[min_watcher_index]->CallCallback();
      }
    });
  }

  // Now that everything is set up, lock everything into memory and go RT.
  if (priority_ != 0) {
    ::aos::InitRT();

    LOG(INFO) << "Setting priority to " << priority_;
    ::aos::SetCurrentThreadRealtimePriority(priority_);
  }

  set_is_running(true);

  // Now that we are realtime (but before the OnRun handlers run), snap the
  // queue index.
  for (::std::unique_ptr<internal::WatcherState> &watcher : watchers_) {
    watcher->PointAtNextQueueIndex();
    CHECK(watcher->RegisterWakeup(priority_));
  }

  // Now that we are RT, run all the OnRun handlers.
  for (const auto &run : on_run_) {
    run();
  }

  // Start up all the phased loops.
  for (::std::unique_ptr<internal::PhasedLoopHandler> &phased_loop :
       phased_loops_) {
    phased_loop->Startup();
  }

  // And start our main event loop which runs all the timers and handles Quit.
  epoll_.Run();

  // Once epoll exits, there is no useful nonrt work left to do.
  set_is_running(false);

  // Nothing time or synchronization critical needs to happen after this point.
  // Drop RT priority.
  ::aos::UnsetCurrentThreadRealtimePriority();

  for (::std::unique_ptr<internal::WatcherState> &watcher : watchers_) {
    watcher->UnregisterWakeup();
  }

  if (watchers_.size() > 0) {
    epoll_.DeleteFd(signalfd->fd());
    signalfd.reset();
  }
}

void ShmEventLoop::Exit() { epoll_.Quit(); }

ShmEventLoop::~ShmEventLoop() {
  CHECK(!is_running()) << ": ShmEventLoop destroyed while running";
}

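// Claims exclusive use of a channel within this event loop, dying if it has
// already been claimed by another sender or watcher.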
void ShmEventLoop::Take(const Channel *channel) {
  CHECK(!is_running()) << ": Cannot add new objects while running.";

  // Cheat aggressively. Use the shared memory path as a proxy for a unique
  // identifier for the channel.
  const std::string path = ShmPath(channel);

  const auto prior = ::std::find(taken_.begin(), taken_.end(), path);
  CHECK(prior == taken_.end()) << ": " << path << " is already being used.";

  taken_.emplace_back(path);
}

void ShmEventLoop::SetRuntimeRealtimePriority(int priority) {
  if (is_running()) {
    LOG(FATAL) << "Cannot set realtime priority while running.";
  }
  priority_ = priority;
}

}  // namespace aos