blob: 8fed31e0cf1b84ea9f45ba5c655ebc792b37e98c [file] [log] [blame]
Alex Perrycb7da4b2019-08-28 19:35:56 -07001#include "glog/logging.h"
2
3#include "aos/events/shm_event_loop.h"
4
#include <fcntl.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/timerfd.h>
#include <sys/types.h>
#include <unistd.h>

#include <algorithm>
#include <atomic>
#include <chrono>
#include <stdexcept>
#include <thread>

#include "aos/events/epoll.h"
#include "aos/ipc_lib/lockless_queue.h"
#include "aos/realtime.h"
#include "aos/util/phased_loop.h"
19
20DEFINE_string(shm_base, "/dev/shm/aos",
21 "Directory to place queue backing mmaped files in.");
22DEFINE_uint32(permissions, 0770,
23 "Permissions to make shared memory files and folders.");
24
25namespace aos {
26
27std::string ShmFolder(const Channel *channel) {
28 CHECK(channel->has_name());
29 CHECK_EQ(channel->name()->string_view()[0], '/');
30 return FLAGS_shm_base + channel->name()->str() + "/";
31}
32std::string ShmPath(const Channel *channel) {
33 CHECK(channel->has_type());
34 return ShmFolder(channel) + channel->type()->str() + ".v0";
35}
36
37class MMapedQueue {
38 public:
39 MMapedQueue(const Channel *channel) {
40 std::string path = ShmPath(channel);
41
42 // TODO(austin): Pull these out into the config if there is a need.
43 config_.num_watchers = 10;
44 config_.num_senders = 10;
45 config_.queue_size = 2 * channel->frequency();
46 config_.message_data_size = channel->max_size();
47
48 size_ = ipc_lib::LocklessQueueMemorySize(config_);
49
50 MkdirP(path);
51
52 // There are 2 cases. Either the file already exists, or it does not
53 // already exist and we need to create it. Start by trying to create it. If
54 // that fails, the file has already been created and we can open it
55 // normally.. Once the file has been created it wil never be deleted.
56 fd_ = open(path.c_str(), O_RDWR | O_CREAT | O_EXCL,
57 O_CLOEXEC | FLAGS_permissions);
58 if (fd_ == -1 && errno == EEXIST) {
59 VLOG(1) << path << " already created.";
60 // File already exists.
61 fd_ = open(path.c_str(), O_RDWR, O_CLOEXEC);
62 PCHECK(fd_ != -1) << ": Failed to open " << path;
63 while (true) {
64 struct stat st;
65 PCHECK(fstat(fd_, &st) == 0);
66 if (st.st_size != 0) {
67 CHECK_EQ(static_cast<size_t>(st.st_size), size_)
68 << ": Size of " << path
69 << " doesn't match expected size of backing queue file. Did the "
70 "queue definition change?";
71 break;
72 } else {
73 // The creating process didn't get around to it yet. Give it a bit.
74 std::this_thread::sleep_for(std::chrono::milliseconds(10));
75 VLOG(1) << path << " is zero size, waiting";
76 }
77 }
78 } else {
79 VLOG(1) << "Created " << path;
80 PCHECK(fd_ != -1) << ": Failed to open " << path;
81 PCHECK(ftruncate(fd_, size_) == 0);
82 }
83
84 data_ = mmap(NULL, size_, PROT_READ | PROT_WRITE, MAP_SHARED, fd_, 0);
85 PCHECK(data_ != MAP_FAILED);
86
87 ipc_lib::InitializeLocklessQueueMemory(memory(), config_);
88 }
89
90 ~MMapedQueue() {
91 PCHECK(munmap(data_, size_) == 0);
92 PCHECK(close(fd_) == 0);
93 }
94
95 ipc_lib::LocklessQueueMemory *memory() const {
96 return reinterpret_cast<ipc_lib::LocklessQueueMemory *>(data_);
97 }
98
99 const ipc_lib::LocklessQueueConfiguration &config() const {
100 return config_;
101 }
102
103 private:
104 void MkdirP(absl::string_view path) {
105 struct stat st;
106 auto last_slash_pos = path.find_last_of("/");
107
108 std::string folder(last_slash_pos == absl::string_view::npos
109 ? absl::string_view("")
110 : path.substr(0, last_slash_pos));
111 if (stat(folder.c_str(), &st) == -1) {
112 PCHECK(errno == ENOENT);
113 CHECK_NE(folder, "") << ": Base path doesn't exist";
114 MkdirP(folder);
115 VLOG(1) << "Creating " << folder;
116 PCHECK(mkdir(folder.c_str(), FLAGS_permissions) == 0);
117 }
118 }
119
120 ipc_lib::LocklessQueueConfiguration config_;
121
122 int fd_;
123
124 size_t size_;
125 void *data_;
126};
127
128// Returns the portion of the path after the last /.
129absl::string_view Filename(absl::string_view path) {
130 auto last_slash_pos = path.find_last_of("/");
131
132 return last_slash_pos == absl::string_view::npos
133 ? path
134 : path.substr(last_slash_pos + 1, path.size());
135}
136
// Names the event loop after the running binary (the basename of
// program_invocation_name, i.e. argv[0]).
ShmEventLoop::ShmEventLoop(const Configuration *configuration)
    : EventLoop(configuration), name_(Filename(program_invocation_name)) {}
139
140namespace {
141
142namespace chrono = ::std::chrono;
143
144class ShmFetcher : public RawFetcher {
145 public:
146 explicit ShmFetcher(const Channel *channel)
Austin Schuh54cf95f2019-11-29 13:14:18 -0800147 : RawFetcher(channel),
148 lockless_queue_memory_(channel),
Alex Perrycb7da4b2019-08-28 19:35:56 -0700149 lockless_queue_(lockless_queue_memory_.memory(),
150 lockless_queue_memory_.config()),
151 data_storage_(static_cast<AlignedChar *>(aligned_alloc(
152 alignof(AlignedChar), channel->max_size())),
153 &free) {
154 context_.data = nullptr;
155 // Point the queue index at the next index to read starting now. This
156 // makes it such that FetchNext will read the next message sent after
157 // the fetcher is created.
158 PointAtNextQueueIndex();
159 }
160
161 ~ShmFetcher() { data_ = nullptr; }
162
163 // Points the next message to fetch at the queue index which will be
164 // populated next.
165 void PointAtNextQueueIndex() {
166 actual_queue_index_ = lockless_queue_.LatestQueueIndex();
167 if (!actual_queue_index_.valid()) {
168 // Nothing in the queue. The next element will show up at the 0th
169 // index in the queue.
170 actual_queue_index_ =
171 ipc_lib::QueueIndex::Zero(lockless_queue_.queue_size());
172 } else {
173 actual_queue_index_ = actual_queue_index_.Increment();
174 }
175 }
176
177 bool FetchNext() override {
178 // TODO(austin): Write a test which starts with nothing in the queue,
179 // and then calls FetchNext() after something is sent.
180 // TODO(austin): Get behind and make sure it dies both here and with
181 // Fetch.
182 ipc_lib::LocklessQueue::ReadResult read_result = lockless_queue_.Read(
183 actual_queue_index_.index(), &context_.monotonic_sent_time,
184 &context_.realtime_sent_time, &context_.size,
185 reinterpret_cast<char *>(data_storage_.get()));
186 if (read_result == ipc_lib::LocklessQueue::ReadResult::GOOD) {
187 context_.queue_index = actual_queue_index_.index();
188 data_ = reinterpret_cast<char *>(data_storage_.get()) +
189 lockless_queue_.message_data_size() - context_.size;
190 context_.data = data_;
191 actual_queue_index_ = actual_queue_index_.Increment();
192 }
193
194 // Make sure the data wasn't modified while we were reading it. This
195 // can only happen if you are reading the last message *while* it is
196 // being written to, which means you are pretty far behind.
197 CHECK(read_result != ipc_lib::LocklessQueue::ReadResult::OVERWROTE)
198 << ": Got behind while reading and the last message was modified "
199 "out "
200 "from under us while we were reading it. Don't get so far "
201 "behind.";
202
203 CHECK(read_result != ipc_lib::LocklessQueue::ReadResult::TOO_OLD)
204 << ": The next message is no longer available.";
205 return read_result == ipc_lib::LocklessQueue::ReadResult::GOOD;
206 }
207
208 bool Fetch() override {
209 const ipc_lib::QueueIndex queue_index = lockless_queue_.LatestQueueIndex();
210 // actual_queue_index_ is only meaningful if it was set by Fetch or
211 // FetchNext. This happens when valid_data_ has been set. So, only
212 // skip checking if valid_data_ is true.
213 //
214 // Also, if the latest queue index is invalid, we are empty. So there
215 // is nothing to fetch.
216 if ((data_ != nullptr &&
217 queue_index == actual_queue_index_.DecrementBy(1u)) ||
218 !queue_index.valid()) {
219 return false;
220 }
221
222 ipc_lib::LocklessQueue::ReadResult read_result =
223 lockless_queue_.Read(queue_index.index(), &context_.monotonic_sent_time,
224 &context_.realtime_sent_time, &context_.size,
225 reinterpret_cast<char *>(data_storage_.get()));
226 if (read_result == ipc_lib::LocklessQueue::ReadResult::GOOD) {
227 context_.queue_index = queue_index.index();
228 data_ = reinterpret_cast<char *>(data_storage_.get()) +
229 lockless_queue_.message_data_size() - context_.size;
230 context_.data = data_;
231 actual_queue_index_ = queue_index.Increment();
232 }
233
234 // Make sure the data wasn't modified while we were reading it. This
235 // can only happen if you are reading the last message *while* it is
236 // being written to, which means you are pretty far behind.
237 CHECK(read_result != ipc_lib::LocklessQueue::ReadResult::OVERWROTE)
238 << ": Got behind while reading and the last message was modified "
239 "out "
240 "from under us while we were reading it. Don't get so far "
241 "behind.";
242
243 CHECK(read_result != ipc_lib::LocklessQueue::ReadResult::NOTHING_NEW)
244 << ": Queue index went backwards. This should never happen.";
245
246 // We fell behind between when we read the index and read the value.
247 // This isn't worth recovering from since this means we went to sleep
248 // for a long time in the middle of this function.
249 CHECK(read_result != ipc_lib::LocklessQueue::ReadResult::TOO_OLD)
250 << ": The next message is no longer available.";
251 return read_result == ipc_lib::LocklessQueue::ReadResult::GOOD;
252 }
253
254 bool RegisterWakeup(int priority) {
255 return lockless_queue_.RegisterWakeup(priority);
256 }
257
258 void UnregisterWakeup() { lockless_queue_.UnregisterWakeup(); }
259
260 private:
261 MMapedQueue lockless_queue_memory_;
262 ipc_lib::LocklessQueue lockless_queue_;
263
264 ipc_lib::QueueIndex actual_queue_index_ =
265 ipc_lib::LocklessQueue::empty_queue_index();
266
267 struct AlignedChar {
268 alignas(32) char data;
269 };
270
271 std::unique_ptr<AlignedChar, decltype(&free)> data_storage_;
272};
273
274class ShmSender : public RawSender {
275 public:
276 explicit ShmSender(const Channel *channel, const ShmEventLoop *shm_event_loop)
Austin Schuh54cf95f2019-11-29 13:14:18 -0800277 : RawSender(channel),
Alex Perrycb7da4b2019-08-28 19:35:56 -0700278 shm_event_loop_(shm_event_loop),
279 name_(channel->name()->str()),
280 lockless_queue_memory_(channel),
281 lockless_queue_(lockless_queue_memory_.memory(),
282 lockless_queue_memory_.config()),
283 lockless_queue_sender_(lockless_queue_.MakeSender()) {}
284
285 void *data() override { return lockless_queue_sender_.Data(); }
286 size_t size() override { return lockless_queue_sender_.size(); }
287 bool Send(size_t size) override {
288 lockless_queue_sender_.Send(size);
289 lockless_queue_.Wakeup(shm_event_loop_->priority());
290 return true;
291 }
292
Austin Schuh4726ce92019-11-29 13:23:18 -0800293 bool Send(const void *msg, size_t length) override {
294 lockless_queue_sender_.Send(reinterpret_cast<const char *>(msg), length);
Alex Perrycb7da4b2019-08-28 19:35:56 -0700295 lockless_queue_.Wakeup(shm_event_loop_->priority());
296 // TODO(austin): Return an error if we send too fast.
297 return true;
298 }
299
300 const absl::string_view name() const override { return name_; }
301
302 private:
303 const ShmEventLoop *shm_event_loop_;
304 std::string name_;
305 MMapedQueue lockless_queue_memory_;
306 ipc_lib::LocklessQueue lockless_queue_;
307 ipc_lib::LocklessQueue::Sender lockless_queue_sender_;
308};
309
310} // namespace
311
312namespace internal {
313
314// Class to manage the state for a Watcher.
315class WatcherState {
316 public:
317 WatcherState(
318 const Channel *channel,
319 std::function<void(const Context &context, const void *message)> watcher)
320 : shm_fetcher_(channel), watcher_(watcher) {}
321
322 ~WatcherState() {}
323
324 // Points the next message to fetch at the queue index which will be populated
325 // next.
326 void PointAtNextQueueIndex() { shm_fetcher_.PointAtNextQueueIndex(); }
327
328 // Returns true if there is new data available.
329 bool HasNewData() {
330 if (!has_new_data_) {
331 has_new_data_ = shm_fetcher_.FetchNext();
332 }
333
334 return has_new_data_;
335 }
336
337 // Returns the time of the current data sample.
338 aos::monotonic_clock::time_point event_time() const {
339 return shm_fetcher_.context().monotonic_sent_time;
340 }
341
342 // Consumes the data by calling the callback.
343 void CallCallback() {
344 CHECK(has_new_data_);
345 watcher_(shm_fetcher_.context(), shm_fetcher_.most_recent_data());
346 has_new_data_ = false;
347 }
348
349 // Starts the thread and waits until it is running.
350 bool RegisterWakeup(int priority) {
351 return shm_fetcher_.RegisterWakeup(priority);
352 }
353
354 void UnregisterWakeup() { return shm_fetcher_.UnregisterWakeup(); }
355
356 private:
357 bool has_new_data_ = false;
358
359 ShmFetcher shm_fetcher_;
360
361 std::function<void(const Context &context, const void *message)> watcher_;
362};
363
// Adapter class to adapt a timerfd to a TimerHandler.
// The part of the API which is accessed by the TimerHandler interface needs to
// be threadsafe.  This means Setup and Disable.
class TimerHandlerState : public TimerHandler {
 public:
  TimerHandlerState(ShmEventLoop *shm_event_loop, ::std::function<void()> fn)
      : shm_event_loop_(shm_event_loop), fn_(::std::move(fn)) {
    // Run the callback from the event loop's epoll thread whenever the
    // timerfd fires.  Read() is called first to consume the expiration
    // before invoking the user function.
    shm_event_loop_->epoll_.OnReadable(timerfd_.fd(), [this]() {
      timerfd_.Read();
      fn_();
    });
  }

  // Unregister from epoll before timerfd_'s destructor closes the fd.
  ~TimerHandlerState() { shm_event_loop_->epoll_.DeleteFd(timerfd_.fd()); }

  // Arms the timer to fire at base and then every repeat_offset after.
  void Setup(monotonic_clock::time_point base,
             monotonic_clock::duration repeat_offset) override {
    // SetTime is threadsafe already.
    timerfd_.SetTime(base, repeat_offset);
  }

  void Disable() override {
    // Disable is also threadsafe already.
    timerfd_.Disable();
  }

 private:
  // The event loop whose epoll instance dispatches this timer; not owned.
  ShmEventLoop *shm_event_loop_;

  TimerFd timerfd_;

  // Function to be run on the thread when the timer expires.
  ::std::function<void()> fn_;
};
398
// Adapter class to the timerfd and PhasedLoop.
// The part of the API which is accessed by the PhasedLoopHandler interface
// needs to be threadsafe.  This means set_interval_and_offset.
class PhasedLoopHandler : public ::aos::PhasedLoopHandler {
 public:
  PhasedLoopHandler(ShmEventLoop *shm_event_loop, ::std::function<void(int)> fn,
                    const monotonic_clock::duration interval,
                    const monotonic_clock::duration offset)
      : shm_event_loop_(shm_event_loop),
        phased_loop_(interval, shm_event_loop_->monotonic_now(), offset),
        fn_(::std::move(fn)) {
    shm_event_loop_->epoll_.OnReadable(timerfd_.fd(), [this]() {
      timerfd_.Read();
      // Run the user callback with the number of cycles elapsed since the
      // previous wakeup, then arm the timer for the next phase boundary.
      fn_(cycles_elapsed_);
      Reschedule();
    });
  }

  // Unregister from epoll before timerfd_'s destructor closes the fd.
  ~PhasedLoopHandler() { shm_event_loop_->epoll_.DeleteFd(timerfd_.fd()); }

  void set_interval_and_offset(
      const monotonic_clock::duration interval,
      const monotonic_clock::duration offset) override {
    phased_loop_.set_interval_and_offset(interval, offset);
  }

  // Resets the phased loop to "now" and arms the first wakeup.  Called by
  // Run() once the loop has gone realtime.
  void Startup() {
    phased_loop_.Reset(shm_event_loop_->monotonic_now());
    Reschedule();
  }

 private:
  // Computes the next sleep time (recording how many cycles elapsed since
  // the last one) and arms the timerfd for it.
  void Reschedule() {
    cycles_elapsed_ = phased_loop_.Iterate(shm_event_loop_->monotonic_now());
    timerfd_.SetTime(phased_loop_.sleep_time(), ::aos::monotonic_clock::zero());
  }

  // The event loop whose epoll instance dispatches this loop; not owned.
  ShmEventLoop *shm_event_loop_;

  TimerFd timerfd_;
  time::PhasedLoop phased_loop_;

  // Cycles elapsed at the most recent Reschedule(); passed to fn_.
  int cycles_elapsed_ = 1;

  // Function to be run each cycle, given the elapsed cycle count.
  const ::std::function<void(int)> fn_;
};
449} // namespace internal
450
451::std::unique_ptr<RawFetcher> ShmEventLoop::MakeRawFetcher(
452 const Channel *channel) {
Austin Schuh54cf95f2019-11-29 13:14:18 -0800453 ValidateChannel(channel);
Alex Perrycb7da4b2019-08-28 19:35:56 -0700454 return ::std::unique_ptr<RawFetcher>(new ShmFetcher(channel));
455}
456
457::std::unique_ptr<RawSender> ShmEventLoop::MakeRawSender(
458 const Channel *channel) {
Austin Schuh54cf95f2019-11-29 13:14:18 -0800459 ValidateChannel(channel);
Alex Perrycb7da4b2019-08-28 19:35:56 -0700460 Take(channel);
461 return ::std::unique_ptr<RawSender>(new ShmSender(channel, this));
462}
463
464void ShmEventLoop::MakeRawWatcher(
465 const Channel *channel,
466 std::function<void(const Context &context, const void *message)> watcher) {
Austin Schuh54cf95f2019-11-29 13:14:18 -0800467 ValidateChannel(channel);
Alex Perrycb7da4b2019-08-28 19:35:56 -0700468 Take(channel);
469
470 ::std::unique_ptr<internal::WatcherState> state(
471 new internal::WatcherState(
472 channel, std::move(watcher)));
473 watchers_.push_back(::std::move(state));
474}
475
476TimerHandler *ShmEventLoop::AddTimer(::std::function<void()> callback) {
477 ::std::unique_ptr<internal::TimerHandlerState> timer(
478 new internal::TimerHandlerState(this, ::std::move(callback)));
479
480 timers_.push_back(::std::move(timer));
481
482 return timers_.back().get();
483}
484
485PhasedLoopHandler *ShmEventLoop::AddPhasedLoop(
486 ::std::function<void(int)> callback,
487 const monotonic_clock::duration interval,
488 const monotonic_clock::duration offset) {
489 ::std::unique_ptr<internal::PhasedLoopHandler> phased_loop(
490 new internal::PhasedLoopHandler(this, ::std::move(callback), interval,
491 offset));
492
493 phased_loops_.push_back(::std::move(phased_loop));
494
495 return phased_loops_.back().get();
496}
497
498void ShmEventLoop::OnRun(::std::function<void()> on_run) {
499 on_run_.push_back(::std::move(on_run));
500}
501
// Runs the event loop until Exit() is called.  Order matters here:
//  1. Register the signalfd handler which dispatches watcher callbacks.
//  2. Go realtime (if a priority was configured).
//  3. Snap watcher queue indices and register wakeups.
//  4. Run the OnRun handlers, start phased loops, and enter epoll.
//  5. On exit, drop realtime and unregister everything.
void ShmEventLoop::Run() {
  std::unique_ptr<ipc_lib::SignalFd> signalfd;

  if (watchers_.size() > 0) {
    // Sender wakeups arrive as kWakeupSignal; route them through a signalfd
    // so they are handled on this thread by epoll.
    signalfd.reset(new ipc_lib::SignalFd({ipc_lib::kWakeupSignal}));

    epoll_.OnReadable(signalfd->fd(), [signalfd_ptr = signalfd.get(), this]() {
      signalfd_siginfo result = signalfd_ptr->Read();
      CHECK_EQ(result.ssi_signo, ipc_lib::kWakeupSignal);

      // TODO(austin): We should really be checking *everything*, not just
      // watchers, and calling the oldest thing first.  That will improve
      // determinism a lot.

      while (true) {
        // Call the handlers in time order of their messages.
        aos::monotonic_clock::time_point min_event_time =
            aos::monotonic_clock::max_time;
        // Sentinel value; only used below after min_event_time proves that
        // some watcher had new data.
        size_t min_watcher_index = -1;
        size_t watcher_index = 0;
        for (::std::unique_ptr<internal::WatcherState> &watcher : watchers_) {
          if (watcher->HasNewData()) {
            if (watcher->event_time() < min_event_time) {
              min_watcher_index = watcher_index;
              min_event_time = watcher->event_time();
            }
          }
          ++watcher_index;
        }

        // No watcher has pending data; wait for the next signal.
        if (min_event_time == aos::monotonic_clock::max_time) {
          break;
        }

        watchers_[min_watcher_index]->CallCallback();
      }
    });
  }

  // Now, all the threads are up.  Lock everything into memory and go RT.
  if (priority_ != 0) {
    ::aos::InitRT();

    LOG(INFO) << "Setting priority to " << priority_;
    ::aos::SetCurrentThreadRealtimePriority(priority_);
  }

  set_is_running(true);

  // Now that we are realtime (but before the OnRun handlers run), snap the
  // queue index.
  for (::std::unique_ptr<internal::WatcherState> &watcher : watchers_) {
    watcher->PointAtNextQueueIndex();
    CHECK(watcher->RegisterWakeup(priority_));
  }

  // Now that we are RT, run all the OnRun handlers.
  for (const auto &run : on_run_) {
    run();
  }

  // Start up all the phased loops.
  for (::std::unique_ptr<internal::PhasedLoopHandler> &phased_loop :
       phased_loops_) {
    phased_loop->Startup();
  }

  // And start our main event loop which runs all the timers and handles Quit.
  epoll_.Run();

  // Once epoll exits, there is no useful nonrt work left to do.
  set_is_running(false);

  // Nothing time or synchronization critical needs to happen after this point.
  // Drop RT priority.
  ::aos::UnsetCurrentThreadRealtimePriority();

  for (::std::unique_ptr<internal::WatcherState> &watcher : watchers_) {
    watcher->UnregisterWakeup();
  }

  if (watchers_.size() > 0) {
    epoll_.DeleteFd(signalfd->fd());
    signalfd.reset();
  }
}
588
// Breaks Run() out of its epoll loop, causing it to return.
void ShmEventLoop::Exit() { epoll_.Quit(); }
590
// Destroying the loop while Run() is active is a programming error; die
// loudly instead of tearing state out from under the running loop.
ShmEventLoop::~ShmEventLoop() {
  CHECK(!is_running()) << ": ShmEventLoop destroyed while running";
}
594
595void ShmEventLoop::Take(const Channel *channel) {
596 CHECK(!is_running()) << ": Cannot add new objects while running.";
597
598 // Cheat aggresively. Use the shared memory path as a proxy for a unique
599 // identifier for the channel.
600 const std::string path = ShmPath(channel);
601
602 const auto prior = ::std::find(taken_.begin(), taken_.end(), path);
603 CHECK(prior == taken_.end()) << ": " << path << " is already being used.";
604
605 taken_.emplace_back(path);
606}
607
// Sets the realtime priority Run() will switch to (0 means stay non-RT).
// Must be configured before the loop is running.
void ShmEventLoop::SetRuntimeRealtimePriority(int priority) {
  if (is_running()) {
    LOG(FATAL) << "Cannot set realtime priority while running.";
  }
  priority_ = priority;
}
614
615} // namespace aos