blob: cd0e2ae15b8743ab42ad2affe5ae00a047d294ee [file] [log] [blame]
Austin Schuh20b2b082019-09-11 20:42:56 -07001#include "aos/ipc_lib/lockless_queue.h"
2
3#include <linux/futex.h>
4#include <sys/types.h>
5#include <syscall.h>
6#include <unistd.h>
7#include <algorithm>
8#include <iomanip>
9#include <iostream>
10#include <sstream>
11
12#include "aos/init.h"
13#include "aos/ipc_lib/lockless_queue_memory.h"
14#include "aos/logging/logging.h"
15#include "aos/util/compiler_memory_barrier.h"
Austin Schuhf257f3c2019-10-27 21:00:43 -070016#include "glog/logging.h"
Austin Schuh20b2b082019-09-11 20:42:56 -070017
18namespace aos {
19namespace ipc_lib {
20
21namespace {
22
23constexpr bool kDebug = false;
24
25void GrabQueueSetupLockOrDie(LocklessQueueMemory *memory) {
26 const int result = mutex_grab(&(memory->queue_setup_lock));
27 CHECK(result == 0 || result == 1);
28}
29
30// This must be called under the queue_setup_lock.
31void Cleanup(LocklessQueueMemory *memory) {
32 const size_t num_senders = memory->num_senders();
33 const size_t queue_size = memory->queue_size();
34 const size_t num_messages = memory->num_messages();
35
36 // There are a large number of crazy cases here for how things can go wrong
37 // and how we have to recover. They either require us to keep extra track of
38 // what is going on, slowing down the send path, or require a large number of
39 // cases.
40 //
41 // The solution here is to not over-think it. This is running while not real
42 // time during construction. It is allowed to be slow. It will also very
43 // rarely trigger. There is a small uS window where process death is
44 // ambiguous.
45 //
46 // So, build up a list N long, where N is the number of messages. Search
47 // through the entire queue and the sender list (ignoring any dead senders),
48 // and mark down which ones we have seen. Once we have seen all the messages
49 // except the N dead senders, we know which messages are dead. Because the
50 // queue is active while we do this, it may take a couple of go arounds to see
51 // everything.
52
53 // Do the easy case. Find all senders who have died. See if they are either
54 // consistent already, or if they have copied over to_replace to the scratch
55 // index, but haven't cleared to_replace. Count them.
56 size_t valid_senders = 0;
57 for (size_t i = 0; i < num_senders; ++i) {
58 Sender *sender = memory->GetSender(i);
59 const uint32_t tid =
60 __atomic_load_n(&(sender->tid.futex), __ATOMIC_RELAXED);
61 if (tid & FUTEX_OWNER_DIED) {
62 if (kDebug) {
63 printf("Found an easy death for sender %zu\n", i);
64 }
65 const Index to_replace = sender->to_replace.RelaxedLoad();
66 const Index scratch_index = sender->scratch_index.Load();
67
68 // I find it easiest to think about this in terms of the set of observable
69 // states. The main code follows the following states:
70
71 // 1) scratch_index = xxx
72 // to_replace = invalid
73 // This is unambiguous. Already good.
74
75 // 2) scratch_index = xxx
76 // to_replace = yyy
77 // Very ambiguous. Is xxx or yyy the correct one? Need to either roll
78 // this forwards or backwards.
79
80 // 3) scratch_index = yyy
81 // to_replace = yyy
82 // We are in the act of moving to_replace to scratch_index, but didn't
83 // finish. Easy.
84
85 // 4) scratch_index = yyy
86 // to_replace = invalid
87 // Finished, but died. Looks like 1)
88
89 // Any cleanup code needs to follow the same set of states to be robust to
90 // death, so death can be restarted.
91
92 // Could be 2) or 3).
93 if (to_replace.valid()) {
94 // 3)
95 if (to_replace == scratch_index) {
96 // Just need to invalidate to_replace to finish.
97 sender->to_replace.Invalidate();
98
99 // And mark that we succeeded.
100 __atomic_store_n(&(sender->tid.futex), 0, __ATOMIC_SEQ_CST);
101 ++valid_senders;
102 }
103 } else {
104 // 1) or 4). Make sure we aren't corrupted and declare victory.
105 CHECK(scratch_index.valid());
106
107 __atomic_store_n(&(sender->tid.futex), 0, __ATOMIC_SEQ_CST);
108 ++valid_senders;
109 }
110 } else {
111 // Not dead.
112 ++valid_senders;
113 }
114 }
115
116 // If all the senders are (or were made) good, there is no need to do the hard
117 // case.
118 if (valid_senders == num_senders) {
119 return;
120 }
121
122 if (kDebug) {
123 printf("Starting hard cleanup\n");
124 }
125
126 size_t num_accounted_for = 0;
127 size_t num_missing = 0;
128 ::std::vector<bool> accounted_for(num_messages, false);
129
130 while ((num_accounted_for + num_missing) != num_messages) {
131 num_missing = 0;
132 for (size_t i = 0; i < num_senders; ++i) {
133 Sender *sender = memory->GetSender(i);
134 const uint32_t tid =
135 __atomic_load_n(&(sender->tid.futex), __ATOMIC_RELAXED);
136 if (tid & FUTEX_OWNER_DIED) {
137 ++num_missing;
138 } else {
139 const Index scratch_index = sender->scratch_index.RelaxedLoad();
140 if (!accounted_for[scratch_index.message_index()]) {
141 ++num_accounted_for;
142 }
143 accounted_for[scratch_index.message_index()] = true;
144 }
145 }
146
147 for (size_t i = 0; i < queue_size; ++i) {
148 const Index index = memory->GetQueue(i)->RelaxedLoad();
149 if (!accounted_for[index.message_index()]) {
150 ++num_accounted_for;
151 }
152 accounted_for[index.message_index()] = true;
153 }
154 }
155
156 while (num_missing != 0) {
157 const size_t starting_num_missing = num_missing;
158 for (size_t i = 0; i < num_senders; ++i) {
159 Sender *sender = memory->GetSender(i);
160 const uint32_t tid =
161 __atomic_load_n(&(sender->tid.futex), __ATOMIC_RELAXED);
162 if (tid & FUTEX_OWNER_DIED) {
163 const Index scratch_index = sender->scratch_index.RelaxedLoad();
164 const Index to_replace = sender->to_replace.RelaxedLoad();
165
166 // Candidate.
167 CHECK_LE(to_replace.message_index(), accounted_for.size());
168 if (accounted_for[to_replace.message_index()]) {
169 if (kDebug) {
170 printf("Sender %zu died, to_replace is already accounted for\n", i);
171 }
172 // If both are accounted for, we are corrupt...
173 CHECK(!accounted_for[scratch_index.message_index()]);
174
175 // to_replace is already accounted for. This means that we didn't
176 // atomically insert scratch_index into the queue yet. So
177 // invalidate to_replace.
178 sender->to_replace.Invalidate();
179
180 // And then mark this sender clean.
181 __atomic_store_n(&(sender->tid.futex), 0, __ATOMIC_SEQ_CST);
182
183 // And account for scratch_index.
184 accounted_for[scratch_index.message_index()] = true;
185 --num_missing;
186 ++num_accounted_for;
187 } else if (accounted_for[scratch_index.message_index()]) {
188 if (kDebug) {
189 printf("Sender %zu died, scratch_index is already accounted for\n", i);
190 }
191 // scratch_index is accounted for. That means we did the insert,
192 // but didn't record it.
193 CHECK(to_replace.valid());
194 // Finish the transaction. Copy to_replace, then clear it.
195
196 sender->scratch_index.Store(to_replace);
197 sender->to_replace.Invalidate();
198
199 // And then mark this sender clean.
200 __atomic_store_n(&(sender->tid.futex), 0, __ATOMIC_SEQ_CST);
201
202 // And account for to_replace.
203 accounted_for[to_replace.message_index()] = true;
204 --num_missing;
205 ++num_accounted_for;
206 } else {
207 if (kDebug) {
208 printf("Sender %zu died, neither is accounted for\n", i);
209 }
210 // Ambiguous. There will be an unambiguous one somewhere that we
211 // can do first.
212 }
213 }
214 }
215 // CHECK that we are making progress.
216 CHECK_NE(num_missing, starting_num_missing);
217 }
218}
219
// Wrapper which issues the SYS_rt_tgsigqueueinfo system call directly, so the
// signal can be sent *just* to the target thread (tid) of thread group tgid.
// Returns whatever syscall() returned.
int rt_tgsigqueueinfo(pid_t tgid, pid_t tid, int sig, siginfo_t *si) {
  const long syscall_result =
      syscall(SYS_rt_tgsigqueueinfo, tgid, tid, sig, si);
  return static_cast<int>(syscall_result);
}
225
226} // namespace
227
228size_t LocklessQueueMemorySize(LocklessQueueConfiguration config) {
229 // Round up the message size so following data is double aligned. That should
230 // be overkill for most platforms. And the checks below confirms it.
231 config.message_data_size = (config.message_data_size + 7) & ~0x7;
232
233 // As we build up the size, confirm that everything is aligned to the
234 // alignment requirements of the type.
235 size_t size = sizeof(LocklessQueueMemory);
236 CHECK_EQ(size & (alignof(LocklessQueueMemory) - 1), 0u);
237
238 CHECK_EQ(size & (alignof(AtomicIndex) - 1), 0u);
239 size += LocklessQueueMemory::SizeOfQueue(config);
240
241 CHECK_EQ(size & (alignof(Message) - 1), 0u);
242 size += LocklessQueueMemory::SizeOfMessages(config);
243
244 CHECK_EQ(size & (alignof(Watcher) - 1), 0u);
245 size += LocklessQueueMemory::SizeOfWatchers(config);
246
247 CHECK_EQ(size & (alignof(Sender) - 1), 0u);
248 size += LocklessQueueMemory::SizeOfSenders(config);
249
250 return size;
251}
252
253LocklessQueueMemory *InitializeLocklessQueueMemory(
254 LocklessQueueMemory *memory, LocklessQueueConfiguration config) {
255 // Everything should be zero initialized already. So we just need to fill
256 // everything out properly.
257
258 // Grab the mutex. We don't care if the previous reader died. We are going
259 // to check everything anyways.
260 GrabQueueSetupLockOrDie(memory);
261
262 if (!memory->initialized) {
263 // TODO(austin): Check these for out of bounds.
264 memory->config.num_watchers = config.num_watchers;
265 memory->config.num_senders = config.num_senders;
266 memory->config.queue_size = config.queue_size;
267 // Round up to the nearest double word bytes.
268 memory->config.message_data_size = (config.message_data_size + 7) & ~0x7;
269
270 const size_t num_messages = memory->num_messages();
271 // There need to be at most MaxMessages() messages allocated.
272 CHECK_LE(num_messages, Index::MaxMessages());
273
274 for (size_t i = 0; i < num_messages; ++i) {
275 memory->GetMessage(Index(QueueIndex::Zero(memory->queue_size()), i))
276 ->header.queue_index.Invalidate();
277 }
278
279 for (size_t i = 0; i < memory->queue_size(); ++i) {
280 // Make the initial counter be the furthest away number. That means that
281 // index 0 should be 0xffff, 1 should be 0, etc.
282 memory->GetQueue(i)->Store(Index(QueueIndex::Zero(memory->queue_size())
283 .IncrementBy(i)
284 .DecrementBy(memory->queue_size()),
285 i));
286 }
287
288 memory->next_queue_index.Invalidate();
289
290 for (size_t i = 0; i < memory->num_senders(); ++i) {
291 ::aos::ipc_lib::Sender *s = memory->GetSender(i);
292 s->scratch_index.Store(Index(0xffff, i + memory->queue_size()));
293 s->to_replace.RelaxedInvalidate();
294 }
295
296 // Signal everything is done. This needs to be done last, so if we die, we
297 // redo initialization.
298 // This is a full atomic (probably overkill), but this is at initialization
299 // time, so it is cheap.
300 memory->initialized.store(true);
301 }
302
303 mutex_unlock(&(memory->queue_setup_lock));
304 return memory;
305}
306
307LocklessQueue::LocklessQueue(LocklessQueueMemory *memory,
308 LocklessQueueConfiguration config)
309 : memory_(InitializeLocklessQueueMemory(memory, config)),
310 watcher_copy_(memory_->num_watchers()),
311 pid_(getpid()),
312 uid_(getuid()) {}
313
314LocklessQueue::~LocklessQueue() {
315 CHECK_EQ(watcher_index_, -1);
316
317 GrabQueueSetupLockOrDie(memory_);
318 const int num_watchers = memory_->num_watchers();
319 // Cleanup is cheap. Go for it anyways.
320
321 // And confirm that nothing is owned by us.
322 for (int i = 0; i < num_watchers; ++i) {
323 CHECK(!mutex_islocked(&(memory_->GetWatcher(i)->tid)));
324 }
325 mutex_unlock(&(memory_->queue_setup_lock));
326}
327
328size_t LocklessQueue::QueueSize() const { return memory_->queue_size(); }
329
330bool LocklessQueue::RegisterWakeup(int priority) {
331 // TODO(austin): Make sure signal coalescing is turned on. We don't need
332 // duplicates. That will improve performance under high load.
333
334 // Since everything is self consistent, all we need to do is make sure nobody
335 // else is running. Someone dying will get caught in the generic consistency
336 // check.
337 GrabQueueSetupLockOrDie(memory_);
338 const int num_watchers = memory_->num_watchers();
339
340 // Now, find the first empty watcher and grab it.
341 CHECK_EQ(watcher_index_, -1);
342 for (int i = 0; i < num_watchers; ++i) {
343 const uint32_t tid =
344 __atomic_load_n(&(memory_->GetWatcher(i)->tid.futex), __ATOMIC_RELAXED);
345 if (tid == 0 || tid & FUTEX_OWNER_DIED) {
346 watcher_index_ = i;
347 break;
348 }
349 }
350
351 // Bail if we failed to find an open slot.
352 if (watcher_index_ == -1) {
353 mutex_unlock(&(memory_->queue_setup_lock));
354 return false;
355 }
356
357 Watcher *w = memory_->GetWatcher(watcher_index_);
358
359 w->pid = getpid();
360 w->priority = priority;
361
362 // Grabbing a mutex is a compiler and memory barrier, so nothing before will
363 // get rearranged afterwords.
364 //
365 // Since everything is done under the queue_setup_lock, this should always
366 // return immediately.
367 const int result = mutex_grab(&(w->tid));
368
369 mutex_unlock(&(memory_->queue_setup_lock));
370
371 // We should either get the lock, or the previous owner should have died.
372 // Anything else is a pretty serious error.
373 return result == 0 || result == 1;
374}
375
376void LocklessQueue::UnregisterWakeup() {
377 // Since everything is self consistent, all we need to do is make sure nobody
378 // else is running. Someone dying will get caught in the generic consistency
379 // check.
380 GrabQueueSetupLockOrDie(memory_);
381
382 // Make sure we are registered.
383 CHECK_NE(watcher_index_, -1);
384
385 // Make sure we still own the slot we are supposed to.
386 CHECK(mutex_islocked(&(memory_->GetWatcher(watcher_index_)->tid)));
387
388 // The act of unlocking invalidates the entry. Invalidate it.
389 mutex_unlock(&(memory_->GetWatcher(watcher_index_)->tid));
390 // And internally forget the slot.
391 watcher_index_ = -1;
392
393 mutex_unlock(&(memory_->queue_setup_lock));
394}
395
396int LocklessQueue::Wakeup(const int current_priority) {
397 const size_t num_watchers = memory_->num_watchers();
398
399 CHECK_EQ(watcher_copy_.size(), num_watchers);
400
401 // Grab a copy so it won't change out from underneath us, and we can sort it
402 // nicely in C++.
403 // Do note that there is still a window where the process can die *after* we
404 // read everything. We will still PI boost and send a signal to the thread in
405 // question. There is no way without pidfd's to close this window, and
406 // creating a pidfd is likely not RT.
407 for (size_t i = 0; i < num_watchers; ++i) {
408 Watcher *w = memory_->GetWatcher(i);
409 // Start by reading the tid. This needs to be atomic to force it to come first.
410 watcher_copy_[i].tid = __atomic_load_n(&(w->tid.futex), __ATOMIC_SEQ_CST);
411 watcher_copy_[i].pid = w->pid;
412 watcher_copy_[i].priority = w->priority;
413
414 // Use a priority of -1 to mean an invalid entry to make sorting easier.
415 if (watcher_copy_[i].tid & FUTEX_OWNER_DIED || watcher_copy_[i].tid == 0) {
416 watcher_copy_[i].priority = -1;
417 } else if (watcher_copy_[i].tid !=
418 static_cast<pid_t>(
419 __atomic_load_n(&(w->tid.futex), __ATOMIC_SEQ_CST))) {
420 // Confirm that the watcher hasn't been re-used and modified while we read
421 // it. If it has, mark it invalid again.
422 watcher_copy_[i].priority = -1;
423 watcher_copy_[i].tid = 0;
424 }
425 }
426
427 // Now sort.
428 ::std::sort(watcher_copy_.begin(), watcher_copy_.end(),
429 [](const WatcherCopy &a, const WatcherCopy &b) {
430 return a.priority > b.priority;
431 });
432
433 int count = 0;
434 if (watcher_copy_[0].priority != -1) {
435 const int max_priority =
436 ::std::max(current_priority, watcher_copy_[0].priority);
437 // Boost if we are RT and there is a higher priority sender out there.
438 // Otherwise we might run into priority inversions.
439 if (max_priority > current_priority && current_priority > 0) {
440 SetCurrentThreadRealtimePriority(max_priority);
441 }
442
443 // Build up the siginfo to send.
444 siginfo_t uinfo;
445 memset(&uinfo, 0, sizeof(uinfo));
446
447 uinfo.si_code = SI_QUEUE;
448 uinfo.si_pid = pid_;
449 uinfo.si_uid = uid_;
450 uinfo.si_value.sival_int = 0;
451
452 for (const WatcherCopy &watcher_copy : watcher_copy_) {
453 // The first -1 priority means we are at the end of the valid list.
454 if (watcher_copy.priority == -1) {
455 break;
456 }
457
458 // Send the signal. Target just the thread that sent it so that we can
459 // support multiple watchers in a process (when someone creates multiple
460 // event loops in different threads).
461 rt_tgsigqueueinfo(watcher_copy.pid, watcher_copy.tid, kWakeupSignal,
462 &uinfo);
463
464 ++count;
465 }
466
467 // Drop back down if we were boosted.
468 if (max_priority > current_priority && current_priority > 0) {
469 SetCurrentThreadRealtimePriority(current_priority);
470 }
471 }
472
473 return count;
474}
475
476LocklessQueue::Sender::Sender(LocklessQueueMemory *memory) : memory_(memory) {
477 GrabQueueSetupLockOrDie(memory_);
478
479 // Since we already have the lock, go ahead and try cleaning up.
480 Cleanup(memory_);
481
482 const int num_senders = memory_->num_senders();
483
484 for (int i = 0; i < num_senders; ++i) {
485 ::aos::ipc_lib::Sender *s = memory->GetSender(i);
486 const uint32_t tid = __atomic_load_n(&(s->tid.futex), __ATOMIC_RELAXED);
487 if (tid == 0) {
488 sender_index_ = i;
489 break;
490 }
491 }
492
493 if (sender_index_ == -1) {
Austin Schuhf257f3c2019-10-27 21:00:43 -0700494 LOG(FATAL) << "Too many senders";
Austin Schuh20b2b082019-09-11 20:42:56 -0700495 }
496
497 ::aos::ipc_lib::Sender *s = memory_->GetSender(sender_index_);
498
499 // Atomically grab the mutex. This signals that we are alive. If the
500 // previous owner died, we don't care, and want to grab the mutex anyways.
501 const int result = mutex_grab(&(s->tid));
502 CHECK(result == 0 || result == 1);
503
504 mutex_unlock(&(memory->queue_setup_lock));
505}
506
507LocklessQueue::Sender::~Sender() {
508 if (memory_ != nullptr) {
509 mutex_unlock(&(memory_->GetSender(sender_index_)->tid));
510 }
511}
512
513LocklessQueue::Sender LocklessQueue::MakeSender() {
514 return LocklessQueue::Sender(memory_);
515}
516
517QueueIndex ZeroOrValid(QueueIndex index) {
518 if (!index.valid()) {
519 return index.Clear();
520 }
521 return index;
522}
523
524void LocklessQueue::Sender::Send(const char *data, size_t length) {
525 const size_t queue_size = memory_->queue_size();
526 CHECK_LE(length, memory_->message_data_size());
527
528 ::aos::ipc_lib::Sender *sender = memory_->GetSender(sender_index_);
529 Index scratch_index = sender->scratch_index.RelaxedLoad();
530 Message *message = memory_->GetMessage(scratch_index);
531
532 message->header.queue_index.Invalidate();
533
534 message->header.length = length;
535 memcpy(&message->data[0], data, length);
536
537 while (true) {
538 const QueueIndex actual_next_queue_index =
539 memory_->next_queue_index.Load(queue_size);
540 const QueueIndex next_queue_index = ZeroOrValid(actual_next_queue_index);
541
542 const QueueIndex incremented_queue_index = next_queue_index.Increment();
543
544 // TODO(austin): I think we can drop the barrier off this.
545 const Index to_replace = memory_->LoadIndex(next_queue_index);
546
547 const QueueIndex decremented_queue_index =
548 next_queue_index.DecrementBy(queue_size);
549
550 // See if we got beat. If we did, try to atomically update
551 // next_queue_index in case the previous writer failed and retry.
552 if (!to_replace.IsPlausible(decremented_queue_index)) {
553 // We don't care about the result. It will either succeed, or we got
554 // beat in fixing it and just need to give up and try again. If we got
555 // beat multiple times, the only way progress can be made is if the queue
556 // is updated as well. This means that if we retry reading
557 // next_queue_index, we will be at most off by one and can retry.
558 //
559 // Both require no further action from us.
560 //
561 // TODO(austin): If we are having fairness issues under contention, we
562 // could have a mode bit in next_queue_index, and could use a lock or some
563 // other form of PI boosting to let the higher priority task win.
564 memory_->next_queue_index.CompareAndExchangeStrong(
565 actual_next_queue_index, incremented_queue_index);
566
567 if (kDebug) {
568 printf("We were beat. Try again. Was %x, is %x\n", to_replace.get(),
569 decremented_queue_index.index());
570 }
571 continue;
572 }
573
574 // Confirm that the message is what it should be.
575 {
576 // We just need this to be atomic and after the index has been calculated
577 // and before we exchange the index back in. Both of those will be strong
578 // barriers, so this is fine.
579 const QueueIndex previous_index =
580 memory_->GetMessage(to_replace)
581 ->header.queue_index.RelaxedLoad(queue_size);
582 if (previous_index != decremented_queue_index && previous_index.valid()) {
583 // Retry.
584 if (kDebug) {
585 printf(
586 "Something fishy happened, queue index doesn't match. Retrying. "
587 " Previous index was %x, should be %x\n",
588 previous_index.index(), decremented_queue_index.index());
589 }
590 continue;
591 }
592 }
593
594 message->header.monotonic_sent_time = ::aos::monotonic_clock::now();
595 message->header.realtime_sent_time = ::aos::realtime_clock::now();
596
597 // Before we are fully done filling out the message, update the Sender state
598 // with the new index to write. This re-uses the barrier for the
599 // queue_index store.
600 const Index index_to_write(next_queue_index,
601 scratch_index.message_index());
602
603 sender->scratch_index.RelaxedStore(index_to_write);
604
605 message->header.queue_index.Store(next_queue_index);
606
607 // The message is now filled out, and we have a confirmed slot to store
608 // into.
609 //
610 // Start by writing down what we are going to pull out of the queue. This
611 // was Invalid before now.
612 sender->to_replace.RelaxedStore(to_replace);
613
614 // Then exchange the next index into the queue.
615 if (!memory_->GetQueue(next_queue_index.Wrapped())
616 ->CompareAndExchangeStrong(to_replace, index_to_write)) {
617 // Aw, didn't succeed. Retry.
618 sender->to_replace.RelaxedInvalidate();
619 if (kDebug) {
620 printf("Failed to wrap into queue\n");
621 }
622 continue;
623 }
624
625 // Then update next_queue_index to save the next user some computation time.
626 memory_->next_queue_index.CompareAndExchangeStrong(actual_next_queue_index,
627 incremented_queue_index);
628
629 // Now update the scratch space and record that we succeeded.
630 sender->scratch_index.Store(to_replace);
631 // And then clear out the entry used to replace. This just needs to be
632 // atomic. It can't be moved above the store because that is a full
633 // barrier, but delaying it until later will only affect things if something
634 // died.
635 sender->to_replace.RelaxedInvalidate();
636 break;
637 }
638}
639
640LocklessQueue::ReadResult LocklessQueue::Read(
641 uint32_t uint32_queue_index,
642 ::aos::monotonic_clock::time_point *monotonic_sent_time,
643 ::aos::realtime_clock::time_point *realtime_sent_time, size_t *length,
644 char *data) {
645 const size_t queue_size = memory_->queue_size();
646
647 // Build up the QueueIndex.
648 const QueueIndex queue_index =
649 QueueIndex::Zero(queue_size).IncrementBy(uint32_queue_index);
650
651 // Read the message stored at the requested location.
652 Index mi = memory_->LoadIndex(queue_index);
653 Message *m = memory_->GetMessage(mi);
654
655 while (true) {
656 // We need to confirm that the data doesn't change while we are reading it.
657 // Do that by first confirming that the message points to the queue index we
658 // want.
659 const QueueIndex starting_queue_index =
660 m->header.queue_index.Load(queue_size);
661 if (starting_queue_index != queue_index) {
662 // If we found a message that is exactly 1 loop old, we just wrapped.
663 if (starting_queue_index == queue_index.DecrementBy(queue_size)) {
664 if (kDebug) {
665 printf("Matches: %x, %x\n", starting_queue_index.index(),
666 queue_index.DecrementBy(queue_size).index());
667 }
668 return ReadResult::NOTHING_NEW;
669 } else {
670 // Someone has re-used this message between when we pulled it out of the
671 // queue and when we grabbed its index. It is pretty hard to deduce
672 // what happened. Just try again.
673 Message *new_m = memory_->GetMessage(queue_index);
674 if (m != new_m) {
675 m = new_m;
676 if (kDebug) {
677 printf("Retrying, m doesn't match\n");
678 }
679 continue;
680 }
681
682 // We have confirmed that message still points to the same message. This
683 // means that the message didn't get swapped out from under us, so
684 // starting_queue_index is correct.
685 //
686 // Either we got too far behind (signaled by this being a valid
687 // message), or this is one of the initial messages which are invalid.
688 if (starting_queue_index.valid()) {
689 if (kDebug) {
690 printf("Too old. Tried for %x, got %x, behind by %d\n",
691 queue_index.index(), starting_queue_index.index(),
692 starting_queue_index.index() - queue_index.index());
693 }
694 return ReadResult::TOO_OLD;
695 }
696
697 if (kDebug) {
698 printf("Initial\n");
699 }
700
701 // There isn't a valid message at this location.
702 //
703 // If someone asks for one of the messages within the first go around,
704 // then they need to wait. They got ahead. Otherwise, they are
705 // asking for something crazy, like something before the beginning of
706 // the queue. Tell them that they are behind.
707 if (uint32_queue_index < memory_->queue_size()) {
708 if (kDebug) {
709 printf("Near zero, %x\n", uint32_queue_index);
710 }
711 return ReadResult::NOTHING_NEW;
712 } else {
713 if (kDebug) {
714 printf("not near zero, %x\n", uint32_queue_index);
715 }
716 return ReadResult::TOO_OLD;
717 }
718 }
719 }
720 if (kDebug) {
721 printf("Eq: %x, %x\n", starting_queue_index.index(), queue_index.index());
722 }
723 break;
724 }
725
726 // Then read the data out.
727 *monotonic_sent_time = m->header.monotonic_sent_time;
728 *realtime_sent_time = m->header.realtime_sent_time;
729 memcpy(data, &m->data[0], m->header.length);
730 *length = m->header.length;
731
732 // And finally, confirm that the message *still* points to the queue index we
733 // want. This means it didn't change out from under us.
734 // If something changed out from under us, we were reading it much too late in
735 // it's lifetime.
736 const QueueIndex final_queue_index = m->header.queue_index.Load(queue_size);
737 if (final_queue_index != queue_index) {
738 if (kDebug) {
739 printf(
740 "Changed out from under us. Reading %x, finished with %x, delta: "
741 "%d\n",
742 queue_index.index(), final_queue_index.index(),
743 final_queue_index.index() - queue_index.index());
744 }
745 return ReadResult::TOO_OLD;
746 }
747
748 return ReadResult::GOOD;
749}
750
751uint32_t LocklessQueue::LatestQueueIndex() {
752 const size_t queue_size = memory_->queue_size();
753
754 // There is only one interesting case. We need to know if the queue is empty.
755 // That is done with a sentinel value. At worst, this will be off by one.
756 const QueueIndex next_queue_index =
757 memory_->next_queue_index.Load(queue_size);
758 if (next_queue_index.valid()) {
759 const QueueIndex current_queue_index = next_queue_index.DecrementBy(1u);
760 return current_queue_index.index();
761 } else {
762 return empty_queue_index();
763 }
764}
765
766namespace {
767
768// Prints out the mutex state. Not safe to use while the mutex is being
769// changed.
770::std::string PrintMutex(aos_mutex *mutex) {
771 ::std::stringstream s;
772 s << "aos_mutex(" << ::std::hex << mutex->futex;
773
774 if (mutex->futex != 0) {
775 s << ":";
776 if (mutex->futex & FUTEX_OWNER_DIED) {
777 s << "FUTEX_OWNER_DIED|";
778 }
779 s << "tid=" << (mutex->futex & FUTEX_TID_MASK);
780 }
781
782 s << ")";
783 return s.str();
784}
785
786} // namespace
787
788void PrintLocklessQueueMemory(LocklessQueueMemory *memory) {
789 const size_t queue_size = memory->queue_size();
790 ::std::cout << "LocklessQueueMemory (" << memory << ") {" << ::std::endl;
791 ::std::cout << " aos_mutex queue_setup_lock = "
792 << PrintMutex(&memory->queue_setup_lock) << ::std::endl;
793 ::std::cout << " ::std::atomic<bool> initialized = " << memory->initialized
794 << ::std::endl;
795 ::std::cout << " config {" << ::std::endl;
796 ::std::cout << " size_t num_watchers = " << memory->config.num_watchers
797 << ::std::endl;
798 ::std::cout << " size_t num_senders = " << memory->config.num_senders
799 << ::std::endl;
800 ::std::cout << " size_t queue_size = " << memory->config.queue_size
801 << ::std::endl;
802 ::std::cout << " size_t message_data_size = "
803 << memory->config.message_data_size << ::std::endl;
804
805 ::std::cout << " AtomicQueueIndex next_queue_index = "
806 << memory->next_queue_index.Load(queue_size).DebugString()
807 << ::std::endl;
808
809 ::std::cout << " }" << ::std::endl;
810 ::std::cout << " AtomicIndex queue[" << queue_size << "] {" << ::std::endl;
811 for (size_t i = 0; i < queue_size; ++i) {
812 ::std::cout << " [" << i << "] -> "
813 << memory->GetQueue(i)->Load().DebugString() << ::std::endl;
814 }
815 ::std::cout << " }" << ::std::endl;
816 ::std::cout << " Message messages[" << memory->num_messages() << "] {"
817 << ::std::endl;
818 for (size_t i = 0; i < memory->num_messages(); ++i) {
819 Message *m = memory->GetMessage(Index(i, i));
820 ::std::cout << " [" << i << "] -> Message {" << ::std::endl;
821 ::std::cout << " Header {" << ::std::endl;
822 ::std::cout << " AtomicQueueIndex queue_index = "
823 << m->header.queue_index.Load(queue_size).DebugString()
824 << ::std::endl;
825 ::std::cout << " size_t length = " << m->header.length
826 << ::std::endl;
827 ::std::cout << " }" << ::std::endl;
828 ::std::cout << " data: {";
829
830 for (size_t j = 0; j < m->header.length; ++j) {
831 char data = m->data[j];
832 if (j != 0) {
833 ::std::cout << " ";
834 }
835 if (::std::isprint(data)) {
836 ::std::cout << ::std::setfill(' ') << ::std::setw(2) << ::std::hex
837 << data;
838 } else {
839 ::std::cout << "0x" << ::std::setfill('0') << ::std::setw(2)
840 << ::std::hex << (static_cast<unsigned>(data) & 0xff);
841 }
842 }
843 ::std::cout << ::std::setfill(' ') << ::std::dec << "}" << ::std::endl;
844 ::std::cout << " }," << ::std::endl;
845 }
846 ::std::cout << " }" << ::std::endl;
847
848 ::std::cout << " Sender senders[" << memory->num_senders() << "] {" << ::std::endl;
849 for (size_t i = 0; i < memory->num_senders(); ++i) {
850 Sender *s = memory->GetSender(i);
851 ::std::cout << " [" << i << "] -> Sender {" << ::std::endl;
852 ::std::cout << " aos_mutex tid = " << PrintMutex(&s->tid)
853 << ::std::endl;
854 ::std::cout << " AtomicIndex scratch_index = "
855 << s->scratch_index.Load().DebugString() << ::std::endl;
856 ::std::cout << " AtomicIndex to_replace = "
857 << s->to_replace.Load().DebugString() << ::std::endl;
858 ::std::cout << " }" << ::std::endl;
859 }
860 ::std::cout << " }" << ::std::endl;
861
862 ::std::cout << " Watcher watchers[" << memory->num_watchers() << "] {"
863 << ::std::endl;
864 for (size_t i = 0; i < memory->num_watchers(); ++i) {
865 Watcher *w = memory->GetWatcher(i);
866 ::std::cout << " [" << i << "] -> Watcher {" << ::std::endl;
867 ::std::cout << " aos_mutex tid = " << PrintMutex(&w->tid)
868 << ::std::endl;
869 ::std::cout << " pid_t pid = " << w->pid << ::std::endl;
870 ::std::cout << " int priority = " << w->priority << ::std::endl;
871 ::std::cout << " }" << ::std::endl;
872 }
873 ::std::cout << " }" << ::std::endl;
874
875 ::std::cout << "}" << ::std::endl;
876}
877
878} // namespace ipc_lib
879} // namespace aos