#if !AOS_DEBUG
#undef NDEBUG
#define NDEBUG
#endif

#include "aos/ipc_lib/aos_sync.h"

#include <linux/futex.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <errno.h>
#include <stdint.h>
#include <limits.h>
#include <string.h>
#include <inttypes.h>
#include <sys/types.h>
#include <stddef.h>
#include <assert.h>
#include <pthread.h>
#include <sched.h>

#ifdef AOS_SANITIZER_thread
#include <sanitizer/tsan_interface_atomic.h>
#endif

#include <algorithm>
#include <type_traits>

#include "aos/logging/logging.h"
#include "aos/macros.h"
#include "aos/util/compiler_memory_barrier.h"
#include "aos/once.h"

using ::aos::linux_code::ipc_lib::FutexAccessorObserver;

// This code was originally based on <https://www.akkadia.org/drepper/futex.pdf>,
// but it has since evolved a lot. However, that still has useful information.
//
// Finding information about actually using futexes is really REALLY hard, so
// here's a list of the stuff that I've used:
// futex(7) has a really high-level overview.
// <http://locklessinc.com/articles/futex_cheat_sheet/> describes some of the
// operations in a bit more detail than most places.
// <http://locklessinc.com/articles/mutex_cv_futex/> is the basis of our
// implementations (before PI).
// <http://lwn.net/Articles/360699/> has a nice overview of futexes in late 2009
// (fairly recent compared to everything else...).
// <https://www.kernel.org/doc/Documentation/pi-futex.txt>,
// <https://www.kernel.org/doc/Documentation/futex-requeue-pi.txt>,
// <https://www.kernel.org/doc/Documentation/robust-futexes.txt>,
// and <https://www.kernel.org/doc/Documentation/robust-futex-ABI.txt> are all
// useful references.
// The kernel source (kernel/futex.c) has some useful comments about what the
// various operations do (except figuring out which argument goes where in the
// syscall is still confusing).
// futex(2) is basically useless except for describing the order of the
// arguments (it only has high-level descriptions of what some of the
// operations do, and some of them are wrong in Wheezy).
// glibc's nptl pthreads implementation is the intended user of most of these
// things, so it is also a good place to look for examples. However, it is all
// very hard to read because it supports ~20 different kinds of mutexes and
// several variations of condition variables, and some of the pieces of code
// are only written in assembly.
// set_robust_list(2) is wrong in Wheezy (it doesn't actually take a TID
// argument).
//
// Can't use PRIVATE futex operations because they use the pid (or something) as
// part of the hash.
//
// ThreadSanitizer understands how these mutexes etc work. It appears to be able
// to figure out the happens-before relationship from the __ATOMIC_SEQ_CST
// atomic primitives.
//
// Remember that EAGAIN and EWOULDBLOCK are the same! (ie if you get EAGAIN from
// FUTEX_WAIT, the docs call it EWOULDBLOCK...)

// Values for an aos_mutex.futex (kernel-mandated):
//   0 = unlocked
//   TID = locked, not contended
//   |FUTEX_WAITERS = there are waiters (aka contended)
//   |FUTEX_OWNER_DIED = old owner died
//
// Values for an aos_futex being used directly:
//   0 = unset
//   1 = set
//
// The value of an aos_condition is just a generation counter.

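// A rough sketch of the uncontended fast paths implemented below (assuming the
// calling thread's TID is tid and nobody else is touching the futex):
//   lock:    compare_and_swap(&m->futex, 0, tid) succeeds -> no syscall needed
//   unlock:  compare_and_swap(&m->futex, tid, 0) succeeds -> no syscall needed
// If either transition fails (somebody else holds the lock, or FUTEX_WAITERS
// and/or FUTEX_OWNER_DIED is set), the kernel gets involved via
// FUTEX_LOCK_PI/FUTEX_UNLOCK_PI instead.
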
// Whether or not to use the REQUEUE_PI operation. Using it is better (fewer
// syscalls and the highest priority waiter is always the one that gets woken),
// but there's a kernel bug that results in random memory corruption while using
// it.
// The alternative is to just wake everybody and have them all race to relock
// the mutex (classic thundering herd).
// Currently this is just whether or not we're on ARM, because we only run this
// on ARM kernels with the patch to fix that issue applied. This will likely
// change to something based on kernel version at some point.
#ifdef __arm__
#define USE_REQUEUE_PI 1
#else
#define USE_REQUEUE_PI 0
#endif

#ifdef AOS_SANITIZER_thread
extern "C" void AnnotateHappensBefore(const char *file, int line,
                                      uintptr_t addr);
extern "C" void AnnotateHappensAfter(const char *file, int line,
                                     uintptr_t addr);
#define ANNOTATE_HAPPENS_BEFORE(address)    \
  AnnotateHappensBefore(__FILE__, __LINE__, \
                        reinterpret_cast<uintptr_t>(address))
#define ANNOTATE_HAPPENS_AFTER(address) \
  AnnotateHappensAfter(__FILE__, __LINE__, reinterpret_cast<uintptr_t>(address))
#else
#define ANNOTATE_HAPPENS_BEFORE(address)
#define ANNOTATE_HAPPENS_AFTER(address)
#endif

namespace {

const bool kRobustListDebug = false;
const bool kLockDebug = false;
const bool kPrintOperations = false;

// These sys_futex_* functions are wrappers around syscall(SYS_futex). They each
// take a specific set of arguments for a given futex operation. They return the
// result or a negated errno value. -1..-4095 mean errors and not successful
// results, which is guaranteed by the kernel.
//
// They each have optimized versions for ARM EABI (the syscall interface is
// different for non-EABI ARM, so that is the right thing to test for) that
// don't go through syscall(2) or errno.
// These use register variables to get the values in the right registers to
// actually make the syscall.

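// As an illustration of that convention (a sketch, not code from this file;
// some_futex stands in for any aos_futex), a caller recovers errno by negating
// a negative return value:
//   const int ret = sys_futex_wake(&some_futex, 1);
//   if (ret < 0) {
//     // ret is -errno from the failed syscall (eg -EINVAL).
//   } else {
//     // For FUTEX_WAKE, ret is the number of waiters that were woken.
//   }
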
// The actual macro that we key off of to use the inline versions or not.
#if defined(__ARM_EABI__)
#define ARM_EABI_INLINE_SYSCALL 1
#else
#define ARM_EABI_INLINE_SYSCALL 0
#endif

// Used for FUTEX_WAIT, FUTEX_LOCK_PI, and FUTEX_TRYLOCK_PI.
inline int sys_futex_wait(int op, aos_futex *addr1, int val1,
                          const struct timespec *timeout) {
#if ARM_EABI_INLINE_SYSCALL
  register aos_futex *addr1_reg __asm__("r0") = addr1;
  register int op_reg __asm__("r1") = op;
  register int val1_reg __asm__("r2") = val1;
  register const struct timespec *timeout_reg __asm__("r3") = timeout;
  register int syscall_number __asm__("r7") = SYS_futex;
  register int result __asm__("r0");
  __asm__ volatile("swi #0"
                   : "=r"(result)
                   : "r"(addr1_reg), "r"(op_reg), "r"(val1_reg),
                     "r"(timeout_reg), "r"(syscall_number)
                   : "memory");
  return result;
#else
  const int r = syscall(SYS_futex, addr1, op, val1, timeout);
  if (r == -1) return -errno;
  return r;
#endif
}

inline int sys_futex_wake(aos_futex *addr1, int val1) {
#if ARM_EABI_INLINE_SYSCALL
  register aos_futex *addr1_reg __asm__("r0") = addr1;
  register int op_reg __asm__("r1") = FUTEX_WAKE;
  register int val1_reg __asm__("r2") = val1;
  register int syscall_number __asm__("r7") = SYS_futex;
  register int result __asm__("r0");
  __asm__ volatile("swi #0"
                   : "=r"(result)
                   : "r"(addr1_reg), "r"(op_reg), "r"(val1_reg),
                     "r"(syscall_number)
                   : "memory");
  return result;
#else
  const int r = syscall(SYS_futex, addr1, FUTEX_WAKE, val1);
  if (r == -1) return -errno;
  return r;
#endif
}

inline int sys_futex_cmp_requeue_pi(aos_futex *addr1, int num_wake,
                                    int num_requeue, aos_futex *m,
                                    uint32_t val) {
#if ARM_EABI_INLINE_SYSCALL
  register aos_futex *addr1_reg __asm__("r0") = addr1;
  register int op_reg __asm__("r1") = FUTEX_CMP_REQUEUE_PI;
  register int num_wake_reg __asm__("r2") = num_wake;
  register int num_requeue_reg __asm__("r3") = num_requeue;
  register aos_futex *m_reg __asm__("r4") = m;
  register uint32_t val_reg __asm__("r5") = val;
  register int syscall_number __asm__("r7") = SYS_futex;
  register int result __asm__("r0");
  __asm__ volatile("swi #0"
                   : "=r"(result)
                   : "r"(addr1_reg), "r"(op_reg), "r"(num_wake_reg),
                     "r"(num_requeue_reg), "r"(m_reg), "r"(val_reg),
                     "r"(syscall_number)
                   : "memory");
  return result;
#else
  const int r = syscall(SYS_futex, addr1, FUTEX_CMP_REQUEUE_PI, num_wake,
                        num_requeue, m, val);
  if (r == -1) return -errno;
  return r;
#endif
}

inline int sys_futex_wait_requeue_pi(aos_condition *addr1,
                                     uint32_t start_val,
                                     const struct timespec *timeout,
                                     aos_futex *m) {
#if ARM_EABI_INLINE_SYSCALL
  register aos_condition *addr1_reg __asm__("r0") = addr1;
  register int op_reg __asm__("r1") = FUTEX_WAIT_REQUEUE_PI;
  register uint32_t start_val_reg __asm__("r2") = start_val;
  register const struct timespec *timeout_reg __asm__("r3") = timeout;
  register aos_futex *m_reg __asm__("r4") = m;
  register int syscall_number __asm__("r7") = SYS_futex;
  register int result __asm__("r0");
  __asm__ volatile("swi #0"
                   : "=r"(result)
                   : "r"(addr1_reg), "r"(op_reg), "r"(start_val_reg),
                     "r"(timeout_reg), "r"(m_reg), "r"(syscall_number)
                   : "memory");
  return result;
#else
  const int r =
      syscall(SYS_futex, addr1, FUTEX_WAIT_REQUEUE_PI, start_val, timeout, m);
  if (r == -1) return -errno;
  return r;
#endif
}

inline int sys_futex_unlock_pi(aos_futex *addr1) {
#if ARM_EABI_INLINE_SYSCALL
  register aos_futex *addr1_reg __asm__("r0") = addr1;
  register int op_reg __asm__("r1") = FUTEX_UNLOCK_PI;
  register int syscall_number __asm__("r7") = SYS_futex;
  register int result __asm__("r0");
  __asm__ volatile("swi #0"
                   : "=r"(result)
                   : "r"(addr1_reg), "r"(op_reg), "r"(syscall_number)
                   : "memory");
  return result;
#else
  const int r = syscall(SYS_futex, addr1, FUTEX_UNLOCK_PI);
  if (r == -1) return -errno;
  return r;
#endif
}

// Returns the previous value of f.
inline uint32_t compare_and_swap_val(aos_futex *f, uint32_t before,
                                     uint32_t after) {
#ifdef AOS_SANITIZER_thread
  // This is a workaround for <https://llvm.org/bugs/show_bug.cgi?id=23176>.
  // Basically, most of the atomic operations are broken under tsan, but this
  // particular one isn't.
  // TODO(Brian): Remove this #ifdef (and the one in compare_and_swap) once we
  // don't have to worry about tsan with this bug any more.
  uint32_t before_value = before;
  __tsan_atomic32_compare_exchange_strong(
      reinterpret_cast<int32_t *>(f),
      reinterpret_cast<int32_t *>(&before_value), after,
      __tsan_memory_order_seq_cst, __tsan_memory_order_seq_cst);
  return before_value;
#else
  return __sync_val_compare_and_swap(f, before, after);
#endif
}

// Returns true if it succeeds and false if it fails.
inline bool compare_and_swap(aos_futex *f, uint32_t before, uint32_t after) {
#ifdef AOS_SANITIZER_thread
  return compare_and_swap_val(f, before, after) == before;
#else
  return __sync_bool_compare_and_swap(f, before, after);
#endif
}

#ifdef AOS_SANITIZER_thread

// Simple macro for checking something which should always be true.
// Using the standard CHECK macro isn't safe because failures often result in
// reentering the mutex locking code, which doesn't work.
#define SIMPLE_CHECK(expr)                                                    \
  do {                                                                        \
    if (!(expr)) {                                                            \
      fprintf(stderr, "%s: %d: SIMPLE_CHECK(" #expr ") failed!\n", __FILE__, \
              __LINE__);                                                      \
      abort();                                                                \
    }                                                                         \
  } while (false)

// Forcibly initializes the pthread mutex for *m.
// This sequence of operations is only safe for the simpler kinds of mutexes in
// glibc's pthreads implementation on Linux.
void init_pthread_mutex(aos_mutex *m) {
  // Re-initialize the mutex so the destroy won't fail if it's locked.
  // tsan ignores this.
  SIMPLE_CHECK(0 == pthread_mutex_init(&m->pthread_mutex, nullptr));
  // Destroy the mutex so tsan will forget about it if some now-dead thread
  // locked it.
  SIMPLE_CHECK(0 == pthread_mutex_destroy(&m->pthread_mutex));

  // Now actually initialize it, making sure it's process-shareable so it works
  // correctly across shared memory.
  pthread_mutexattr_t attr;
  SIMPLE_CHECK(0 == pthread_mutexattr_init(&attr));
  SIMPLE_CHECK(0 == pthread_mutexattr_setpshared(&attr, true));
  SIMPLE_CHECK(0 == pthread_mutex_init(&m->pthread_mutex, &attr));
  SIMPLE_CHECK(0 == pthread_mutexattr_destroy(&attr));
}

// Locks the pthread mutex for *m.
// If a stack trace ever reveals the pthread_mutex_lock call in here blocking,
// there is a bug in our mutex code or the way somebody is calling it.
void lock_pthread_mutex(aos_mutex *m) {
  if (!m->pthread_mutex_init) {
    init_pthread_mutex(m);
    m->pthread_mutex_init = true;
  }
  SIMPLE_CHECK(0 == pthread_mutex_lock(&m->pthread_mutex));
}

// Forcibly locks the pthread mutex for *m.
// This will (somewhat hackily) rip the lock out from underneath somebody else
// who is already holding it.
void force_lock_pthread_mutex(aos_mutex *m) {
  if (!m->pthread_mutex_init) {
    init_pthread_mutex(m);
    m->pthread_mutex_init = true;
  }
  const int trylock_result = pthread_mutex_trylock(&m->pthread_mutex);
  SIMPLE_CHECK(trylock_result == 0 || trylock_result == EBUSY);
  if (trylock_result == 0) {
    // We're good, so unlock it and then go for a real lock down below.
    SIMPLE_CHECK(0 == pthread_mutex_unlock(&m->pthread_mutex));
  } else {
    // Somebody (should always be somebody else who died with it held) already
    // has it, so make tsan forget about that.
    init_pthread_mutex(m);
  }
  lock_pthread_mutex(m);
}

// Unlocks the pthread mutex for *m.
void unlock_pthread_mutex(aos_mutex *m) {
  assert(m->pthread_mutex_init);
  SIMPLE_CHECK(0 == pthread_mutex_unlock(&m->pthread_mutex));
}

#else

// Empty implementations of all these so the code below doesn't need #ifdefs.
static inline void lock_pthread_mutex(aos_mutex *) {}
static inline void force_lock_pthread_mutex(aos_mutex *) {}
static inline void unlock_pthread_mutex(aos_mutex *) {}

#endif

pid_t do_get_tid() {
  pid_t r = syscall(SYS_gettid);
  assert(r > 0);
  return r;
}

// This gets called by functions before LOG(FATAL)ing with error messages that
// would be incorrect if the error was caused by a process forking without
// initialize_in_new_thread getting called in the fork.
void check_cached_tid(pid_t tid) {
  pid_t actual = do_get_tid();
  if (tid != actual) {
    LOG(FATAL,
        "task %jd forked into %jd without letting aos_sync know"
        " so we're not really sure what's going on\n",
        static_cast<intmax_t>(tid), static_cast<intmax_t>(actual));
  }
}

// Starts off at 0 in each new thread (because that's what it gets initialized
// to in most of them, or it gets reset to 0 after a fork by atfork_child()).
thread_local pid_t my_tid = 0;

// Gets called before the fork(2) wrapper function returns in the child.
void atfork_child() {
  // The next time get_tid() is called, it will set everything up again.
  my_tid = 0;
}

void *InstallAtforkHook() {
  if (pthread_atfork(NULL, NULL, atfork_child) != 0) {
    PLOG(FATAL, "pthread_atfork(NULL, NULL, %p) failed", atfork_child);
  }
  return nullptr;
}

// This gets called to set everything up in a new thread by get_tid().
void initialize_in_new_thread();

// Gets the current thread's TID and does all of the 1-time initialization the
// first time it's called in a given thread.
inline uint32_t get_tid() {
  if (__builtin_expect(my_tid == 0, false)) {
    initialize_in_new_thread();
  }
  static_assert(sizeof(my_tid) <= sizeof(uint32_t), "pid_t is too big");
  return static_cast<uint32_t>(my_tid);
}

// Contains all of the stuff for dealing with the robust list. Nothing outside
// this namespace should touch anything inside it except Init, Adder, and
// Remover.
namespace my_robust_list {

static_assert(offsetof(aos_mutex, next) == 0,
              "Our math all assumes that the beginning of a mutex and its next "
              "pointer are at the same place in memory.");

// Our version of robust_list_head.
// This is copied from the kernel header because that's a pretty stable ABI (and
// any changes will be backwards compatible anyways) and we want ours to have
// different types.
// The uintptr_ts are &next of the elements in the list (with stuff |ed in).
struct aos_robust_list_head {
  uintptr_t next;
  long futex_offset;
  uintptr_t pending_next;
};

static_assert(offsetof(aos_robust_list_head, next) ==
                  offsetof(robust_list_head, list),
              "Our aos_robust_list_head doesn't match the kernel's");
static_assert(offsetof(aos_robust_list_head, futex_offset) ==
                  offsetof(robust_list_head, futex_offset),
              "Our aos_robust_list_head doesn't match the kernel's");
static_assert(offsetof(aos_robust_list_head, pending_next) ==
                  offsetof(robust_list_head, list_op_pending),
              "Our aos_robust_list_head doesn't match the kernel's");
static_assert(sizeof(aos_robust_list_head) == sizeof(robust_list_head),
              "Our aos_robust_list_head doesn't match the kernel's");

thread_local aos_robust_list_head robust_head;

// Extra offset between mutex values and where we point to for their robust list
// entries (from SetRobustListOffset).
uintptr_t robust_list_offset = 0;

// The value to OR each pointer's value with whenever putting it into the robust
// list (technically only if it's PI, but all of ours are, so...).
static const uintptr_t kRobustListOr = 1;

// Returns the value which goes into a next variable to represent the head.
inline uintptr_t robust_head_next_value() {
  return reinterpret_cast<uintptr_t>(&robust_head.next);
}
// Returns true iff next represents the head.
inline bool next_is_head(uintptr_t next) {
  return next == robust_head_next_value();
}
// Returns the (pseudo-)mutex corresponding to the head.
// This does NOT have a previous pointer, so be careful with the return value.
inline aos_mutex *robust_head_mutex() {
  return reinterpret_cast<aos_mutex *>(robust_head_next_value());
}

inline uintptr_t mutex_to_next(aos_mutex *m) {
  return (reinterpret_cast<uintptr_t>(&m->next) + robust_list_offset) |
         kRobustListOr;
}
inline aos_mutex *next_to_mutex(uintptr_t next) {
  if (__builtin_expect(robust_list_offset != 0, false) && next_is_head(next)) {
    // We don't offset the head pointer, so be careful.
    return reinterpret_cast<aos_mutex *>(next);
  }
  return reinterpret_cast<aos_mutex *>(
      (next & ~kRobustListOr) - robust_list_offset);
}

// Sets up the robust list for each thread.
void Init() {
  // It starts out just pointing back to itself.
  robust_head.next = robust_head_next_value();
  robust_head.futex_offset = static_cast<ssize_t>(offsetof(aos_mutex, futex)) -
                             static_cast<ssize_t>(offsetof(aos_mutex, next));
  robust_head.pending_next = 0;
  if (syscall(SYS_set_robust_list, robust_head_next_value(),
              sizeof(robust_head)) != 0) {
    PLOG(FATAL, "set_robust_list(%p, %zd) failed",
         reinterpret_cast<void *>(robust_head.next), sizeof(robust_head));
  }
  if (kRobustListDebug) {
    printf("%" PRId32 ": init done\n", get_tid());
  }
}

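// As a rough sketch of what this produces (assuming robust_list_offset is 0),
// after the current thread locks a single mutex m1:
//   robust_head.next == (&m1->next) | kRobustListOr
//   m1->next         == &robust_head.next (ie robust_head_next_value())
//   m1->previous     == robust_head_mutex()
// If the thread dies, the kernel walks these next pointers (ignoring the low
// bit) and sets FUTEX_OWNER_DIED in each still-locked futex so another thread
// can recover it.
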
// Updating the offset with locked mutexes is important during robustness
// testing, because there are mutexes which are locked before this is set to a
// non-0 value and then unlocked after it is changed back. However, to make sure
// the code works correctly when manipulating the next pointer of the last of
// those mutexes, all of their next values have to be adjusted appropriately.
void SetRobustListOffset(uintptr_t offset) {
  const uintptr_t offset_change = offset - robust_list_offset;
  robust_list_offset = offset;
  aos_mutex *m = robust_head_mutex();
  // Update the offset contained in each of the mutexes which is already locked.
  while (!next_is_head(m->next)) {
    m->next += offset_change;
    m = next_to_mutex(m->next);
  }
}

bool HaveLockedMutexes() {
  return robust_head.next != robust_head_next_value();
}

// Handles adding a mutex to the robust list.
// The idea is to create one of these at the beginning of a function that needs
// to do this and then call Add() iff it should actually be added.
class Adder {
 public:
  Adder(aos_mutex *m) : m_(m) {
    assert(robust_head.pending_next == 0);
    if (kRobustListDebug) {
      printf("%" PRId32 ": maybe add %p\n", get_tid(), m_);
    }
    robust_head.pending_next = mutex_to_next(m);
    aos_compiler_memory_barrier();
  }
  ~Adder() {
    assert(robust_head.pending_next == mutex_to_next(m_));
    if (kRobustListDebug) {
      printf("%" PRId32 ": done maybe add %p, n=%p p=%p\n", get_tid(), m_,
             next_to_mutex(m_->next), m_->previous);
    }
    aos_compiler_memory_barrier();
    robust_head.pending_next = 0;
  }

  void Add() {
    assert(robust_head.pending_next == mutex_to_next(m_));
    if (kRobustListDebug) {
      printf("%" PRId32 ": adding %p\n", get_tid(), m_);
    }
    const uintptr_t old_head_next_value = robust_head.next;

    m_->next = old_head_next_value;
    aos_compiler_memory_barrier();
    robust_head.next = mutex_to_next(m_);

    m_->previous = robust_head_mutex();
    if (!next_is_head(old_head_next_value)) {
      // robust_head's pseudo-mutex doesn't have a previous pointer to update.
      next_to_mutex(old_head_next_value)->previous = m_;
    }
    aos_compiler_memory_barrier();
    if (kRobustListDebug) {
      printf("%" PRId32 ": done adding %p\n", get_tid(), m_);
    }
  }

 private:
  aos_mutex *const m_;

  DISALLOW_COPY_AND_ASSIGN(Adder);
};

// Handles removing a mutex from the robust list.
// The idea is to create one of these at the beginning of a function that needs
// to do this.
class Remover {
 public:
  Remover(aos_mutex *m) {
    assert(robust_head.pending_next == 0);
    if (kRobustListDebug) {
      printf("%" PRId32 ": beginning to remove %p, n=%p p=%p\n", get_tid(), m,
             next_to_mutex(m->next), m->previous);
    }
    robust_head.pending_next = mutex_to_next(m);
    aos_compiler_memory_barrier();

    aos_mutex *const previous = m->previous;
    const uintptr_t next_value = m->next;

    previous->next = m->next;
    if (!next_is_head(next_value)) {
      // robust_head's pseudo-mutex doesn't have a previous pointer to update.
      next_to_mutex(next_value)->previous = previous;
    }

    if (kRobustListDebug) {
      printf("%" PRId32 ": done removing %p\n", get_tid(), m);
    }
  }
  ~Remover() {
    assert(robust_head.pending_next != 0);
    aos_compiler_memory_barrier();
    robust_head.pending_next = 0;
    if (kRobustListDebug) {
      printf("%" PRId32 ": done with removal\n", get_tid());
    }
  }

 private:
  DISALLOW_COPY_AND_ASSIGN(Remover);
};

}  // namespace my_robust_list

void initialize_in_new_thread() {
  // No synchronization necessary in most of this because it's all thread-local!

  my_tid = do_get_tid();

  static ::aos::Once<void> atfork_hook_installed(InstallAtforkHook);
  atfork_hook_installed.Get();

  my_robust_list::Init();
}

FutexAccessorObserver before_observer = nullptr, after_observer = nullptr;

// RAII class which runs before_observer during construction and after_observer
// during destruction.
class RunObservers {
 public:
  template <class T>
  RunObservers(T *address, bool write)
      : address_(static_cast<void *>(
            const_cast<typename ::std::remove_cv<T>::type *>(address))),
        write_(write) {
    if (__builtin_expect(before_observer != nullptr, false)) {
      before_observer(address_, write_);
    }
  }
  ~RunObservers() {
    if (__builtin_expect(after_observer != nullptr, false)) {
      after_observer(address_, write_);
    }
  }

 private:
  void *const address_;
  const bool write_;

  DISALLOW_COPY_AND_ASSIGN(RunObservers);
};

// Finishes the locking of a mutex by potentially clearing FUTEX_OWNER_DIED in
// the futex and returning the correct value.
inline int mutex_finish_lock(aos_mutex *m) {
  const uint32_t value = __atomic_load_n(&m->futex, __ATOMIC_ACQUIRE);
  if (__builtin_expect((value & FUTEX_OWNER_DIED) != 0, false)) {
    __atomic_and_fetch(&m->futex, ~FUTEX_OWNER_DIED, __ATOMIC_RELAXED);
    force_lock_pthread_mutex(m);
    return 1;
  } else {
    lock_pthread_mutex(m);
    return 0;
  }
}

// Split out separately from mutex_get so condition_wait can call it and use its
// own my_robust_list::Adder.
inline int mutex_do_get(aos_mutex *m, bool signals_fail,
                        const struct timespec *timeout, uint32_t tid) {
  RunObservers run_observers(m, true);
  if (kPrintOperations) {
    printf("%" PRId32 ": %p do_get\n", tid, m);
  }

  while (true) {
    // If the atomic 0->TID transition fails.
    if (!compare_and_swap(&m->futex, 0, tid)) {
      // Wait in the kernel, which handles atomically ORing in FUTEX_WAITERS
      // before actually sleeping.
      const int ret = sys_futex_wait(FUTEX_LOCK_PI, &m->futex, 1, timeout);
      if (ret != 0) {
        if (timeout != NULL && ret == -ETIMEDOUT) {
          return 3;
        }
        if (__builtin_expect(ret == -EINTR, true)) {
          if (signals_fail) {
            return 2;
          } else {
            continue;
          }
        }
        my_robust_list::robust_head.pending_next = 0;
        if (ret == -EDEADLK) {
          LOG(FATAL, "multiple lock of %p by %" PRId32 "\n", m, tid);
        }
        PELOG(FATAL, -ret, "FUTEX_LOCK_PI(%p(=%" PRIu32 "), 1, %p) failed",
              &m->futex, __atomic_load_n(&m->futex, __ATOMIC_SEQ_CST), timeout);
      } else {
        if (kLockDebug) {
          printf("%" PRId32 ": %p kernel lock done\n", tid, m);
        }
        // The kernel already handled setting the value to our TID (ish).
        break;
      }
    } else {
      if (kLockDebug) {
        printf("%" PRId32 ": %p fast lock done\n", tid, m);
      }
      lock_pthread_mutex(m);
      // Fastpath succeeded, so no need to call into the kernel.
      // Because this is the fastpath, it's a good idea to avoid even having to
      // load the value again down below.
      return 0;
    }
  }

  return mutex_finish_lock(m);
}

// The common implementation for everything that wants to lock a mutex.
// If signals_fail is false, the function will try again if the wait syscall is
// interrupted by a signal.
// timeout can be NULL for no timeout.
inline int mutex_get(aos_mutex *m, bool signals_fail,
                     const struct timespec *timeout) {
  const uint32_t tid = get_tid();
  my_robust_list::Adder adder(m);
  const int r = mutex_do_get(m, signals_fail, timeout, tid);
  if (r == 0 || r == 1) adder.Add();
  return r;
}

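// For reference, the return values produced by mutex_get (and everything built
// on it) in this file are:
//   0 = locked it
//   1 = locked it, but the previous owner died while holding it
//   2 = the wait was interrupted by a signal (only when signals_fail)
//   3 = timed out (only when a timeout was passed in)
// mutex_trylock below additionally returns 4 when somebody else already holds
// the lock.
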
// The common implementation for broadcast and signal.
// number_requeue is the number of waiters to requeue (probably INT_MAX or 0). 1
// will always be woken.
void condition_wake(aos_condition *c, aos_mutex *m, int number_requeue) {
  RunObservers run_observers(c, true);
  // Make it so that anybody just going to sleep won't.
  // This is where we might accidentally wake more than just 1 waiter with 1
  // signal():
  // 1 already sleeping will be woken but n might never actually make it to
  // sleep in the kernel because of this.
  uint32_t new_value = __atomic_add_fetch(c, 1, __ATOMIC_SEQ_CST);

  if (USE_REQUEUE_PI) {
    while (true) {
      // This really wants to be FUTEX_REQUEUE_PI, but the kernel doesn't have
      // that... However, the code to support that is in the kernel, so it might
      // be a good idea to patch it to support that and use it iff it's there.
      const int ret =
          sys_futex_cmp_requeue_pi(c, 1, number_requeue, &m->futex, new_value);
      if (ret < 0) {
        // If the value got changed out from under us (aka somebody else did a
        // condition_wake).
        if (__builtin_expect(ret == -EAGAIN, true)) {
          // If we're doing a broadcast, the other guy might have done a signal
          // instead, so we have to try again.
          // If we're doing a signal, we have to go again to make sure that 2
          // signals wake 2 processes.
          new_value = __atomic_load_n(c, __ATOMIC_RELAXED);
          continue;
        }
        my_robust_list::robust_head.pending_next = 0;
        PELOG(FATAL, -ret, "FUTEX_CMP_REQUEUE_PI(%p, 1, %d, %p, *%p) failed",
              c, number_requeue, &m->futex, c);
      } else {
        return;
      }
    }
  } else {
    const int ret = sys_futex_wake(
        c, ::std::min(::std::max(number_requeue, 1), INT_MAX - 4096));
    if (__builtin_expect(
            static_cast<unsigned int>(ret) > static_cast<unsigned int>(-4096),
            false)) {
      my_robust_list::robust_head.pending_next = 0;
      PELOG(FATAL, -ret, "FUTEX_WAKE(%p, %d) failed", c, INT_MAX - 4096);
    }
  }
}

}  // namespace

int mutex_lock(aos_mutex *m) {
  return mutex_get(m, true, NULL);
}
int mutex_lock_timeout(aos_mutex *m, const struct timespec *timeout) {
  return mutex_get(m, true, timeout);
}
int mutex_grab(aos_mutex *m) {
  return mutex_get(m, false, NULL);
}

void mutex_unlock(aos_mutex *m) {
  RunObservers run_observers(m, true);
  const uint32_t tid = get_tid();
  if (kPrintOperations) {
    printf("%" PRId32 ": %p unlock\n", tid, m);
  }

  const uint32_t value = __atomic_load_n(&m->futex, __ATOMIC_SEQ_CST);
  if (__builtin_expect((value & FUTEX_TID_MASK) != tid, false)) {
    my_robust_list::robust_head.pending_next = 0;
    check_cached_tid(tid);
    if ((value & FUTEX_TID_MASK) == 0) {
      LOG(FATAL, "multiple unlock of aos_mutex %p by %" PRId32 "\n", m, tid);
    } else {
      LOG(FATAL, "aos_mutex %p is locked by %" PRId32 ", not %" PRId32 "\n",
          m, value & FUTEX_TID_MASK, tid);
    }
  }

  my_robust_list::Remover remover(m);
  unlock_pthread_mutex(m);

  // If the atomic TID->0 transition fails (ie FUTEX_WAITERS is set),
  if (!compare_and_swap(&m->futex, tid, 0)) {
    // The kernel handles everything else.
    const int ret = sys_futex_unlock_pi(&m->futex);
    if (ret != 0) {
      my_robust_list::robust_head.pending_next = 0;
      PELOG(FATAL, -ret, "FUTEX_UNLOCK_PI(%p) failed", &m->futex);
    }
  } else {
    // There aren't any waiters, so no need to call into the kernel.
  }
}

int mutex_trylock(aos_mutex *m) {
  RunObservers run_observers(m, true);
  const uint32_t tid = get_tid();
  if (kPrintOperations) {
    printf("%" PRId32 ": %p trylock\n", tid, m);
  }
  my_robust_list::Adder adder(m);

  // Try an atomic 0->TID transition.
  uint32_t c = compare_and_swap_val(&m->futex, 0, tid);

  if (c != 0) {
    if (__builtin_expect((c & FUTEX_OWNER_DIED) == 0, true)) {
      // Somebody else had it locked; we failed.
      return 4;
    } else {
      // FUTEX_OWNER_DIED was set, so we have to call into the kernel to deal
      // with resetting it.
      const int ret = sys_futex_wait(FUTEX_TRYLOCK_PI, &m->futex, 0, NULL);
      if (ret == 0) {
        adder.Add();
        // Only clear the owner died if somebody else didn't do the recovery
        // and then unlock before our TRYLOCK happened.
        return mutex_finish_lock(m);
      } else {
        // EWOULDBLOCK means that somebody else beat us to it.
        if (__builtin_expect(ret == -EWOULDBLOCK, true)) {
          return 4;
        }
        my_robust_list::robust_head.pending_next = 0;
        PELOG(FATAL, -ret, "FUTEX_TRYLOCK_PI(%p, 0, NULL) failed", &m->futex);
      }
    }
  }

  lock_pthread_mutex(m);
  adder.Add();
  return 0;
}

bool mutex_islocked(const aos_mutex *m) {
  const uint32_t tid = get_tid();

  const uint32_t value = __atomic_load_n(&m->futex, __ATOMIC_RELAXED);
  return (value & FUTEX_TID_MASK) == tid;
}

int condition_wait(aos_condition *c, aos_mutex *m) {
  RunObservers run_observers(c, false);
  const uint32_t tid = get_tid();
  const uint32_t wait_start = __atomic_load_n(c, __ATOMIC_SEQ_CST);

  mutex_unlock(m);

  my_robust_list::Adder adder(m);

  while (true) {
    // Wait in the kernel iff the value of it doesn't change (ie somebody else
    // does a wake) from before we unlocked the mutex.
    int ret;
    if (USE_REQUEUE_PI) {
      ret = sys_futex_wait_requeue_pi(c, wait_start, nullptr, &m->futex);
    } else {
      ret = sys_futex_wait(FUTEX_WAIT, c, wait_start, nullptr);
    }
    if (ret != 0) {
      // If it failed because somebody else did a wake and changed the value
      // before we actually made it to sleep.
      if (__builtin_expect(ret == -EAGAIN, true)) {
        // There's no need to unconditionally set FUTEX_WAITERS here if we're
        // using REQUEUE_PI because the kernel automatically does that in the
        // REQUEUE_PI iff it requeued anybody.
        // If we're not using REQUEUE_PI, then everything is just normal locks
        // etc, so there's no need to do anything special there either.

        // We have to relock it ourself because the kernel didn't do it.
        const int r = mutex_do_get(m, false, nullptr, tid);
        assert(__builtin_expect(r == 0 || r == 1, true));
        adder.Add();
        return r;
      }
      // Try again if it was because of a signal.
      if (__builtin_expect(ret == -EINTR, true)) continue;
      my_robust_list::robust_head.pending_next = 0;
      if (USE_REQUEUE_PI) {
        PELOG(FATAL, -ret, "FUTEX_WAIT_REQUEUE_PI(%p, %" PRIu32 ", %p) failed",
              c, wait_start, &m->futex);
      } else {
        PELOG(FATAL, -ret, "FUTEX_WAIT(%p, %" PRIu32 ", nullptr) failed",
              c, wait_start);
      }
    } else {
      if (USE_REQUEUE_PI) {
        // Record that the kernel relocked it for us.
        lock_pthread_mutex(m);
      } else {
        // We have to take the lock ourself because the kernel won't, but
        // there's no need for it to be anything special because all waiters
        // just relock it like usual.
        const int r = mutex_do_get(m, false, nullptr, tid);
        assert(__builtin_expect(r == 0 || r == 1, true));
        adder.Add();
        return r;
      }

      // We succeeded in waiting, and the kernel took care of locking the mutex
      // for us and setting FUTEX_WAITERS iff it needed to (for REQUEUE_PI).

      adder.Add();

      const uint32_t value = __atomic_load_n(&m->futex, __ATOMIC_RELAXED);
      if (__builtin_expect((value & FUTEX_OWNER_DIED) != 0, false)) {
        __atomic_and_fetch(&m->futex, ~FUTEX_OWNER_DIED, __ATOMIC_RELAXED);
        return 1;
      } else {
        return 0;
      }
    }
  }
}

void condition_signal(aos_condition *c, aos_mutex *m) {
  condition_wake(c, m, 0);
}

void condition_broadcast(aos_condition *c, aos_mutex *m) {
  condition_wake(c, m, INT_MAX);
}

int futex_wait_timeout(aos_futex *m, const struct timespec *timeout) {
  RunObservers run_observers(m, false);
  const int ret = sys_futex_wait(FUTEX_WAIT, m, 0, timeout);
  if (ret != 0) {
    if (ret == -EINTR) {
      return 1;
    } else if (ret == -ETIMEDOUT) {
      return 2;
    } else if (ret != -EWOULDBLOCK) {
      errno = -ret;
      return -1;
    }
  }
  ANNOTATE_HAPPENS_AFTER(m);
  return 0;
}

int futex_wait(aos_futex *m) { return futex_wait_timeout(m, NULL); }

int futex_set_value(aos_futex *m, uint32_t value) {
  RunObservers run_observers(m, false);
  ANNOTATE_HAPPENS_BEFORE(m);
  __atomic_store_n(m, value, __ATOMIC_SEQ_CST);
  const int r = sys_futex_wake(m, INT_MAX - 4096);
  if (__builtin_expect(
          static_cast<unsigned int>(r) > static_cast<unsigned int>(-4096),
          false)) {
    errno = -r;
    return -1;
  } else {
    return r;
  }
}

int futex_set(aos_futex *m) {
  return futex_set_value(m, 1);
}

int futex_unset(aos_futex *m) {
  return !__atomic_exchange_n(m, 0, __ATOMIC_SEQ_CST);
}

namespace aos {
namespace linux_code {
namespace ipc_lib {

// Sets functions to run before and after all futex operations.
// This is important when doing robustness testing because the memory has to be
// made writable for the whole futex operation, otherwise it never succeeds.
void SetFutexAccessorObservers(FutexAccessorObserver before,
                               FutexAccessorObserver after) {
  before_observer = before;
  after_observer = after;
}

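// A minimal usage sketch (MakeWritable/MakeReadOnly are hypothetical observer
// functions, not part of this file): a robustness test might make the shared
// memory writable around every futex access with something like
//   void MakeWritable(void *address, bool /*write*/) { /* mprotect(...) */ }
//   void MakeReadOnly(void *address, bool /*write*/) { /* mprotect(...) */ }
//   SetFutexAccessorObservers(MakeWritable, MakeReadOnly);
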
// Sets an extra offset between mutexes and the value we use for them in the
// robust list (only the forward pointers). This is used to work around a kernel
// bug by keeping a second set of mutexes which is always writable so the kernel
// won't go into an infinite loop when trying to unlock them.
void SetRobustListOffset(ptrdiff_t offset) {
  my_robust_list::SetRobustListOffset(offset);
}

// Returns true iff there are any mutexes locked by the current thread.
// This is mainly useful for testing.
bool HaveLockedMutexes() {
  return my_robust_list::HaveLockedMutexes();
}

}  // namespace ipc_lib
}  // namespace linux_code
}  // namespace aos