#if !AOS_DEBUG
#undef NDEBUG
#define NDEBUG
#endif

#include "aos/ipc_lib/aos_sync.h"

#include <linux/futex.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <errno.h>
#include <stdint.h>
#include <limits.h>
#include <string.h>
#include <inttypes.h>
#include <sys/types.h>
#include <stddef.h>
#include <assert.h>
#include <pthread.h>
#include <sched.h>

#ifdef AOS_SANITIZER_thread
#include <sanitizer/tsan_interface_atomic.h>
#endif

#include <algorithm>
#include <type_traits>

#include "aos/logging/logging.h"
#include "aos/macros.h"
#include "aos/util/compiler_memory_barrier.h"
#include "absl/base/call_once.h"

using ::aos::linux_code::ipc_lib::FutexAccessorObserver;

// This code was originally based on
// <https://www.akkadia.org/drepper/futex.pdf>, but it has since evolved a lot.
// However, that paper still has useful information.
//
// Finding information about actually using futexes is really REALLY hard, so
// here's a list of the stuff that I've used:
// futex(7) has a really high-level overview.
// <http://locklessinc.com/articles/futex_cheat_sheet/> describes some of the
// operations in a bit more detail than most places.
// <http://locklessinc.com/articles/mutex_cv_futex/> is the basis of our
// implementations (before PI).
// <http://lwn.net/Articles/360699/> has a nice overview of futexes in late 2009
// (fairly recent compared to everything else...).
// <https://www.kernel.org/doc/Documentation/pi-futex.txt>,
// <https://www.kernel.org/doc/Documentation/futex-requeue-pi.txt>,
// <https://www.kernel.org/doc/Documentation/robust-futexes.txt>,
// and <https://www.kernel.org/doc/Documentation/robust-futex-ABI.txt> are all
// useful references.
// The kernel source (kernel/futex.c) has some useful comments about what the
// various operations do (except figuring out which argument goes where in the
// syscall is still confusing).
// futex(2) is basically useless except for describing the order of the
// arguments (it only has high-level descriptions of what some of the
// operations do, and some of them are wrong in Wheezy).
// glibc's nptl pthreads implementation is the intended user of most of these
// things, so it is also a good place to look for examples. However, it is all
// very hard to read because it supports ~20 different kinds of mutexes and
// several variations of condition variables, and some of the pieces of code
// are only written in assembly.
// set_robust_list(2) is wrong in Wheezy (it doesn't actually take a TID
// argument).
//
// Can't use PRIVATE futex operations because they use the pid (or something) as
// part of the hash.
//
// ThreadSanitizer understands how these mutexes etc work. It appears to be able
// to figure out the happens-before relationship from the __ATOMIC_SEQ_CST
// atomic primitives.
//
// Remember that EAGAIN and EWOULDBLOCK are the same! (ie if you get EAGAIN from
// FUTEX_WAIT, the docs call it EWOULDBLOCK...)

// Values for an aos_mutex.futex (kernel-mandated):
// 0 = unlocked
// TID = locked, not contended
// |FUTEX_WAITERS = there are waiters (aka contended)
// |FUTEX_OWNER_DIED = old owner died
//
// Values for an aos_futex being used directly:
// 0 = unset
// 1 = set
//
// The value of an aos_condition is just a generation counter.

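// A minimal usage sketch of the direct-aos_futex case (illustrative only; the
// `event` variable is hypothetical, not part of this file). futex_wait() and
// futex_set() are defined near the bottom of this file:
//   aos_futex event = 0;  // 0 = unset
//   futex_wait(&event);   // waiter: blocks until the value leaves 0
//   futex_set(&event);    // setter: stores 1 and wakes up the waiters
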
#ifdef AOS_SANITIZER_thread
extern "C" void AnnotateHappensBefore(const char *file, int line,
                                      uintptr_t addr);
extern "C" void AnnotateHappensAfter(const char *file, int line,
                                     uintptr_t addr);
#define ANNOTATE_HAPPENS_BEFORE(address)    \
  AnnotateHappensBefore(__FILE__, __LINE__, \
                        reinterpret_cast<uintptr_t>(address))
#define ANNOTATE_HAPPENS_AFTER(address) \
  AnnotateHappensAfter(__FILE__, __LINE__, reinterpret_cast<uintptr_t>(address))
#else
#define ANNOTATE_HAPPENS_BEFORE(address)
#define ANNOTATE_HAPPENS_AFTER(address)
#endif

namespace {

const bool kRobustListDebug = false;
const bool kLockDebug = false;
const bool kPrintOperations = false;

// These sys_futex_* functions are wrappers around syscall(SYS_futex). They each
// take a specific set of arguments for a given futex operation. They return the
// result or a negated errno value; the kernel guarantees that values in
// -1..-4095 are errors, never successful results.
//
// They each have optimized versions for ARM EABI (the syscall interface is
// different for non-EABI ARM, so that is the right thing to test for) that
// don't go through syscall(2) or errno.
// These use register variables to get the values in the right registers to
// actually make the syscall.

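// As an illustration of the negated-errno convention (a sketch with a
// hypothetical futex f, not code from this file), a caller retrying on signals
// can compare the return value directly instead of looking at errno:
//   int r;
//   do {
//     r = sys_futex_wait(FUTEX_WAIT, &f, 0, nullptr);
//   } while (r == -EINTR);
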
// The actual macro that we key off of to use the inline versions or not.
#if defined(__ARM_EABI__)
#define ARM_EABI_INLINE_SYSCALL 1
#else
#define ARM_EABI_INLINE_SYSCALL 0
#endif

// Used for FUTEX_WAIT, FUTEX_LOCK_PI, and FUTEX_TRYLOCK_PI.
inline int sys_futex_wait(int op, aos_futex *addr1, int val1,
                          const struct timespec *timeout) {
#if ARM_EABI_INLINE_SYSCALL
  register aos_futex *addr1_reg __asm__("r0") = addr1;
  register int op_reg __asm__("r1") = op;
  register int val1_reg __asm__("r2") = val1;
  register const struct timespec *timeout_reg __asm__("r3") = timeout;
  register int syscall_number __asm__("r7") = SYS_futex;
  register int result __asm__("r0");
  __asm__ volatile("swi #0"
                   : "=r"(result)
                   : "r"(addr1_reg), "r"(op_reg), "r"(val1_reg),
                     "r"(timeout_reg), "r"(syscall_number)
                   : "memory");
  return result;
#else
  const int r = syscall(SYS_futex, addr1, op, val1, timeout);
  if (r == -1) return -errno;
  return r;
#endif
}

inline int sys_futex_wake(aos_futex *addr1, int val1) {
#if ARM_EABI_INLINE_SYSCALL
  register aos_futex *addr1_reg __asm__("r0") = addr1;
  register int op_reg __asm__("r1") = FUTEX_WAKE;
  register int val1_reg __asm__("r2") = val1;
  register int syscall_number __asm__("r7") = SYS_futex;
  register int result __asm__("r0");
  __asm__ volatile("swi #0"
                   : "=r"(result)
                   : "r"(addr1_reg), "r"(op_reg), "r"(val1_reg),
                     "r"(syscall_number)
                   : "memory");
  return result;
#else
  const int r = syscall(SYS_futex, addr1, FUTEX_WAKE, val1);
  if (r == -1) return -errno;
  return r;
#endif
}

inline int sys_futex_cmp_requeue_pi(aos_futex *addr1, int num_wake,
                                    int num_requeue, aos_futex *m,
                                    uint32_t val) {
#if ARM_EABI_INLINE_SYSCALL
  register aos_futex *addr1_reg __asm__("r0") = addr1;
  register int op_reg __asm__("r1") = FUTEX_CMP_REQUEUE_PI;
  register int num_wake_reg __asm__("r2") = num_wake;
  register int num_requeue_reg __asm__("r3") = num_requeue;
  register aos_futex *m_reg __asm__("r4") = m;
  register uint32_t val_reg __asm__("r5") = val;
  register int syscall_number __asm__("r7") = SYS_futex;
  register int result __asm__("r0");
  __asm__ volatile("swi #0"
                   : "=r"(result)
                   : "r"(addr1_reg), "r"(op_reg), "r"(num_wake_reg),
                     "r"(num_requeue_reg), "r"(m_reg), "r"(val_reg),
                     "r"(syscall_number)
                   : "memory");
  return result;
#else
  const int r = syscall(SYS_futex, addr1, FUTEX_CMP_REQUEUE_PI, num_wake,
                        num_requeue, m, val);
  if (r == -1) return -errno;
  return r;
#endif
}

inline int sys_futex_wait_requeue_pi(aos_condition *addr1,
                                     uint32_t start_val,
                                     const struct timespec *timeout,
                                     aos_futex *m) {
#if ARM_EABI_INLINE_SYSCALL
  register aos_condition *addr1_reg __asm__("r0") = addr1;
  register int op_reg __asm__("r1") = FUTEX_WAIT_REQUEUE_PI;
  register uint32_t start_val_reg __asm__("r2") = start_val;
  register const struct timespec *timeout_reg __asm__("r3") = timeout;
  register aos_futex *m_reg __asm__("r4") = m;
  register int syscall_number __asm__("r7") = SYS_futex;
  register int result __asm__("r0");
  __asm__ volatile("swi #0"
                   : "=r"(result)
                   : "r"(addr1_reg), "r"(op_reg), "r"(start_val_reg),
                     "r"(timeout_reg), "r"(m_reg), "r"(syscall_number)
                   : "memory");
  return result;
#else
  const int r =
      syscall(SYS_futex, addr1, FUTEX_WAIT_REQUEUE_PI, start_val, timeout, m);
  if (r == -1) return -errno;
  return r;
#endif
}

inline int sys_futex_unlock_pi(aos_futex *addr1) {
#if ARM_EABI_INLINE_SYSCALL
  register aos_futex *addr1_reg __asm__("r0") = addr1;
  register int op_reg __asm__("r1") = FUTEX_UNLOCK_PI;
  register int syscall_number __asm__("r7") = SYS_futex;
  register int result __asm__("r0");
  __asm__ volatile("swi #0"
                   : "=r"(result)
                   : "r"(addr1_reg), "r"(op_reg), "r"(syscall_number)
                   : "memory");
  return result;
#else
  const int r = syscall(SYS_futex, addr1, FUTEX_UNLOCK_PI);
  if (r == -1) return -errno;
  return r;
#endif
}

// Returns the previous value of f.
inline uint32_t compare_and_swap_val(aos_futex *f, uint32_t before,
                                     uint32_t after) {
#ifdef AOS_SANITIZER_thread
  // This is a workaround for <https://llvm.org/bugs/show_bug.cgi?id=23176>.
  // Basically, most of the atomic operations are broken under tsan, but this
  // particular one isn't.
  // TODO(Brian): Remove this #ifdef (and the one in compare_and_swap) once we
  // don't have to worry about tsan with this bug any more.
  uint32_t before_value = before;
  __tsan_atomic32_compare_exchange_strong(
      reinterpret_cast<int32_t *>(f),
      reinterpret_cast<int32_t *>(&before_value), after,
      __tsan_memory_order_seq_cst, __tsan_memory_order_seq_cst);
  return before_value;
#else
  return __sync_val_compare_and_swap(f, before, after);
#endif
}

// Returns true if it succeeds and false if it fails.
inline bool compare_and_swap(aos_futex *f, uint32_t before, uint32_t after) {
#ifdef AOS_SANITIZER_thread
  return compare_and_swap_val(f, before, after) == before;
#else
  return __sync_bool_compare_and_swap(f, before, after);
#endif
}

#ifdef AOS_SANITIZER_thread

// Simple macro for checking something which should always be true.
// Using the standard CHECK macro isn't safe because failures often result in
// reentering the mutex locking code, which doesn't work.
#define SIMPLE_CHECK(expr)                                                   \
  do {                                                                       \
    if (!(expr)) {                                                           \
      fprintf(stderr, "%s: %d: SIMPLE_CHECK(" #expr ") failed!\n", __FILE__, \
              __LINE__);                                                     \
      abort();                                                               \
    }                                                                        \
  } while (false)

// Forcibly initializes the pthread mutex for *m.
// This sequence of operations is only safe for the simpler kinds of mutexes in
// glibc's pthreads implementation on Linux.
void init_pthread_mutex(aos_mutex *m) {
  // Re-initialize the mutex so the destroy won't fail if it's locked.
  // tsan ignores this.
  SIMPLE_CHECK(0 == pthread_mutex_init(&m->pthread_mutex, nullptr));
  // Destroy the mutex so tsan will forget about it if some now-dead thread
  // locked it.
  SIMPLE_CHECK(0 == pthread_mutex_destroy(&m->pthread_mutex));

  // Now actually initialize it, making sure it's process-shareable so it works
  // correctly across shared memory.
  pthread_mutexattr_t attr;
  SIMPLE_CHECK(0 == pthread_mutexattr_init(&attr));
  SIMPLE_CHECK(0 == pthread_mutexattr_setpshared(&attr, true));
  SIMPLE_CHECK(0 == pthread_mutex_init(&m->pthread_mutex, &attr));
  SIMPLE_CHECK(0 == pthread_mutexattr_destroy(&attr));
}

// Locks the pthread mutex for *m.
// If a stack trace ever reveals the pthread_mutex_lock call in here blocking,
// there is a bug in our mutex code or the way somebody is calling it.
void lock_pthread_mutex(aos_mutex *m) {
  if (!m->pthread_mutex_init) {
    init_pthread_mutex(m);
    m->pthread_mutex_init = true;
  }
  SIMPLE_CHECK(0 == pthread_mutex_lock(&m->pthread_mutex));
}

// Forcibly locks the pthread mutex for *m.
// This will (somewhat hackily) rip the lock out from underneath somebody else
// who is already holding it.
void force_lock_pthread_mutex(aos_mutex *m) {
  if (!m->pthread_mutex_init) {
    init_pthread_mutex(m);
    m->pthread_mutex_init = true;
  }
  const int trylock_result = pthread_mutex_trylock(&m->pthread_mutex);
  SIMPLE_CHECK(trylock_result == 0 || trylock_result == EBUSY);
  if (trylock_result == 0) {
    // We're good, so unlock it and then go for a real lock down below.
    SIMPLE_CHECK(0 == pthread_mutex_unlock(&m->pthread_mutex));
  } else {
    // Somebody (should always be somebody else who died with it held) already
    // has it, so make tsan forget about that.
    init_pthread_mutex(m);
  }
  lock_pthread_mutex(m);
}

// Unlocks the pthread mutex for *m.
void unlock_pthread_mutex(aos_mutex *m) {
  assert(m->pthread_mutex_init);
  SIMPLE_CHECK(0 == pthread_mutex_unlock(&m->pthread_mutex));
}

#else

// Empty implementations of all these so the code below doesn't need #ifdefs.
static inline void lock_pthread_mutex(aos_mutex *) {}
static inline void force_lock_pthread_mutex(aos_mutex *) {}
static inline void unlock_pthread_mutex(aos_mutex *) {}

#endif

pid_t do_get_tid() {
  pid_t r = syscall(SYS_gettid);
  assert(r > 0);
  return r;
}

// This gets called by functions before AOS_LOG(FATAL)ing with error messages
// that would be incorrect if the error was caused by a process forking without
// initialize_in_new_thread getting called in the fork.
void check_cached_tid(pid_t tid) {
  pid_t actual = do_get_tid();
  if (tid != actual) {
    AOS_LOG(FATAL,
            "task %jd forked into %jd without letting aos_sync know"
            " so we're not really sure what's going on\n",
            static_cast<intmax_t>(tid), static_cast<intmax_t>(actual));
  }
}

// Starts off at 0 in each new thread (because that's what it gets initialized
// to in most of them, or it gets reset to 0 after a fork by atfork_child()).
thread_local pid_t my_tid = 0;

// Gets called before the fork(2) wrapper function returns in the child.
void atfork_child() {
  // The next time get_tid() is called, it will set everything up again.
  my_tid = 0;
}

void InstallAtforkHook() {
  if (pthread_atfork(NULL, NULL, atfork_child) != 0) {
    AOS_PLOG(FATAL, "pthread_atfork(NULL, NULL, %p) failed", atfork_child);
  }
}

// This gets called to set everything up in a new thread by get_tid().
void initialize_in_new_thread();

// Gets the current thread's TID and does all of the 1-time initialization the
// first time it's called in a given thread.
inline uint32_t get_tid() {
  if (__builtin_expect(my_tid == 0, false)) {
    initialize_in_new_thread();
  }
  static_assert(sizeof(my_tid) <= sizeof(uint32_t), "pid_t is too big");
  return static_cast<uint32_t>(my_tid);
}

// Contains all of the stuff for dealing with the robust list. Nothing outside
// this namespace should touch anything inside it except Init, Adder, and
// Remover.
namespace my_robust_list {

static_assert(offsetof(aos_mutex, next) == 0,
              "Our math all assumes that the beginning of a mutex and its next "
              "pointer are at the same place in memory.");

// Our version of robust_list_head.
// This is copied from the kernel header because that's a pretty stable ABI (and
// any changes will be backwards compatible anyways) and we want ours to have
// different types.
// The uintptr_ts are &next of the elements in the list (with stuff |ed in).
struct aos_robust_list_head {
  uintptr_t next;
  long futex_offset;
  uintptr_t pending_next;
};

static_assert(offsetof(aos_robust_list_head, next) ==
                  offsetof(robust_list_head, list),
              "Our aos_robust_list_head doesn't match the kernel's");
static_assert(offsetof(aos_robust_list_head, futex_offset) ==
                  offsetof(robust_list_head, futex_offset),
              "Our aos_robust_list_head doesn't match the kernel's");
static_assert(offsetof(aos_robust_list_head, pending_next) ==
                  offsetof(robust_list_head, list_op_pending),
              "Our aos_robust_list_head doesn't match the kernel's");
static_assert(sizeof(aos_robust_list_head) == sizeof(robust_list_head),
              "Our aos_robust_list_head doesn't match the kernel's");

thread_local aos_robust_list_head robust_head;

// Extra offset between mutex values and where we point to for their robust list
// entries (from SetRobustListOffset).
uintptr_t robust_list_offset = 0;

// The value to OR each pointer's value with whenever putting it into the robust
// list (technically only if it's PI, but all of ours are, so...).
static const uintptr_t kRobustListOr = 1;

// Returns the value which goes into a next variable to represent the head.
inline uintptr_t robust_head_next_value() {
  return reinterpret_cast<uintptr_t>(&robust_head.next);
}
// Returns true iff next represents the head.
inline bool next_is_head(uintptr_t next) {
  return next == robust_head_next_value();
}
// Returns the (pseudo-)mutex corresponding to the head.
// This does NOT have a previous pointer, so be careful with the return value.
inline aos_mutex *robust_head_mutex() {
  return reinterpret_cast<aos_mutex *>(robust_head_next_value());
}

inline uintptr_t mutex_to_next(aos_mutex *m) {
  return (reinterpret_cast<uintptr_t>(&m->next) + robust_list_offset) |
         kRobustListOr;
}
inline aos_mutex *next_to_mutex(uintptr_t next) {
  if (__builtin_expect(robust_list_offset != 0, false) && next_is_head(next)) {
    // We don't offset the head pointer, so be careful.
    return reinterpret_cast<aos_mutex *>(next);
  }
  return reinterpret_cast<aos_mutex *>(
      (next & ~kRobustListOr) - robust_list_offset);
}

// Sets up the robust list for each thread.
void Init() {
  // It starts out just pointing back to itself.
  robust_head.next = robust_head_next_value();
  robust_head.futex_offset = static_cast<ssize_t>(offsetof(aos_mutex, futex)) -
                             static_cast<ssize_t>(offsetof(aos_mutex, next));
  robust_head.pending_next = 0;
  if (syscall(SYS_set_robust_list, robust_head_next_value(),
              sizeof(robust_head)) != 0) {
    AOS_PLOG(FATAL, "set_robust_list(%p, %zd) failed",
             reinterpret_cast<void *>(robust_head.next), sizeof(robust_head));
  }
  if (kRobustListDebug) {
    printf("%" PRId32 ": init done\n", get_tid());
  }
}
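
// As a worked illustration of that offset math (explanatory comment only): the
// kernel walks the list through the next pointers, each of which points at
// &m->next, and adds futex_offset (offsetof(aos_mutex, futex) -
// offsetof(aos_mutex, next)) to find &m->futex, where it sets FUTEX_OWNER_DIED
// if this thread dies while holding m.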

// Updating the offset with locked mutexes is important during robustness
// testing, because there are mutexes which are locked before this is set to a
// non-0 value and then unlocked after it is changed back. However, to make sure
// the code works correctly when manipulating the next pointer of the last of
// those mutexes, all of their next values have to be adjusted appropriately.
void SetRobustListOffset(uintptr_t offset) {
  const uintptr_t offset_change = offset - robust_list_offset;
  robust_list_offset = offset;
  aos_mutex *m = robust_head_mutex();
  // Update the offset contained in each of the mutexes which is already locked.
  while (!next_is_head(m->next)) {
    m->next += offset_change;
    m = next_to_mutex(m->next);
  }
}

bool HaveLockedMutexes() {
  return robust_head.next != robust_head_next_value();
}

// Handles adding a mutex to the robust list.
// The idea is to create one of these at the beginning of a function that needs
// to do this and then call Add() iff it should actually be added.
class Adder {
 public:
  Adder(aos_mutex *m) : m_(m) {
    assert(robust_head.pending_next == 0);
    if (kRobustListDebug) {
      printf("%" PRId32 ": maybe add %p\n", get_tid(), m_);
    }
    robust_head.pending_next = mutex_to_next(m);
    aos_compiler_memory_barrier();
  }
  ~Adder() {
    assert(robust_head.pending_next == mutex_to_next(m_));
    if (kRobustListDebug) {
      printf("%" PRId32 ": done maybe add %p, n=%p p=%p\n", get_tid(), m_,
             next_to_mutex(m_->next), m_->previous);
    }
    aos_compiler_memory_barrier();
    robust_head.pending_next = 0;
  }

  void Add() {
    assert(robust_head.pending_next == mutex_to_next(m_));
    if (kRobustListDebug) {
      printf("%" PRId32 ": adding %p\n", get_tid(), m_);
    }
    const uintptr_t old_head_next_value = robust_head.next;

    m_->next = old_head_next_value;
    aos_compiler_memory_barrier();
    robust_head.next = mutex_to_next(m_);

    m_->previous = robust_head_mutex();
    if (!next_is_head(old_head_next_value)) {
      // robust_head's pseudo-mutex doesn't have a previous pointer to update.
      next_to_mutex(old_head_next_value)->previous = m_;
    }
    aos_compiler_memory_barrier();
    if (kRobustListDebug) {
      printf("%" PRId32 ": done adding %p\n", get_tid(), m_);
    }
  }

 private:
  aos_mutex *const m_;

  DISALLOW_COPY_AND_ASSIGN(Adder);
};
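
// For example, mutex_get below uses it like this, only registering the mutex
// in the robust list once the lock has actually been acquired:
//   my_robust_list::Adder adder(m);
//   const int r = mutex_do_get(m, signals_fail, timeout, tid);
//   if (r == 0 || r == 1) adder.Add();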

// Handles removing a mutex from the robust list.
// The idea is to create one of these at the beginning of a function that needs
// to do this.
class Remover {
 public:
  Remover(aos_mutex *m) {
    assert(robust_head.pending_next == 0);
    if (kRobustListDebug) {
      printf("%" PRId32 ": beginning to remove %p, n=%p p=%p\n", get_tid(), m,
             next_to_mutex(m->next), m->previous);
    }
    robust_head.pending_next = mutex_to_next(m);
    aos_compiler_memory_barrier();

    aos_mutex *const previous = m->previous;
    const uintptr_t next_value = m->next;

    previous->next = m->next;
    if (!next_is_head(next_value)) {
      // robust_head's pseudo-mutex doesn't have a previous pointer to update.
      next_to_mutex(next_value)->previous = previous;
    }

    if (kRobustListDebug) {
      printf("%" PRId32 ": done removing %p\n", get_tid(), m);
    }
  }
  ~Remover() {
    assert(robust_head.pending_next != 0);
    aos_compiler_memory_barrier();
    robust_head.pending_next = 0;
    if (kRobustListDebug) {
      printf("%" PRId32 ": done with removal\n", get_tid());
    }
  }

 private:
  DISALLOW_COPY_AND_ASSIGN(Remover);
};

}  // namespace my_robust_list

void initialize_in_new_thread() {
  // No synchronization necessary in most of this because it's all thread-local!

  my_tid = do_get_tid();

  static absl::once_flag once;
  absl::call_once(once, InstallAtforkHook);

  my_robust_list::Init();
}

FutexAccessorObserver before_observer = nullptr, after_observer = nullptr;

// RAII class which runs before_observer during construction and after_observer
// during destruction.
class RunObservers {
 public:
  template <class T>
  RunObservers(T *address, bool write)
      : address_(static_cast<void *>(
            const_cast<typename ::std::remove_cv<T>::type *>(address))),
        write_(write) {
    if (__builtin_expect(before_observer != nullptr, false)) {
      before_observer(address_, write_);
    }
  }
  ~RunObservers() {
    if (__builtin_expect(after_observer != nullptr, false)) {
      after_observer(address_, write_);
    }
  }

 private:
  void *const address_;
  const bool write_;

  DISALLOW_COPY_AND_ASSIGN(RunObservers);
};

// Finishes the locking of a mutex by potentially clearing FUTEX_OWNER_DIED in
// the futex and returning the correct value.
inline int mutex_finish_lock(aos_mutex *m) {
  const uint32_t value = __atomic_load_n(&m->futex, __ATOMIC_ACQUIRE);
  if (__builtin_expect((value & FUTEX_OWNER_DIED) != 0, false)) {
    __atomic_and_fetch(&m->futex, ~FUTEX_OWNER_DIED, __ATOMIC_RELAXED);
    force_lock_pthread_mutex(m);
    return 1;
  } else {
    lock_pthread_mutex(m);
    return 0;
  }
}

// Split out separately from mutex_get so condition_wait can call it and use its
// own my_robust_list::Adder.
inline int mutex_do_get(aos_mutex *m, bool signals_fail,
                        const struct timespec *timeout, uint32_t tid) {
  RunObservers run_observers(m, true);
  if (kPrintOperations) {
    printf("%" PRId32 ": %p do_get\n", tid, m);
  }

  while (true) {
    // If the atomic 0->TID transition fails.
    if (!compare_and_swap(&m->futex, 0, tid)) {
      // Wait in the kernel, which handles atomically ORing in FUTEX_WAITERS
      // before actually sleeping.
      const int ret = sys_futex_wait(FUTEX_LOCK_PI, &m->futex, 1, timeout);
      if (ret != 0) {
        if (timeout != NULL && ret == -ETIMEDOUT) {
          return 3;
        }
        if (__builtin_expect(ret == -EINTR, true)) {
          if (signals_fail) {
            return 2;
          } else {
            continue;
          }
        }
        my_robust_list::robust_head.pending_next = 0;
        if (ret == -EDEADLK) {
          AOS_LOG(FATAL, "multiple lock of %p by %" PRId32 "\n", m, tid);
        }
        AOS_PELOG(FATAL, -ret, "FUTEX_LOCK_PI(%p(=%" PRIu32 "), 1, %p) failed",
                  &m->futex, __atomic_load_n(&m->futex, __ATOMIC_SEQ_CST),
                  timeout);
      } else {
        if (kLockDebug) {
          printf("%" PRId32 ": %p kernel lock done\n", tid, m);
        }
        // The kernel already handled setting the value to our TID (ish).
        break;
      }
    } else {
      if (kLockDebug) {
        printf("%" PRId32 ": %p fast lock done\n", tid, m);
      }
      lock_pthread_mutex(m);
      // Fastpath succeeded, so no need to call into the kernel.
      // Because this is the fastpath, it's a good idea to avoid even having to
      // load the value again down below.
      return 0;
    }
  }

  return mutex_finish_lock(m);
}

// The common implementation for everything that wants to lock a mutex.
// If signals_fail is false, the function will try again if the wait syscall is
// interrupted by a signal.
// timeout can be NULL for no timeout.
inline int mutex_get(aos_mutex *m, bool signals_fail,
                     const struct timespec *timeout) {
  const uint32_t tid = get_tid();
  my_robust_list::Adder adder(m);
  const int r = mutex_do_get(m, signals_fail, timeout, tid);
  if (r == 0 || r == 1) adder.Add();
  return r;
}

// The common implementation for broadcast and signal.
// number_requeue is the number of waiters to requeue (probably INT_MAX or 0).
// One waiter will always be woken.
void condition_wake(aos_condition *c, aos_mutex *m, int number_requeue) {
  RunObservers run_observers(c, true);
  // Make it so that anybody just going to sleep won't.
  // This is where we might accidentally wake more than just 1 waiter with 1
  // signal():
  // 1 already sleeping will be woken but n might never actually make it to
  // sleep in the kernel because of this.
  uint32_t new_value = __atomic_add_fetch(c, 1, __ATOMIC_SEQ_CST);

  while (true) {
    // This really wants to be FUTEX_REQUEUE_PI, but the kernel doesn't have
    // that... However, the code to support that is in the kernel, so it might
    // be a good idea to patch it to support that and use it iff it's there.
    const int ret =
        sys_futex_cmp_requeue_pi(c, 1, number_requeue, &m->futex, new_value);
    if (ret < 0) {
      // If the value got changed out from under us (aka somebody else did a
      // condition_wake).
      if (__builtin_expect(ret == -EAGAIN, true)) {
        // If we're doing a broadcast, the other guy might have done a signal
        // instead, so we have to try again.
        // If we're doing a signal, we have to go again to make sure that 2
        // signals wake 2 processes.
        new_value = __atomic_load_n(c, __ATOMIC_RELAXED);
        continue;
      }
      my_robust_list::robust_head.pending_next = 0;
      AOS_PELOG(FATAL, -ret, "FUTEX_CMP_REQUEUE_PI(%p, 1, %d, %p, *%p) failed",
                c, number_requeue, &m->futex, c);
    } else {
      return;
    }
  }
}

}  // namespace

int mutex_lock(aos_mutex *m) {
  return mutex_get(m, true, NULL);
}
int mutex_lock_timeout(aos_mutex *m, const struct timespec *timeout) {
  return mutex_get(m, true, timeout);
}
int mutex_grab(aos_mutex *m) {
  return mutex_get(m, false, NULL);
}

void mutex_unlock(aos_mutex *m) {
  RunObservers run_observers(m, true);
  const uint32_t tid = get_tid();
  if (kPrintOperations) {
    printf("%" PRId32 ": %p unlock\n", tid, m);
  }

  const uint32_t value = __atomic_load_n(&m->futex, __ATOMIC_SEQ_CST);
  if (__builtin_expect((value & FUTEX_TID_MASK) != tid, false)) {
    my_robust_list::robust_head.pending_next = 0;
    check_cached_tid(tid);
    if ((value & FUTEX_TID_MASK) == 0) {
      AOS_LOG(FATAL, "multiple unlock of aos_mutex %p by %" PRId32 "\n", m,
              tid);
    } else {
      AOS_LOG(FATAL, "aos_mutex %p is locked by %" PRId32 ", not %" PRId32 "\n",
              m, value & FUTEX_TID_MASK, tid);
    }
  }

  my_robust_list::Remover remover(m);
  unlock_pthread_mutex(m);

  // If the atomic TID->0 transition fails (ie FUTEX_WAITERS is set),
  if (!compare_and_swap(&m->futex, tid, 0)) {
    // The kernel handles everything else.
    const int ret = sys_futex_unlock_pi(&m->futex);
    if (ret != 0) {
      my_robust_list::robust_head.pending_next = 0;
      AOS_PELOG(FATAL, -ret, "FUTEX_UNLOCK_PI(%p) failed", &m->futex);
    }
  } else {
    // There aren't any waiters, so no need to call into the kernel.
  }
}

int mutex_trylock(aos_mutex *m) {
  RunObservers run_observers(m, true);
  const uint32_t tid = get_tid();
  if (kPrintOperations) {
    printf("%" PRId32 ": %p trylock\n", tid, m);
  }
  my_robust_list::Adder adder(m);

  // Try an atomic 0->TID transition.
  uint32_t c = compare_and_swap_val(&m->futex, 0, tid);

  if (c != 0) {
    if (__builtin_expect((c & FUTEX_OWNER_DIED) == 0, true)) {
      // Somebody else had it locked; we failed.
      return 4;
    } else {
      // FUTEX_OWNER_DIED was set, so we have to call into the kernel to deal
      // with resetting it.
      const int ret = sys_futex_wait(FUTEX_TRYLOCK_PI, &m->futex, 0, NULL);
      if (ret == 0) {
        adder.Add();
        // Only clear the owner died if somebody else didn't do the recovery
        // and then unlock before our TRYLOCK happened.
        return mutex_finish_lock(m);
      } else {
        // EWOULDBLOCK means that somebody else beat us to it.
        if (__builtin_expect(ret == -EWOULDBLOCK, true)) {
          return 4;
        }
        my_robust_list::robust_head.pending_next = 0;
        AOS_PELOG(FATAL, -ret, "FUTEX_TRYLOCK_PI(%p, 0, NULL) failed",
                  &m->futex);
      }
    }
  }

  lock_pthread_mutex(m);
  adder.Add();
  return 0;
}


bool mutex_islocked(const aos_mutex *m) {
  const uint32_t tid = get_tid();

  const uint32_t value = __atomic_load_n(&m->futex, __ATOMIC_RELAXED);
  return (value & FUTEX_TID_MASK) == tid;
}

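// A sketch of the return-code handling these functions expect from callers
// (the caller and its timeout variable are hypothetical, not part of this
// file):
//   switch (mutex_lock_timeout(m, &timeout)) {
//     case 0: /* locked it */ break;
//     case 1: /* locked it, but the previous owner died holding it */ break;
//     case 2: /* interrupted by a signal */ break;
//     case 3: /* timed out */ break;
//   }
//   ...
//   mutex_unlock(m);
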
int condition_wait(aos_condition *c, aos_mutex *m, struct timespec *end_time) {
  RunObservers run_observers(c, false);
  const uint32_t tid = get_tid();
  const uint32_t wait_start = __atomic_load_n(c, __ATOMIC_SEQ_CST);

  mutex_unlock(m);

  my_robust_list::Adder adder(m);

  while (true) {
    // Wait in the kernel iff the value of it doesn't change (ie somebody else
    // does a wake) from before we unlocked the mutex.
    int ret = sys_futex_wait_requeue_pi(c, wait_start, end_time, &m->futex);

    if (ret != 0) {
      // Timed out waiting. Signal that back up to the user.
      if (__builtin_expect(ret == -ETIMEDOUT, true)) {
        // We have to relock it ourself because the kernel didn't do it.
        const int r = mutex_do_get(m, false, nullptr, tid);
        assert(__builtin_expect(r == 0 || r == 1, true));
        adder.Add();

        // OWNER_DIED takes priority. Pass it on if we found it.
        if (r == 1) return r;
        // Otherwise communicate that we timed out.
        return -1;
      }

      // If it failed because somebody else did a wake and changed the value
      // before we actually made it to sleep.
      if (__builtin_expect(ret == -EAGAIN, true)) {
        // There's no need to unconditionally set FUTEX_WAITERS here if we're
        // using REQUEUE_PI because the kernel automatically does that in the
        // REQUEUE_PI iff it requeued anybody.
        // If we're not using REQUEUE_PI, then everything is just normal locks
        // etc, so there's no need to do anything special there either.

        // We have to relock it ourself because the kernel didn't do it.
        const int r = mutex_do_get(m, false, nullptr, tid);
        assert(__builtin_expect(r == 0 || r == 1, true));
        adder.Add();
        return r;
      }
      // Try again if it was because of a signal.
      if (__builtin_expect(ret == -EINTR, true)) {
        continue;
      }
      my_robust_list::robust_head.pending_next = 0;
      AOS_PELOG(FATAL, -ret,
                "FUTEX_WAIT_REQUEUE_PI(%p, %" PRIu32 ", %p) failed", c,
                wait_start, &m->futex);
    } else {
      // Record that the kernel relocked it for us.
      lock_pthread_mutex(m);

      // We succeeded in waiting, and the kernel took care of locking the mutex
      // for us and setting FUTEX_WAITERS iff it needed to (for REQUEUE_PI).

      adder.Add();

      const uint32_t value = __atomic_load_n(&m->futex, __ATOMIC_RELAXED);
      if (__builtin_expect((value & FUTEX_OWNER_DIED) != 0, false)) {
        __atomic_and_fetch(&m->futex, ~FUTEX_OWNER_DIED, __ATOMIC_RELAXED);
        return 1;
      } else {
        return 0;
      }
    }
  }
}

void condition_signal(aos_condition *c, aos_mutex *m) {
  condition_wake(c, m, 0);
}

void condition_broadcast(aos_condition *c, aos_mutex *m) {
  condition_wake(c, m, INT_MAX);
}

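// A minimal condition-variable usage sketch (the caller and `predicate` are
// hypothetical; protecting the predicate with m is the caller's job):
//   mutex_grab(m);
//   while (!predicate) {
//     condition_wait(c, m, nullptr);  // returns with m locked again
//   }
//   mutex_unlock(m);
// and on the waking side:
//   mutex_grab(m);
//   predicate = true;
//   condition_signal(c, m);
//   mutex_unlock(m);
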
int futex_wait_timeout(aos_futex *m, const struct timespec *timeout) {
  RunObservers run_observers(m, false);
  const int ret = sys_futex_wait(FUTEX_WAIT, m, 0, timeout);
  if (ret != 0) {
    if (ret == -EINTR) {
      return 1;
    } else if (ret == -ETIMEDOUT) {
      return 2;
    } else if (ret != -EWOULDBLOCK) {
      errno = -ret;
      return -1;
    }
  }
  ANNOTATE_HAPPENS_AFTER(m);
  return 0;
}

int futex_wait(aos_futex *m) { return futex_wait_timeout(m, NULL); }

int futex_set_value(aos_futex *m, uint32_t value) {
  RunObservers run_observers(m, false);
  ANNOTATE_HAPPENS_BEFORE(m);
  __atomic_store_n(m, value, __ATOMIC_SEQ_CST);
  const int r = sys_futex_wake(m, INT_MAX - 4096);
  if (__builtin_expect(
          static_cast<unsigned int>(r) > static_cast<unsigned int>(-4096),
          false)) {
    errno = -r;
    return -1;
  } else {
    return r;
  }
}

int futex_set(aos_futex *m) {
  return futex_set_value(m, 1);
}

int futex_unset(aos_futex *m) {
  return !__atomic_exchange_n(m, 0, __ATOMIC_SEQ_CST);
}

namespace aos {
namespace linux_code {
namespace ipc_lib {

// Sets functions to run before and after all futex operations.
// This is important when doing robustness testing because the memory has to be
// made writable for the whole futex operation, otherwise it never succeeds.
void SetFutexAccessorObservers(FutexAccessorObserver before,
                               FutexAccessorObserver after) {
  before_observer = before;
  after_observer = after;
}

// Sets an extra offset between mutexes and the value we use for them in the
// robust list (only the forward pointers). This is used to work around a kernel
// bug by keeping a second set of mutexes which is always writable so the kernel
// won't go into an infinite loop when trying to unlock them.
void SetRobustListOffset(ptrdiff_t offset) {
  my_robust_list::SetRobustListOffset(offset);
}

// Returns true iff there are any mutexes locked by the current thread.
// This is mainly useful for testing.
bool HaveLockedMutexes() {
  return my_robust_list::HaveLockedMutexes();
}

}  // namespace ipc_lib
}  // namespace linux_code
}  // namespace aos