Brian Silverman | dc1eb27 | 2014-08-19 14:25:59 -0400 | [diff] [blame] | 1 | #if !AOS_DEBUG |
Austin Schuh | 7a41be6 | 2015-10-31 13:06:55 -0700 | [diff] [blame] | 2 | #undef NDEBUG |
Brian Silverman | dc1eb27 | 2014-08-19 14:25:59 -0400 | [diff] [blame] | 3 | #define NDEBUG |
| 4 | #endif |
| 5 | |
John Park | 398c74a | 2018-10-20 21:17:39 -0700 | [diff] [blame] | 6 | #include "aos/ipc_lib/aos_sync.h" |
Brian Silverman | dc1eb27 | 2014-08-19 14:25:59 -0400 | [diff] [blame] | 7 | |
| 8 | #include <linux/futex.h> |
| 9 | #include <unistd.h> |
| 10 | #include <sys/syscall.h> |
| 11 | #include <errno.h> |
| 12 | #include <stdint.h> |
| 13 | #include <limits.h> |
| 14 | #include <string.h> |
| 15 | #include <inttypes.h> |
| 16 | #include <sys/types.h> |
| 17 | #include <stddef.h> |
| 18 | #include <assert.h> |
| 19 | #include <pthread.h> |
| 20 | #include <sched.h> |
| 21 | |
Brian Silverman | 71c55c5 | 2014-08-19 14:31:59 -0400 | [diff] [blame] | 22 | #ifdef AOS_SANITIZER_thread |
| 23 | #include <sanitizer/tsan_interface_atomic.h> |
| 24 | #endif |
| 25 | |
Brian Silverman | dc1eb27 | 2014-08-19 14:25:59 -0400 | [diff] [blame] | 26 | #include <algorithm> |
Brian Silverman | 71c55c5 | 2014-08-19 14:31:59 -0400 | [diff] [blame] | 27 | #include <type_traits> |
Brian Silverman | dc1eb27 | 2014-08-19 14:25:59 -0400 | [diff] [blame] | 28 | |
Alex Perry | cb7da4b | 2019-08-28 19:35:56 -0700 | [diff] [blame] | 29 | #include "absl/base/call_once.h" |
John Park | 33858a3 | 2018-09-28 23:05:48 -0700 | [diff] [blame] | 30 | #include "aos/macros.h" |
| 31 | #include "aos/util/compiler_memory_barrier.h" |
Alex Perry | cb7da4b | 2019-08-28 19:35:56 -0700 | [diff] [blame] | 32 | #include "glog/logging.h" |
Brian Silverman | 71c55c5 | 2014-08-19 14:31:59 -0400 | [diff] [blame] | 33 | |
| 34 | using ::aos::linux_code::ipc_lib::FutexAccessorObserver; |
Brian Silverman | dc1eb27 | 2014-08-19 14:25:59 -0400 | [diff] [blame] | 35 | |
Brian Silverman | 0d8ed03 | 2016-05-31 10:37:48 -0700 | [diff] [blame] | 36 | // This code was originally based on <https://www.akkadia.org/drepper/futex.pdf>, |
Brian Silverman | dc1eb27 | 2014-08-19 14:25:59 -0400 | [diff] [blame] | 37 | // but it has since evolved a lot. However, that still has useful information. |
| 38 | // |
| 39 | // Finding information about actually using futexes is really REALLY hard, so |
| 40 | // here's a list of the stuff that I've used: |
| 41 | // futex(7) has a really high-level overview. |
| 42 | // <http://locklessinc.com/articles/futex_cheat_sheet/> describes some of the |
| 43 | // operations in a bit more detail than most places. |
| 44 | // <http://locklessinc.com/articles/mutex_cv_futex/> is the basis of our |
| 45 | // implementations (before PI). |
| 46 | // <http://lwn.net/Articles/360699/> has a nice overview of futexes in late 2009 |
| 47 | // (fairly recent compared to everything else...). |
| 48 | // <https://www.kernel.org/doc/Documentation/pi-futex.txt>, |
| 49 | // <https://www.kernel.org/doc/Documentation/futex-requeue-pi.txt>, |
| 50 | // <https://www.kernel.org/doc/Documentation/robust-futexes.txt>, |
| 51 | // and <https://www.kernel.org/doc/Documentation/robust-futex-ABI.txt> are all |
| 52 | // useful references. |
| 53 | // The kernel source (kernel/futex.c) has some useful comments about what the |
| 54 | // various operations do (except figuring out which argument goes where in the |
| 55 | // syscall is still confusing). |
| 56 | // futex(2) is basically useless except for describing the order of the |
| 57 | // arguments (it only has high-level descriptions of what some of the |
| 58 | // operations do, and some of them are wrong in Wheezy). |
| 59 | // glibc's nptl pthreads implementation is the intended user of most of these |
| 60 | // things, so it is also a good place to look for examples. However, it is all |
| 61 | // very hard to read because it supports ~20 different kinds of mutexes and |
| 62 | // several variations of condition variables, and some of the pieces of code |
| 63 | // are only written in assembly. |
| 64 | // set_robust_list(2) is wrong in Wheezy (it doesn't actually take a TID |
| 65 | // argument). |
| 66 | // |
| 67 | // Can't use PRIVATE futex operations because they use the pid (or something) as |
| 68 | // part of the hash. |
| 69 | // |
| 70 | // ThreadSanitizer understands how these mutexes etc work. It appears to be able |
| 71 | // to figure out the happens-before relationship from the __ATOMIC_SEQ_CST |
| 72 | // atomic primitives. |
| 73 | // |
| 74 | // Remember that EAGAIN and EWOULDBLOCK are the same! (ie if you get EAGAIN from |
| 75 | // FUTEX_WAIT, the docs call it EWOULDBLOCK...) |
| 76 | |
| 77 | // Values for an aos_mutex.futex (kernel-mandated): |
| 78 | // 0 = unlocked |
| 79 | // TID = locked, not contended |
| 80 | // |FUTEX_WAITERS = there are waiters (aka contended) |
Brian Silverman | 71c55c5 | 2014-08-19 14:31:59 -0400 | [diff] [blame] | 81 | // |FUTEX_OWNER_DIED = old owner died |
Brian Silverman | dc1eb27 | 2014-08-19 14:25:59 -0400 | [diff] [blame] | 82 | // |
| 83 | // Values for an aos_futex being used directly: |
| 84 | // 0 = unset |
| 85 | // 1 = set |
| 86 | // |
| 87 | // The value of an aos_condition is just a generation counter. |
| 88 | |
// Wrappers around ThreadSanitizer's AnnotateHappensBefore and
// AnnotateHappensAfter which fill in the call site's file and line.
// Both expand to nothing when AOS_SANITIZER_thread is not defined.
#ifndef AOS_SANITIZER_thread
#define ANNOTATE_HAPPENS_BEFORE(address)
#define ANNOTATE_HAPPENS_AFTER(address)
#else
extern "C" void AnnotateHappensBefore(const char *file, int line,
                                      uintptr_t addr);
extern "C" void AnnotateHappensAfter(const char *file, int line,
                                     uintptr_t addr);
#define ANNOTATE_HAPPENS_BEFORE(address)    \
  AnnotateHappensBefore(__FILE__, __LINE__, \
                        reinterpret_cast<uintptr_t>(address))
#define ANNOTATE_HAPPENS_AFTER(address)    \
  AnnotateHappensAfter(__FILE__, __LINE__, \
                       reinterpret_cast<uintptr_t>(address))
#endif
| 103 | |
Brian Silverman | dc1eb27 | 2014-08-19 14:25:59 -0400 | [diff] [blame] | 104 | namespace { |
| 105 | |
// Compile-time switches which enable extra printf output from this file.
// All of them are off by default.
constexpr bool kRobustListDebug = false;  // Robust list setup/add/remove.
constexpr bool kLockDebug = false;        // How each lock got acquired.
constexpr bool kPrintOperations = false;  // Each mutex operation performed.
| 109 | |
// The sys_futex_* functions below wrap syscall(SYS_futex). Each one takes the
// arguments for a specific group of futex operations and returns either the
// syscall's result or a negated errno value. The kernel guarantees that
// -1..-4095 always mean errors and never successful results.
//
// Each of them also has an optimized version for ARM EABI which skips
// syscall(2) and errno entirely. (The syscall interface is different for
// non-EABI ARM, which is why EABI is the right thing to test for.)
// Those versions use register variables to place the values in the registers
// which the syscall expects.

// ARM_EABI_INLINE_SYSCALL is the macro the functions actually key off of:
// 1 selects the inline versions, 0 selects the syscall(2) versions.
#ifdef __ARM_EABI__
#define ARM_EABI_INLINE_SYSCALL 1
#else
#define ARM_EABI_INLINE_SYSCALL 0
#endif
Brian Silverman | dc1eb27 | 2014-08-19 14:25:59 -0400 | [diff] [blame] | 127 | |
| 128 | // Used for FUTEX_WAIT, FUTEX_LOCK_PI, and FUTEX_TRYLOCK_PI. |
| 129 | inline int sys_futex_wait(int op, aos_futex *addr1, int val1, |
| 130 | const struct timespec *timeout) { |
| 131 | #if ARM_EABI_INLINE_SYSCALL |
| 132 | register aos_futex *addr1_reg __asm__("r0") = addr1; |
| 133 | register int op_reg __asm__("r1") = op; |
| 134 | register int val1_reg __asm__("r2") = val1; |
| 135 | register const struct timespec *timeout_reg __asm__("r3") = timeout; |
| 136 | register int syscall_number __asm__("r7") = SYS_futex; |
| 137 | register int result __asm__("r0"); |
| 138 | __asm__ volatile("swi #0" |
| 139 | : "=r"(result) |
| 140 | : "r"(addr1_reg), "r"(op_reg), "r"(val1_reg), |
| 141 | "r"(timeout_reg), "r"(syscall_number) |
| 142 | : "memory"); |
| 143 | return result; |
| 144 | #else |
| 145 | const int r = syscall(SYS_futex, addr1, op, val1, timeout); |
| 146 | if (r == -1) return -errno; |
| 147 | return r; |
| 148 | #endif |
| 149 | } |
| 150 | |
| 151 | inline int sys_futex_wake(aos_futex *addr1, int val1) { |
| 152 | #if ARM_EABI_INLINE_SYSCALL |
| 153 | register aos_futex *addr1_reg __asm__("r0") = addr1; |
| 154 | register int op_reg __asm__("r1") = FUTEX_WAKE; |
| 155 | register int val1_reg __asm__("r2") = val1; |
| 156 | register int syscall_number __asm__("r7") = SYS_futex; |
| 157 | register int result __asm__("r0"); |
| 158 | __asm__ volatile("swi #0" |
| 159 | : "=r"(result) |
| 160 | : "r"(addr1_reg), "r"(op_reg), "r"(val1_reg), |
| 161 | "r"(syscall_number) |
| 162 | : "memory"); |
| 163 | return result; |
| 164 | #else |
| 165 | const int r = syscall(SYS_futex, addr1, FUTEX_WAKE, val1); |
| 166 | if (r == -1) return -errno; |
| 167 | return r; |
| 168 | #endif |
| 169 | } |
| 170 | |
Brian Silverman | 71c55c5 | 2014-08-19 14:31:59 -0400 | [diff] [blame] | 171 | inline int sys_futex_cmp_requeue_pi(aos_futex *addr1, int num_wake, |
| 172 | int num_requeue, aos_futex *m, uint32_t val) { |
| 173 | #if ARM_EABI_INLINE_SYSCALL |
| 174 | register aos_futex *addr1_reg __asm__("r0") = addr1; |
| 175 | register int op_reg __asm__("r1") = FUTEX_CMP_REQUEUE_PI; |
| 176 | register int num_wake_reg __asm__("r2") = num_wake; |
| 177 | register int num_requeue_reg __asm__("r3") = num_requeue; |
| 178 | register aos_futex *m_reg __asm__("r4") = m; |
| 179 | register uint32_t val_reg __asm__("r5") = val; |
| 180 | register int syscall_number __asm__("r7") = SYS_futex; |
| 181 | register int result __asm__("r0"); |
| 182 | __asm__ volatile("swi #0" |
| 183 | : "=r"(result) |
| 184 | : "r"(addr1_reg), "r"(op_reg), "r"(num_wake_reg), |
| 185 | "r"(num_requeue_reg), "r"(m_reg), "r"(val_reg), |
| 186 | "r"(syscall_number) |
| 187 | : "memory"); |
| 188 | return result; |
| 189 | #else |
| 190 | const int r = syscall(SYS_futex, addr1, FUTEX_CMP_REQUEUE_PI, num_wake, |
| 191 | num_requeue, m, val); |
| 192 | if (r == -1) return -errno; |
| 193 | return r; |
| 194 | #endif |
| 195 | } |
| 196 | |
| 197 | inline int sys_futex_wait_requeue_pi(aos_condition *addr1, |
| 198 | uint32_t start_val, |
| 199 | const struct timespec *timeout, |
| 200 | aos_futex *m) { |
| 201 | #if ARM_EABI_INLINE_SYSCALL |
| 202 | register aos_condition *addr1_reg __asm__("r0") = addr1; |
| 203 | register int op_reg __asm__("r1") = FUTEX_WAIT_REQUEUE_PI; |
| 204 | register uint32_t start_val_reg __asm__("r2") = start_val; |
| 205 | register const struct timespec *timeout_reg __asm__("r3") = timeout; |
| 206 | register aos_futex *m_reg __asm__("r4") = m; |
| 207 | register int syscall_number __asm__("r7") = SYS_futex; |
| 208 | register int result __asm__("r0"); |
| 209 | __asm__ volatile("swi #0" |
| 210 | : "=r"(result) |
| 211 | : "r"(addr1_reg), "r"(op_reg), "r"(start_val_reg), |
| 212 | "r"(timeout_reg), "r"(m_reg), "r"(syscall_number) |
| 213 | : "memory"); |
| 214 | return result; |
| 215 | #else |
| 216 | const int r = |
| 217 | syscall(SYS_futex, addr1, FUTEX_WAIT_REQUEUE_PI, start_val, timeout, m); |
| 218 | if (r == -1) return -errno; |
| 219 | return r; |
| 220 | #endif |
| 221 | } |
| 222 | |
Brian Silverman | dc1eb27 | 2014-08-19 14:25:59 -0400 | [diff] [blame] | 223 | inline int sys_futex_unlock_pi(aos_futex *addr1) { |
| 224 | #if ARM_EABI_INLINE_SYSCALL |
| 225 | register aos_futex *addr1_reg __asm__("r0") = addr1; |
| 226 | register int op_reg __asm__("r1") = FUTEX_UNLOCK_PI; |
| 227 | register int syscall_number __asm__("r7") = SYS_futex; |
| 228 | register int result __asm__("r0"); |
| 229 | __asm__ volatile("swi #0" |
| 230 | : "=r"(result) |
| 231 | : "r"(addr1_reg), "r"(op_reg), "r"(syscall_number) |
| 232 | : "memory"); |
| 233 | return result; |
| 234 | #else |
| 235 | const int r = syscall(SYS_futex, addr1, FUTEX_UNLOCK_PI); |
| 236 | if (r == -1) return -errno; |
| 237 | return r; |
| 238 | #endif |
| 239 | } |
| 240 | |
Brian Silverman | 71c55c5 | 2014-08-19 14:31:59 -0400 | [diff] [blame] | 241 | // Returns the previous value of f. |
| 242 | inline uint32_t compare_and_swap_val(aos_futex *f, uint32_t before, |
| 243 | uint32_t after) { |
| 244 | #ifdef AOS_SANITIZER_thread |
| 245 | // This is a workaround for <https://llvm.org/bugs/show_bug.cgi?id=23176>. |
| 246 | // Basically, most of the atomic operations are broken under tsan, but this |
| 247 | // particular one isn't. |
| 248 | // TODO(Brian): Remove this #ifdef (and the one in compare_and_swap) once we |
| 249 | // don't have to worry about tsan with this bug any more. |
| 250 | uint32_t before_value = before; |
| 251 | __tsan_atomic32_compare_exchange_strong( |
| 252 | reinterpret_cast<int32_t *>(f), |
| 253 | reinterpret_cast<int32_t *>(&before_value), after, |
| 254 | __tsan_memory_order_seq_cst, __tsan_memory_order_seq_cst); |
| 255 | return before_value; |
| 256 | #else |
| 257 | return __sync_val_compare_and_swap(f, before, after); |
| 258 | #endif |
Brian Silverman | dc1eb27 | 2014-08-19 14:25:59 -0400 | [diff] [blame] | 259 | } |
| 260 | |
Brian Silverman | 71c55c5 | 2014-08-19 14:31:59 -0400 | [diff] [blame] | 261 | // Returns true if it succeeds and false if it fails. |
| 262 | inline bool compare_and_swap(aos_futex *f, uint32_t before, uint32_t after) { |
| 263 | #ifdef AOS_SANITIZER_thread |
| 264 | return compare_and_swap_val(f, before, after) == before; |
| 265 | #else |
| 266 | return __sync_bool_compare_and_swap(f, before, after); |
| 267 | #endif |
| 268 | } |
| 269 | |
| 270 | #ifdef AOS_SANITIZER_thread |
| 271 | |
// Minimal check for a condition which should always hold: on failure it
// prints the file, line, and expression to stderr and then calls abort().
// The standard CHECK macro isn't safe here because its failure path often
// reenters the mutex locking code, which doesn't work.
#define SIMPLE_CHECK(expr)                                                   \
  do {                                                                       \
    if (expr) break;                                                         \
    fprintf(stderr, "%s: %d: SIMPLE_CHECK(" #expr ") failed!\n", __FILE__,   \
            __LINE__);                                                       \
    abort();                                                                 \
  } while (false)
| 283 | |
| 284 | // Forcibly initializes the pthread mutex for *m. |
| 285 | // This sequence of operations is only safe for the simpler kinds of mutexes in |
| 286 | // glibc's pthreads implementation on Linux. |
| 287 | void init_pthread_mutex(aos_mutex *m) { |
| 288 | // Re-initialize the mutex so the destroy won't fail if it's locked. |
| 289 | // tsan ignores this. |
| 290 | SIMPLE_CHECK(0 == pthread_mutex_init(&m->pthread_mutex, nullptr)); |
| 291 | // Destroy the mutex so tsan will forget about it if some now-dead thread |
| 292 | // locked it. |
| 293 | SIMPLE_CHECK(0 == pthread_mutex_destroy(&m->pthread_mutex)); |
| 294 | |
| 295 | // Now actually initialize it, making sure it's process-shareable so it works |
| 296 | // correctly across shared memory. |
| 297 | pthread_mutexattr_t attr; |
| 298 | SIMPLE_CHECK(0 == pthread_mutexattr_init(&attr)); |
| 299 | SIMPLE_CHECK(0 == pthread_mutexattr_setpshared(&attr, true)); |
| 300 | SIMPLE_CHECK(0 == pthread_mutex_init(&m->pthread_mutex, &attr)); |
| 301 | SIMPLE_CHECK(0 == pthread_mutexattr_destroy(&attr)); |
| 302 | } |
| 303 | |
| 304 | // Locks the pthread mutex for *m. |
| 305 | // If a stack trace ever reveals the pthread_mutex_lock call in here blocking, |
| 306 | // there is a bug in our mutex code or the way somebody is calling it. |
| 307 | void lock_pthread_mutex(aos_mutex *m) { |
| 308 | if (!m->pthread_mutex_init) { |
| 309 | init_pthread_mutex(m); |
| 310 | m->pthread_mutex_init = true; |
| 311 | } |
| 312 | SIMPLE_CHECK(0 == pthread_mutex_lock(&m->pthread_mutex)); |
| 313 | } |
| 314 | |
| 315 | // Forcibly locks the pthread mutex for *m. |
| 316 | // This will (somewhat hackily) rip the lock out from underneath somebody else |
| 317 | // who is already holding it. |
| 318 | void force_lock_pthread_mutex(aos_mutex *m) { |
| 319 | if (!m->pthread_mutex_init) { |
| 320 | init_pthread_mutex(m); |
| 321 | m->pthread_mutex_init = true; |
| 322 | } |
| 323 | const int trylock_result = pthread_mutex_trylock(&m->pthread_mutex); |
| 324 | SIMPLE_CHECK(trylock_result == 0 || trylock_result == EBUSY); |
| 325 | if (trylock_result == 0) { |
| 326 | // We're good, so unlock it and then go for a real lock down below. |
| 327 | SIMPLE_CHECK(0 == pthread_mutex_unlock(&m->pthread_mutex)); |
| 328 | } else { |
| 329 | // Somebody (should always be somebody else who died with it held) already |
| 330 | // has it, so make tsan forget about that. |
| 331 | init_pthread_mutex(m); |
| 332 | } |
| 333 | lock_pthread_mutex(m); |
| 334 | } |
| 335 | |
| 336 | // Unlocks the pthread mutex for *m. |
| 337 | void unlock_pthread_mutex(aos_mutex *m) { |
| 338 | assert(m->pthread_mutex_init); |
| 339 | SIMPLE_CHECK(0 == pthread_mutex_unlock(&m->pthread_mutex)); |
| 340 | } |
| 341 | |
| 342 | #else |
| 343 | |
| 344 | // Empty implementations of all these so the code below doesn't need #ifdefs. |
| 345 | static inline void lock_pthread_mutex(aos_mutex *) {} |
| 346 | static inline void force_lock_pthread_mutex(aos_mutex *) {} |
| 347 | static inline void unlock_pthread_mutex(aos_mutex *) {} |
| 348 | |
| 349 | #endif |
| 350 | |
// Asks the kernel for the calling thread's TID via the gettid syscall.
// This always makes the syscall; nothing is cached here.
pid_t do_get_tid() {
  const pid_t r = static_cast<pid_t>(syscall(SYS_gettid));
  assert(r > 0);
  return r;
}
| 356 | |
Alex Perry | cb7da4b | 2019-08-28 19:35:56 -0700 | [diff] [blame] | 357 | // This gets called by functions before LOG(FATAL)ing with error messages |
Austin Schuh | f257f3c | 2019-10-27 21:00:43 -0700 | [diff] [blame] | 358 | // that would be incorrect if the error was caused by a process forking without |
Brian Silverman | dc1eb27 | 2014-08-19 14:25:59 -0400 | [diff] [blame] | 359 | // initialize_in_new_thread getting called in the fork. |
| 360 | void check_cached_tid(pid_t tid) { |
| 361 | pid_t actual = do_get_tid(); |
| 362 | if (tid != actual) { |
Alex Perry | cb7da4b | 2019-08-28 19:35:56 -0700 | [diff] [blame] | 363 | LOG(FATAL) << "task " << static_cast<intmax_t>(tid) << " forked into " |
| 364 | << static_cast<intmax_t>(actual) |
| 365 | << " without letting aos_sync know so we're not really sure " |
| 366 | "what's going on"; |
Brian Silverman | dc1eb27 | 2014-08-19 14:25:59 -0400 | [diff] [blame] | 367 | } |
| 368 | } |
| 369 | |
// The calling thread's cached TID, where 0 means "not looked up yet".
// It is 0 in every new thread: either because of this initializer, or because
// atfork_child() reset it after a fork.
thread_local pid_t my_tid{0};

// Fork handler for the child process, which runs before the fork(2) wrapper
// function returns there.
void atfork_child() {
  // Forget the cached TID so that the next get_tid() call sets everything up
  // again.
  my_tid = 0;
}
| 379 | |
John Park | 0e69950 | 2019-11-20 19:36:05 -0800 | [diff] [blame] | 380 | void InstallAtforkHook() { |
Alex Perry | cb7da4b | 2019-08-28 19:35:56 -0700 | [diff] [blame] | 381 | PCHECK(pthread_atfork(NULL, NULL, &atfork_child) == 0) |
| 382 | << ": pthread_atfork(NULL, NULL, " |
| 383 | << reinterpret_cast<void *>(&atfork_child) << ") failed"; |
Brian Silverman | dc1eb27 | 2014-08-19 14:25:59 -0400 | [diff] [blame] | 384 | } |
| 385 | |
| 386 | // This gets called to set everything up in a new thread by get_tid(). |
| 387 | void initialize_in_new_thread(); |
| 388 | |
| 389 | // Gets the current thread's TID and does all of the 1-time initialization the |
| 390 | // first time it's called in a given thread. |
| 391 | inline uint32_t get_tid() { |
Brian Silverman | 71c55c5 | 2014-08-19 14:31:59 -0400 | [diff] [blame] | 392 | if (__builtin_expect(my_tid == 0, false)) { |
Brian Silverman | dc1eb27 | 2014-08-19 14:25:59 -0400 | [diff] [blame] | 393 | initialize_in_new_thread(); |
| 394 | } |
| 395 | static_assert(sizeof(my_tid) <= sizeof(uint32_t), "pid_t is too big"); |
| 396 | return static_cast<uint32_t>(my_tid); |
| 397 | } |
| 398 | |
Brian Silverman | 71c55c5 | 2014-08-19 14:31:59 -0400 | [diff] [blame] | 399 | // Contains all of the stuff for dealing with the robust list. Nothing outside |
| 400 | // this namespace should touch anything inside it except Init, Adder, and |
| 401 | // Remover. |
| 402 | namespace my_robust_list { |
| 403 | |
| 404 | static_assert(offsetof(aos_mutex, next) == 0, |
| 405 | "Our math all assumes that the beginning of a mutex and its next " |
| 406 | "pointer are at the same place in memory."); |
| 407 | |
// Our own declaration of the kernel's robust_list_head.
// It is copied from the kernel header, rather than used directly, because we
// want different member types. That is reasonable because the layout is a
// pretty stable ABI (and any changes will be backwards compatible anyways).
// Both uintptr_t members hold &next of a list element (with stuff |ed in).
struct aos_robust_list_head {
  uintptr_t next;
  long futex_offset;
  uintptr_t pending_next;
};

// Verify that the copy still has exactly the same layout as the original.
static_assert(sizeof(aos_robust_list_head) == sizeof(robust_list_head),
              "Our aos_robust_list_head doesn't match the kernel's");
static_assert(offsetof(aos_robust_list_head, next) ==
                  offsetof(robust_list_head, list),
              "Our aos_robust_list_head doesn't match the kernel's");
static_assert(offsetof(aos_robust_list_head, futex_offset) ==
                  offsetof(robust_list_head, futex_offset),
              "Our aos_robust_list_head doesn't match the kernel's");
static_assert(offsetof(aos_robust_list_head, pending_next) ==
                  offsetof(robust_list_head, list_op_pending),
              "Our aos_robust_list_head doesn't match the kernel's");
| 430 | |
| 431 | thread_local aos_robust_list_head robust_head; |
| 432 | |
| 433 | // Extra offset between mutex values and where we point to for their robust list |
| 434 | // entries (from SetRobustListOffset). |
| 435 | uintptr_t robust_list_offset = 0; |
| 436 | |
| 437 | // The value to OR each pointer's value with whenever putting it into the robust |
| 438 | // list (technically only if it's PI, but all of ours are, so...). |
| 439 | static const uintptr_t kRobustListOr = 1; |
| 440 | |
| 441 | // Returns the value which goes into a next variable to represent the head. |
| 442 | inline uintptr_t robust_head_next_value() { |
| 443 | return reinterpret_cast<uintptr_t>(&robust_head.next); |
| 444 | } |
| 445 | // Returns true iff next represents the head. |
| 446 | inline bool next_is_head(uintptr_t next) { |
| 447 | return next == robust_head_next_value(); |
| 448 | } |
| 449 | // Returns the (psuedo-)mutex corresponding to the head. |
| 450 | // This does NOT have a previous pointer, so be careful with the return value. |
| 451 | inline aos_mutex *robust_head_mutex() { |
| 452 | return reinterpret_cast<aos_mutex *>(robust_head_next_value()); |
| 453 | } |
| 454 | |
| 455 | inline uintptr_t mutex_to_next(aos_mutex *m) { |
| 456 | return (reinterpret_cast<uintptr_t>(&m->next) + robust_list_offset) | |
| 457 | kRobustListOr; |
| 458 | } |
| 459 | inline aos_mutex *next_to_mutex(uintptr_t next) { |
| 460 | if (__builtin_expect(robust_list_offset != 0, false) && next_is_head(next)) { |
| 461 | // We don't offset the head pointer, so be careful. |
| 462 | return reinterpret_cast<aos_mutex *>(next); |
| 463 | } |
| 464 | return reinterpret_cast<aos_mutex *>( |
| 465 | (next & ~kRobustListOr) - robust_list_offset); |
| 466 | } |
| 467 | |
| 468 | // Sets up the robust list for each thread. |
| 469 | void Init() { |
| 470 | // It starts out just pointing back to itself. |
| 471 | robust_head.next = robust_head_next_value(); |
| 472 | robust_head.futex_offset = static_cast<ssize_t>(offsetof(aos_mutex, futex)) - |
| 473 | static_cast<ssize_t>(offsetof(aos_mutex, next)); |
| 474 | robust_head.pending_next = 0; |
Alex Perry | cb7da4b | 2019-08-28 19:35:56 -0700 | [diff] [blame] | 475 | PCHECK(syscall(SYS_set_robust_list, robust_head_next_value(), |
| 476 | sizeof(robust_head)) == 0) |
| 477 | << ": set_robust_list(" << reinterpret_cast<void *>(robust_head.next) |
| 478 | << ", " << sizeof(robust_head) << ") failed"; |
Brian Silverman | 71c55c5 | 2014-08-19 14:31:59 -0400 | [diff] [blame] | 479 | if (kRobustListDebug) { |
| 480 | printf("%" PRId32 ": init done\n", get_tid()); |
| 481 | } |
| 482 | } |
| 483 | |
| 484 | // Updating the offset with locked mutexes is important during robustness |
| 485 | // testing, because there are mutexes which are locked before this is set to a |
| 486 | // non-0 value and then unlocked after it is changed back. However, to make sure |
| 487 | // the code works correctly when manipulating the next pointer of the last of |
| 488 | // those mutexes, all of their next values have to be adjusted appropriately. |
| 489 | void SetRobustListOffset(uintptr_t offset) { |
| 490 | const uintptr_t offset_change = offset - robust_list_offset; |
| 491 | robust_list_offset = offset; |
| 492 | aos_mutex *m = robust_head_mutex(); |
| 493 | // Update the offset contained in each of the mutexes which is already locked. |
| 494 | while (!next_is_head(m->next)) { |
| 495 | m->next += offset_change; |
| 496 | m = next_to_mutex(m->next); |
| 497 | } |
| 498 | } |
| 499 | |
| 500 | bool HaveLockedMutexes() { |
| 501 | return robust_head.next != robust_head_next_value(); |
| 502 | } |
| 503 | |
| 504 | // Handles adding a mutex to the robust list. |
| 505 | // The idea is to create one of these at the beginning of a function that needs |
| 506 | // to do this and then call Add() iff it should actually be added. |
| 507 | class Adder { |
| 508 | public: |
| 509 | Adder(aos_mutex *m) : m_(m) { |
| 510 | assert(robust_head.pending_next == 0); |
| 511 | if (kRobustListDebug) { |
| 512 | printf("%" PRId32 ": maybe add %p\n", get_tid(), m_); |
| 513 | } |
| 514 | robust_head.pending_next = mutex_to_next(m); |
| 515 | aos_compiler_memory_barrier(); |
| 516 | } |
| 517 | ~Adder() { |
| 518 | assert(robust_head.pending_next == mutex_to_next(m_)); |
| 519 | if (kRobustListDebug) { |
| 520 | printf("%" PRId32 ": done maybe add %p, n=%p p=%p\n", get_tid(), m_, |
| 521 | next_to_mutex(m_->next), m_->previous); |
| 522 | } |
| 523 | aos_compiler_memory_barrier(); |
| 524 | robust_head.pending_next = 0; |
| 525 | } |
| 526 | |
| 527 | void Add() { |
| 528 | assert(robust_head.pending_next == mutex_to_next(m_)); |
| 529 | if (kRobustListDebug) { |
| 530 | printf("%" PRId32 ": adding %p\n", get_tid(), m_); |
| 531 | } |
| 532 | const uintptr_t old_head_next_value = robust_head.next; |
| 533 | |
| 534 | m_->next = old_head_next_value; |
| 535 | aos_compiler_memory_barrier(); |
| 536 | robust_head.next = mutex_to_next(m_); |
| 537 | |
| 538 | m_->previous = robust_head_mutex(); |
| 539 | if (!next_is_head(old_head_next_value)) { |
| 540 | // robust_head's psuedo-mutex doesn't have a previous pointer to update. |
| 541 | next_to_mutex(old_head_next_value)->previous = m_; |
| 542 | } |
| 543 | aos_compiler_memory_barrier(); |
| 544 | if (kRobustListDebug) { |
| 545 | printf("%" PRId32 ": done adding %p\n", get_tid(), m_); |
| 546 | } |
| 547 | } |
| 548 | |
| 549 | private: |
| 550 | aos_mutex *const m_; |
| 551 | |
| 552 | DISALLOW_COPY_AND_ASSIGN(Adder); |
| 553 | }; |
| 554 | |
| 555 | // Handles removing a mutex from the robust list. |
| 556 | // The idea is to create one of these at the beginning of a function that needs |
| 557 | // to do this. |
| 558 | class Remover { |
| 559 | public: |
| 560 | Remover(aos_mutex *m) { |
| 561 | assert(robust_head.pending_next == 0); |
| 562 | if (kRobustListDebug) { |
| 563 | printf("%" PRId32 ": beginning to remove %p, n=%p p=%p\n", get_tid(), m, |
| 564 | next_to_mutex(m->next), m->previous); |
| 565 | } |
| 566 | robust_head.pending_next = mutex_to_next(m); |
| 567 | aos_compiler_memory_barrier(); |
| 568 | |
| 569 | aos_mutex *const previous = m->previous; |
| 570 | const uintptr_t next_value = m->next; |
| 571 | |
| 572 | previous->next = m->next; |
| 573 | if (!next_is_head(next_value)) { |
| 574 | // robust_head's psuedo-mutex doesn't have a previous pointer to update. |
| 575 | next_to_mutex(next_value)->previous = previous; |
| 576 | } |
| 577 | |
| 578 | if (kRobustListDebug) { |
| 579 | printf("%" PRId32 ": done removing %p\n", get_tid(), m); |
| 580 | } |
| 581 | } |
| 582 | ~Remover() { |
| 583 | assert(robust_head.pending_next != 0); |
| 584 | aos_compiler_memory_barrier(); |
| 585 | robust_head.pending_next = 0; |
| 586 | if (kRobustListDebug) { |
| 587 | printf("%" PRId32 ": done with removal\n", get_tid()); |
| 588 | } |
| 589 | } |
| 590 | |
| 591 | private: |
| 592 | DISALLOW_COPY_AND_ASSIGN(Remover); |
| 593 | }; |
| 594 | |
| 595 | } // namespace my_robust_list |
| 596 | |
Brian Silverman | dc1eb27 | 2014-08-19 14:25:59 -0400 | [diff] [blame] | 597 | void initialize_in_new_thread() { |
| 598 | // No synchronization necessary in most of this because it's all thread-local! |
| 599 | |
| 600 | my_tid = do_get_tid(); |
| 601 | |
John Park | 9372a68 | 2019-11-27 18:07:48 -0800 | [diff] [blame] | 602 | static absl::once_flag once; |
| 603 | absl::call_once(once, InstallAtforkHook); |
Brian Silverman | 71c55c5 | 2014-08-19 14:31:59 -0400 | [diff] [blame] | 604 | |
| 605 | my_robust_list::Init(); |
Brian Silverman | dc1eb27 | 2014-08-19 14:25:59 -0400 | [diff] [blame] | 606 | } |
| 607 | |
Brian Silverman | 71c55c5 | 2014-08-19 14:31:59 -0400 | [diff] [blame] | 608 | FutexAccessorObserver before_observer = nullptr, after_observer = nullptr; |
| 609 | |
| 610 | // RAII class which runs before_observer during construction and after_observer |
| 611 | // during destruction. |
| 612 | class RunObservers { |
| 613 | public: |
| 614 | template <class T> |
| 615 | RunObservers(T *address, bool write) |
| 616 | : address_(static_cast<void *>( |
| 617 | const_cast<typename ::std::remove_cv<T>::type *>(address))), |
| 618 | write_(write) { |
| 619 | if (__builtin_expect(before_observer != nullptr, false)) { |
| 620 | before_observer(address_, write_); |
| 621 | } |
| 622 | } |
| 623 | ~RunObservers() { |
| 624 | if (__builtin_expect(after_observer != nullptr, false)) { |
| 625 | after_observer(address_, write_); |
| 626 | } |
| 627 | } |
| 628 | |
| 629 | private: |
| 630 | void *const address_; |
| 631 | const bool write_; |
| 632 | |
| 633 | DISALLOW_COPY_AND_ASSIGN(RunObservers); |
| 634 | }; |
| 635 | |
| 636 | // Finishes the locking of a mutex by potentially clearing FUTEX_OWNER_DIED in |
| 637 | // the futex and returning the correct value. |
| 638 | inline int mutex_finish_lock(aos_mutex *m) { |
| 639 | const uint32_t value = __atomic_load_n(&m->futex, __ATOMIC_ACQUIRE); |
| 640 | if (__builtin_expect((value & FUTEX_OWNER_DIED) != 0, false)) { |
| 641 | __atomic_and_fetch(&m->futex, ~FUTEX_OWNER_DIED, __ATOMIC_RELAXED); |
| 642 | force_lock_pthread_mutex(m); |
| 643 | return 1; |
| 644 | } else { |
| 645 | lock_pthread_mutex(m); |
| 646 | return 0; |
| 647 | } |
| 648 | } |
| 649 | |
| 650 | // Split out separately from mutex_get so condition_wait can call it and use its |
| 651 | // own my_robust_list::Adder. |
Brian Silverman | dc1eb27 | 2014-08-19 14:25:59 -0400 | [diff] [blame] | 652 | inline int mutex_do_get(aos_mutex *m, bool signals_fail, |
Brian Silverman | 71c55c5 | 2014-08-19 14:31:59 -0400 | [diff] [blame] | 653 | const struct timespec *timeout, uint32_t tid) { |
| 654 | RunObservers run_observers(m, true); |
| 655 | if (kPrintOperations) { |
| 656 | printf("%" PRId32 ": %p do_get\n", tid, m); |
| 657 | } |
Brian Silverman | dc1eb27 | 2014-08-19 14:25:59 -0400 | [diff] [blame] | 658 | |
| 659 | while (true) { |
| 660 | // If the atomic 0->TID transition fails. |
| 661 | if (!compare_and_swap(&m->futex, 0, tid)) { |
| 662 | // Wait in the kernel, which handles atomically ORing in FUTEX_WAITERS |
| 663 | // before actually sleeping. |
| 664 | const int ret = sys_futex_wait(FUTEX_LOCK_PI, &m->futex, 1, timeout); |
| 665 | if (ret != 0) { |
| 666 | if (timeout != NULL && ret == -ETIMEDOUT) { |
| 667 | return 3; |
| 668 | } |
Brian Silverman | 71c55c5 | 2014-08-19 14:31:59 -0400 | [diff] [blame] | 669 | if (__builtin_expect(ret == -EINTR, true)) { |
Brian Silverman | dc1eb27 | 2014-08-19 14:25:59 -0400 | [diff] [blame] | 670 | if (signals_fail) { |
| 671 | return 2; |
| 672 | } else { |
| 673 | continue; |
| 674 | } |
| 675 | } |
Brian Silverman | 71c55c5 | 2014-08-19 14:31:59 -0400 | [diff] [blame] | 676 | my_robust_list::robust_head.pending_next = 0; |
Alex Perry | cb7da4b | 2019-08-28 19:35:56 -0700 | [diff] [blame] | 677 | CHECK_NE(ret, -EDEADLK) << ": multiple lock of " << m << " by " << tid; |
| 678 | |
| 679 | errno = -ret; |
| 680 | PLOG(FATAL) << "FUTEX_LOCK_PI(" << &m->futex |
| 681 | << "(=" << __atomic_load_n(&m->futex, __ATOMIC_SEQ_CST) |
| 682 | << "), 1, " << timeout << ") failed"; |
Brian Silverman | dc1eb27 | 2014-08-19 14:25:59 -0400 | [diff] [blame] | 683 | } else { |
Brian Silverman | 71c55c5 | 2014-08-19 14:31:59 -0400 | [diff] [blame] | 684 | if (kLockDebug) { |
| 685 | printf("%" PRId32 ": %p kernel lock done\n", tid, m); |
| 686 | } |
Brian Silverman | dc1eb27 | 2014-08-19 14:25:59 -0400 | [diff] [blame] | 687 | // The kernel already handled setting the value to our TID (ish). |
| 688 | break; |
| 689 | } |
| 690 | } else { |
Brian Silverman | 71c55c5 | 2014-08-19 14:31:59 -0400 | [diff] [blame] | 691 | if (kLockDebug) { |
| 692 | printf("%" PRId32 ": %p fast lock done\n", tid, m); |
| 693 | } |
| 694 | lock_pthread_mutex(m); |
Brian Silverman | dc1eb27 | 2014-08-19 14:25:59 -0400 | [diff] [blame] | 695 | // Fastpath succeeded, so no need to call into the kernel. |
Brian Silverman | 71c55c5 | 2014-08-19 14:31:59 -0400 | [diff] [blame] | 696 | // Because this is the fastpath, it's a good idea to avoid even having to |
| 697 | // load the value again down below. |
| 698 | return 0; |
Brian Silverman | dc1eb27 | 2014-08-19 14:25:59 -0400 | [diff] [blame] | 699 | } |
| 700 | } |
| 701 | |
Brian Silverman | 71c55c5 | 2014-08-19 14:31:59 -0400 | [diff] [blame] | 702 | return mutex_finish_lock(m); |
Brian Silverman | dc1eb27 | 2014-08-19 14:25:59 -0400 | [diff] [blame] | 703 | } |
| 704 | |
| 705 | // The common implementation for everything that wants to lock a mutex. |
| 706 | // If signals_fail is false, the function will try again if the wait syscall is |
| 707 | // interrupted by a signal. |
| 708 | // timeout can be NULL for no timeout. |
| 709 | inline int mutex_get(aos_mutex *m, bool signals_fail, |
| 710 | const struct timespec *timeout) { |
Brian Silverman | 71c55c5 | 2014-08-19 14:31:59 -0400 | [diff] [blame] | 711 | const uint32_t tid = get_tid(); |
| 712 | my_robust_list::Adder adder(m); |
| 713 | const int r = mutex_do_get(m, signals_fail, timeout, tid); |
| 714 | if (r == 0 || r == 1) adder.Add(); |
Brian Silverman | dc1eb27 | 2014-08-19 14:25:59 -0400 | [diff] [blame] | 715 | return r; |
| 716 | } |
| 717 | |
| 718 | // The common implementation for broadcast and signal. |
| 719 | // number_requeue is the number of waiters to requeue (probably INT_MAX or 0). 1 |
| 720 | // will always be woken. |
Brian Silverman | 71c55c5 | 2014-08-19 14:31:59 -0400 | [diff] [blame] | 721 | void condition_wake(aos_condition *c, aos_mutex *m, int number_requeue) { |
| 722 | RunObservers run_observers(c, true); |
Brian Silverman | dc1eb27 | 2014-08-19 14:25:59 -0400 | [diff] [blame] | 723 | // Make it so that anybody just going to sleep won't. |
| 724 | // This is where we might accidentally wake more than just 1 waiter with 1 |
| 725 | // signal(): |
| 726 | // 1 already sleeping will be woken but n might never actually make it to |
| 727 | // sleep in the kernel because of this. |
Brian Silverman | 71c55c5 | 2014-08-19 14:31:59 -0400 | [diff] [blame] | 728 | uint32_t new_value = __atomic_add_fetch(c, 1, __ATOMIC_SEQ_CST); |
Brian Silverman | dc1eb27 | 2014-08-19 14:25:59 -0400 | [diff] [blame] | 729 | |
Brian | 2a4294f | 2019-06-12 20:23:32 -0700 | [diff] [blame] | 730 | while (true) { |
| 731 | // This really wants to be FUTEX_REQUEUE_PI, but the kernel doesn't have |
| 732 | // that... However, the code to support that is in the kernel, so it might |
| 733 | // be a good idea to patch it to support that and use it iff it's there. |
| 734 | const int ret = |
| 735 | sys_futex_cmp_requeue_pi(c, 1, number_requeue, &m->futex, new_value); |
| 736 | if (ret < 0) { |
| 737 | // If the value got changed out from under us (aka somebody else did a |
| 738 | // condition_wake). |
| 739 | if (__builtin_expect(ret == -EAGAIN, true)) { |
| 740 | // If we're doing a broadcast, the other guy might have done a signal |
| 741 | // instead, so we have to try again. |
| 742 | // If we're doing a signal, we have to go again to make sure that 2 |
| 743 | // signals wake 2 processes. |
| 744 | new_value = __atomic_load_n(c, __ATOMIC_RELAXED); |
| 745 | continue; |
Brian Silverman | 71c55c5 | 2014-08-19 14:31:59 -0400 | [diff] [blame] | 746 | } |
Brian Silverman | 71c55c5 | 2014-08-19 14:31:59 -0400 | [diff] [blame] | 747 | my_robust_list::robust_head.pending_next = 0; |
Alex Perry | cb7da4b | 2019-08-28 19:35:56 -0700 | [diff] [blame] | 748 | errno = -ret; |
| 749 | PLOG(FATAL) << "FUTEX_CMP_REQUEUE_PI(" << c << ", 1, " << number_requeue |
| 750 | << ", " << &m->futex << ", *" << c << ") failed"; |
Brian | 2a4294f | 2019-06-12 20:23:32 -0700 | [diff] [blame] | 751 | } else { |
| 752 | return; |
Brian Silverman | 71c55c5 | 2014-08-19 14:31:59 -0400 | [diff] [blame] | 753 | } |
Brian Silverman | dc1eb27 | 2014-08-19 14:25:59 -0400 | [diff] [blame] | 754 | } |
| 755 | } |
| 756 | |
| 757 | } // namespace |
| 758 | |
| 759 | int mutex_lock(aos_mutex *m) { |
| 760 | return mutex_get(m, true, NULL); |
| 761 | } |
| 762 | int mutex_lock_timeout(aos_mutex *m, const struct timespec *timeout) { |
| 763 | return mutex_get(m, true, timeout); |
| 764 | } |
| 765 | int mutex_grab(aos_mutex *m) { |
| 766 | return mutex_get(m, false, NULL); |
| 767 | } |
| 768 | |
| 769 | void mutex_unlock(aos_mutex *m) { |
Brian Silverman | 71c55c5 | 2014-08-19 14:31:59 -0400 | [diff] [blame] | 770 | RunObservers run_observers(m, true); |
Brian Silverman | dc1eb27 | 2014-08-19 14:25:59 -0400 | [diff] [blame] | 771 | const uint32_t tid = get_tid(); |
Brian Silverman | 71c55c5 | 2014-08-19 14:31:59 -0400 | [diff] [blame] | 772 | if (kPrintOperations) { |
| 773 | printf("%" PRId32 ": %p unlock\n", tid, m); |
| 774 | } |
Brian Silverman | dc1eb27 | 2014-08-19 14:25:59 -0400 | [diff] [blame] | 775 | |
| 776 | const uint32_t value = __atomic_load_n(&m->futex, __ATOMIC_SEQ_CST); |
Brian Silverman | 71c55c5 | 2014-08-19 14:31:59 -0400 | [diff] [blame] | 777 | if (__builtin_expect((value & FUTEX_TID_MASK) != tid, false)) { |
| 778 | my_robust_list::robust_head.pending_next = 0; |
Brian Silverman | dc1eb27 | 2014-08-19 14:25:59 -0400 | [diff] [blame] | 779 | check_cached_tid(tid); |
| 780 | if ((value & FUTEX_TID_MASK) == 0) { |
Alex Perry | cb7da4b | 2019-08-28 19:35:56 -0700 | [diff] [blame] | 781 | LOG(FATAL) << "multiple unlock of aos_mutex " << m << " by " << tid; |
Brian Silverman | dc1eb27 | 2014-08-19 14:25:59 -0400 | [diff] [blame] | 782 | } else { |
Alex Perry | cb7da4b | 2019-08-28 19:35:56 -0700 | [diff] [blame] | 783 | LOG(FATAL) << "aos_mutex " << m << " is locked by " |
| 784 | << (value & FUTEX_TID_MASK) << ", not " << tid; |
Brian Silverman | dc1eb27 | 2014-08-19 14:25:59 -0400 | [diff] [blame] | 785 | } |
| 786 | } |
| 787 | |
Brian Silverman | 71c55c5 | 2014-08-19 14:31:59 -0400 | [diff] [blame] | 788 | my_robust_list::Remover remover(m); |
| 789 | unlock_pthread_mutex(m); |
| 790 | |
Brian Silverman | dc1eb27 | 2014-08-19 14:25:59 -0400 | [diff] [blame] | 791 | // If the atomic TID->0 transition fails (ie FUTEX_WAITERS is set), |
| 792 | if (!compare_and_swap(&m->futex, tid, 0)) { |
| 793 | // The kernel handles everything else. |
| 794 | const int ret = sys_futex_unlock_pi(&m->futex); |
| 795 | if (ret != 0) { |
Brian Silverman | 71c55c5 | 2014-08-19 14:31:59 -0400 | [diff] [blame] | 796 | my_robust_list::robust_head.pending_next = 0; |
Alex Perry | cb7da4b | 2019-08-28 19:35:56 -0700 | [diff] [blame] | 797 | errno = -ret; |
| 798 | PLOG(FATAL) << "FUTEX_UNLOCK_PI(" << (&m->futex) << ") failed"; |
Brian Silverman | dc1eb27 | 2014-08-19 14:25:59 -0400 | [diff] [blame] | 799 | } |
| 800 | } else { |
| 801 | // There aren't any waiters, so no need to call into the kernel. |
| 802 | } |
| 803 | } |
| 804 | |
| 805 | int mutex_trylock(aos_mutex *m) { |
Brian Silverman | 71c55c5 | 2014-08-19 14:31:59 -0400 | [diff] [blame] | 806 | RunObservers run_observers(m, true); |
| 807 | const uint32_t tid = get_tid(); |
| 808 | if (kPrintOperations) { |
| 809 | printf("%" PRId32 ": %p trylock\n", tid, m); |
| 810 | } |
| 811 | my_robust_list::Adder adder(m); |
| 812 | |
Brian Silverman | dc1eb27 | 2014-08-19 14:25:59 -0400 | [diff] [blame] | 813 | // Try an atomic 0->TID transition. |
Brian Silverman | 71c55c5 | 2014-08-19 14:31:59 -0400 | [diff] [blame] | 814 | uint32_t c = compare_and_swap_val(&m->futex, 0, tid); |
Brian Silverman | dc1eb27 | 2014-08-19 14:25:59 -0400 | [diff] [blame] | 815 | |
| 816 | if (c != 0) { |
Brian Silverman | 71c55c5 | 2014-08-19 14:31:59 -0400 | [diff] [blame] | 817 | if (__builtin_expect((c & FUTEX_OWNER_DIED) == 0, true)) { |
| 818 | // Somebody else had it locked; we failed. |
| 819 | return 4; |
| 820 | } else { |
| 821 | // FUTEX_OWNER_DIED was set, so we have to call into the kernel to deal |
| 822 | // with resetting it. |
| 823 | const int ret = sys_futex_wait(FUTEX_TRYLOCK_PI, &m->futex, 0, NULL); |
| 824 | if (ret == 0) { |
| 825 | adder.Add(); |
| 826 | // Only clear the owner died if somebody else didn't do the recovery |
| 827 | // and then unlock before our TRYLOCK happened. |
| 828 | return mutex_finish_lock(m); |
| 829 | } else { |
| 830 | // EWOULDBLOCK means that somebody else beat us to it. |
| 831 | if (__builtin_expect(ret == -EWOULDBLOCK, true)) { |
| 832 | return 4; |
| 833 | } |
| 834 | my_robust_list::robust_head.pending_next = 0; |
Alex Perry | cb7da4b | 2019-08-28 19:35:56 -0700 | [diff] [blame] | 835 | errno = -ret; |
| 836 | PLOG(FATAL) << "FUTEX_TRYLOCK_PI(" << (&m->futex) |
| 837 | << ", 0, NULL) failed"; |
Brian Silverman | 71c55c5 | 2014-08-19 14:31:59 -0400 | [diff] [blame] | 838 | } |
| 839 | } |
Brian Silverman | dc1eb27 | 2014-08-19 14:25:59 -0400 | [diff] [blame] | 840 | } |
Brian Silverman | 71c55c5 | 2014-08-19 14:31:59 -0400 | [diff] [blame] | 841 | |
| 842 | lock_pthread_mutex(m); |
| 843 | adder.Add(); |
Brian Silverman | dc1eb27 | 2014-08-19 14:25:59 -0400 | [diff] [blame] | 844 | return 0; |
| 845 | } |
| 846 | |
| 847 | bool mutex_islocked(const aos_mutex *m) { |
| 848 | const uint32_t tid = get_tid(); |
| 849 | |
| 850 | const uint32_t value = __atomic_load_n(&m->futex, __ATOMIC_RELAXED); |
| 851 | return (value & FUTEX_TID_MASK) == tid; |
| 852 | } |
| 853 | |
Brian Silverman | 27af1f6 | 2019-11-18 12:04:48 -0800 | [diff] [blame] | 854 | void death_notification_init(aos_mutex *m) { |
| 855 | const uint32_t tid = get_tid(); |
| 856 | if (kPrintOperations) { |
| 857 | printf("%" PRId32 ": %p death_notification start\n", tid, m); |
| 858 | } |
| 859 | my_robust_list::Adder adder(m); |
| 860 | { |
| 861 | RunObservers run_observers(m, true); |
| 862 | CHECK(compare_and_swap(&m->futex, 0, tid)); |
| 863 | } |
| 864 | adder.Add(); |
| 865 | } |
| 866 | |
| 867 | void death_notification_release(aos_mutex *m) { |
| 868 | RunObservers run_observers(m, true); |
| 869 | |
| 870 | #ifndef NDEBUG |
| 871 | // Verify it's "locked", like it should be. |
| 872 | { |
| 873 | const uint32_t tid = get_tid(); |
| 874 | if (kPrintOperations) { |
| 875 | printf("%" PRId32 ": %p death_notification release\n", tid, m); |
| 876 | } |
| 877 | const uint32_t value = __atomic_load_n(&m->futex, __ATOMIC_SEQ_CST); |
| 878 | assert((value & ~FUTEX_WAITERS) == tid); |
| 879 | } |
| 880 | #endif |
| 881 | |
| 882 | my_robust_list::Remover remover(m); |
| 883 | ANNOTATE_HAPPENS_BEFORE(m); |
| 884 | const int ret = sys_futex_unlock_pi(&m->futex); |
| 885 | if (ret != 0) { |
| 886 | my_robust_list::robust_head.pending_next = 0; |
| 887 | errno = -ret; |
| 888 | PLOG(FATAL) << "FUTEX_UNLOCK_PI(" << &m->futex << ") failed"; |
| 889 | } |
| 890 | } |
| 891 | |
Austin Schuh | 0ad2b6f | 2019-06-09 21:27:07 -0700 | [diff] [blame] | 892 | int condition_wait(aos_condition *c, aos_mutex *m, struct timespec *end_time) { |
Brian Silverman | 71c55c5 | 2014-08-19 14:31:59 -0400 | [diff] [blame] | 893 | RunObservers run_observers(c, false); |
| 894 | const uint32_t tid = get_tid(); |
Brian Silverman | dc1eb27 | 2014-08-19 14:25:59 -0400 | [diff] [blame] | 895 | const uint32_t wait_start = __atomic_load_n(c, __ATOMIC_SEQ_CST); |
| 896 | |
| 897 | mutex_unlock(m); |
| 898 | |
Brian Silverman | 71c55c5 | 2014-08-19 14:31:59 -0400 | [diff] [blame] | 899 | my_robust_list::Adder adder(m); |
| 900 | |
Brian Silverman | dc1eb27 | 2014-08-19 14:25:59 -0400 | [diff] [blame] | 901 | while (true) { |
| 902 | // Wait in the kernel iff the value of it doesn't change (ie somebody else |
| 903 | // does a wake) from before we unlocked the mutex. |
Austin Schuh | 0ad2b6f | 2019-06-09 21:27:07 -0700 | [diff] [blame] | 904 | int ret = sys_futex_wait_requeue_pi(c, wait_start, end_time, &m->futex); |
| 905 | |
Brian Silverman | dc1eb27 | 2014-08-19 14:25:59 -0400 | [diff] [blame] | 906 | if (ret != 0) { |
Austin Schuh | 0ad2b6f | 2019-06-09 21:27:07 -0700 | [diff] [blame] | 907 | // Timed out waiting. Signal that back up to the user. |
| 908 | if (__builtin_expect(ret == -ETIMEDOUT, true)) { |
| 909 | // We have to relock it ourself because the kernel didn't do it. |
| 910 | const int r = mutex_do_get(m, false, nullptr, tid); |
| 911 | assert(__builtin_expect(r == 0 || r == 1, true)); |
| 912 | adder.Add(); |
| 913 | |
| 914 | // OWNER_DIED takes priority. Pass it on if we found it. |
| 915 | if (r == 1) return r; |
| 916 | // Otherwise communicate that we were interrupted. |
| 917 | return -1; |
| 918 | } |
| 919 | |
Brian Silverman | dc1eb27 | 2014-08-19 14:25:59 -0400 | [diff] [blame] | 920 | // If it failed because somebody else did a wake and changed the value |
| 921 | // before we actually made it to sleep. |
Brian Silverman | 71c55c5 | 2014-08-19 14:31:59 -0400 | [diff] [blame] | 922 | if (__builtin_expect(ret == -EAGAIN, true)) { |
| 923 | // There's no need to unconditionally set FUTEX_WAITERS here if we're |
| 924 | // using REQUEUE_PI because the kernel automatically does that in the |
| 925 | // REQUEUE_PI iff it requeued anybody. |
| 926 | // If we're not using REQUEUE_PI, then everything is just normal locks |
| 927 | // etc, so there's no need to do anything special there either. |
Brian Silverman | dc1eb27 | 2014-08-19 14:25:59 -0400 | [diff] [blame] | 928 | |
| 929 | // We have to relock it ourself because the kernel didn't do it. |
Brian Silverman | 71c55c5 | 2014-08-19 14:31:59 -0400 | [diff] [blame] | 930 | const int r = mutex_do_get(m, false, nullptr, tid); |
| 931 | assert(__builtin_expect(r == 0 || r == 1, true)); |
| 932 | adder.Add(); |
Brian Silverman | dc1eb27 | 2014-08-19 14:25:59 -0400 | [diff] [blame] | 933 | return r; |
| 934 | } |
| 935 | // Try again if it was because of a signal. |
Austin Schuh | 0ad2b6f | 2019-06-09 21:27:07 -0700 | [diff] [blame] | 936 | if (__builtin_expect((ret == -EINTR), true)) { |
| 937 | continue; |
| 938 | } |
Brian Silverman | 71c55c5 | 2014-08-19 14:31:59 -0400 | [diff] [blame] | 939 | my_robust_list::robust_head.pending_next = 0; |
Alex Perry | cb7da4b | 2019-08-28 19:35:56 -0700 | [diff] [blame] | 940 | errno = -ret; |
| 941 | PLOG(FATAL) << "FUTEX_WAIT_REQUEUE_PI(" << c << ", " << wait_start << ", " |
| 942 | << (&m->futex) << ") failed"; |
Brian Silverman | dc1eb27 | 2014-08-19 14:25:59 -0400 | [diff] [blame] | 943 | } else { |
Brian | 2a4294f | 2019-06-12 20:23:32 -0700 | [diff] [blame] | 944 | // Record that the kernel relocked it for us. |
| 945 | lock_pthread_mutex(m); |
Brian Silverman | 71c55c5 | 2014-08-19 14:31:59 -0400 | [diff] [blame] | 946 | |
Austin Schuh | 0ad2b6f | 2019-06-09 21:27:07 -0700 | [diff] [blame] | 947 | // We succeeded in waiting, and the kernel took care of locking the |
| 948 | // mutex |
Brian Silverman | 71c55c5 | 2014-08-19 14:31:59 -0400 | [diff] [blame] | 949 | // for us and setting FUTEX_WAITERS iff it needed to (for REQUEUE_PI). |
| 950 | |
| 951 | adder.Add(); |
| 952 | |
| 953 | const uint32_t value = __atomic_load_n(&m->futex, __ATOMIC_RELAXED); |
| 954 | if (__builtin_expect((value & FUTEX_OWNER_DIED) != 0, false)) { |
| 955 | __atomic_and_fetch(&m->futex, ~FUTEX_OWNER_DIED, __ATOMIC_RELAXED); |
| 956 | return 1; |
| 957 | } else { |
| 958 | return 0; |
| 959 | } |
Brian Silverman | dc1eb27 | 2014-08-19 14:25:59 -0400 | [diff] [blame] | 960 | } |
| 961 | } |
| 962 | } |
| 963 | |
| 964 | void condition_signal(aos_condition *c, aos_mutex *m) { |
| 965 | condition_wake(c, m, 0); |
| 966 | } |
| 967 | |
| 968 | void condition_broadcast(aos_condition *c, aos_mutex *m) { |
| 969 | condition_wake(c, m, INT_MAX); |
| 970 | } |
| 971 | |
| 972 | int futex_wait_timeout(aos_futex *m, const struct timespec *timeout) { |
Brian Silverman | 71c55c5 | 2014-08-19 14:31:59 -0400 | [diff] [blame] | 973 | RunObservers run_observers(m, false); |
Brian Silverman | dc1eb27 | 2014-08-19 14:25:59 -0400 | [diff] [blame] | 974 | const int ret = sys_futex_wait(FUTEX_WAIT, m, 0, timeout); |
| 975 | if (ret != 0) { |
| 976 | if (ret == -EINTR) { |
| 977 | return 1; |
| 978 | } else if (ret == -ETIMEDOUT) { |
| 979 | return 2; |
| 980 | } else if (ret != -EWOULDBLOCK) { |
| 981 | errno = -ret; |
| 982 | return -1; |
| 983 | } |
| 984 | } |
Brian Silverman | 71c55c5 | 2014-08-19 14:31:59 -0400 | [diff] [blame] | 985 | ANNOTATE_HAPPENS_AFTER(m); |
Brian Silverman | dc1eb27 | 2014-08-19 14:25:59 -0400 | [diff] [blame] | 986 | return 0; |
| 987 | } |
| 988 | |
| 989 | int futex_wait(aos_futex *m) { return futex_wait_timeout(m, NULL); } |
| 990 | |
| 991 | int futex_set_value(aos_futex *m, uint32_t value) { |
Brian Silverman | 71c55c5 | 2014-08-19 14:31:59 -0400 | [diff] [blame] | 992 | RunObservers run_observers(m, false); |
| 993 | ANNOTATE_HAPPENS_BEFORE(m); |
Brian Silverman | dc1eb27 | 2014-08-19 14:25:59 -0400 | [diff] [blame] | 994 | __atomic_store_n(m, value, __ATOMIC_SEQ_CST); |
| 995 | const int r = sys_futex_wake(m, INT_MAX - 4096); |
| 996 | if (__builtin_expect( |
Brian Silverman | 71c55c5 | 2014-08-19 14:31:59 -0400 | [diff] [blame] | 997 | static_cast<unsigned int>(r) > static_cast<unsigned int>(-4096), |
| 998 | false)) { |
Brian Silverman | dc1eb27 | 2014-08-19 14:25:59 -0400 | [diff] [blame] | 999 | errno = -r; |
| 1000 | return -1; |
| 1001 | } else { |
| 1002 | return r; |
| 1003 | } |
| 1004 | } |
| 1005 | |
| 1006 | int futex_set(aos_futex *m) { |
| 1007 | return futex_set_value(m, 1); |
| 1008 | } |
| 1009 | |
| 1010 | int futex_unset(aos_futex *m) { |
| 1011 | return !__atomic_exchange_n(m, 0, __ATOMIC_SEQ_CST); |
| 1012 | } |
Brian Silverman | 71c55c5 | 2014-08-19 14:31:59 -0400 | [diff] [blame] | 1013 | |
| 1014 | namespace aos { |
| 1015 | namespace linux_code { |
| 1016 | namespace ipc_lib { |
| 1017 | |
| 1018 | // Sets functions to run befor eand after all futex operations. |
| 1019 | // This is important when doing robustness testing because the memory has to be |
| 1020 | // made writable for the whole futex operation, otherwise it never succeeds. |
| 1021 | void SetFutexAccessorObservers(FutexAccessorObserver before, |
| 1022 | FutexAccessorObserver after) { |
| 1023 | before_observer = before; |
| 1024 | after_observer = after; |
| 1025 | } |
| 1026 | |
| 1027 | // Sets an extra offset between mutexes and the value we use for them in the |
| 1028 | // robust list (only the forward pointers). This is used to work around a kernel |
| 1029 | // bug by keeping a second set of mutexes which is always writable so the kernel |
| 1030 | // won't go into an infinite loop when trying to unlock them. |
| 1031 | void SetRobustListOffset(ptrdiff_t offset) { |
| 1032 | my_robust_list::SetRobustListOffset(offset); |
| 1033 | } |
| 1034 | |
| 1035 | // Returns true iff there are any mutexes locked by the current thread. |
| 1036 | // This is mainly useful for testing. |
| 1037 | bool HaveLockedMutexes() { |
| 1038 | return my_robust_list::HaveLockedMutexes(); |
| 1039 | } |
| 1040 | |
| 1041 | } // namespace ipc_lib |
| 1042 | } // namespace linux_code |
| 1043 | } // namespace aos |