// Copyright 2017 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "absl/base/internal/spinlock.h"

#include <algorithm>
#include <atomic>
#include <limits>

#include "absl/base/attributes.h"
#include "absl/base/internal/atomic_hook.h"
#include "absl/base/internal/cycleclock.h"
#include "absl/base/internal/spinlock_wait.h"
#include "absl/base/internal/sysinfo.h" /* For NumCPUs() */
#include "absl/base/call_once.h"
// Description of lock-word:
//  31..00: [............................3][2][1][0]
//
//     [0]: kSpinLockHeld
//     [1]: kSpinLockCooperative
//     [2]: kSpinLockDisabledScheduling
// [31..3]: ONLY kSpinLockSleeper OR
//          Wait time in cycles >> PROFILE_TIMESTAMP_SHIFT
//
// Detailed descriptions:
//
// Bit [0]: The lock is considered held iff kSpinLockHeld is set.
//
// Bit [1]: Eligible waiters (e.g. Fibers) may co-operatively reschedule when
//          contended iff kSpinLockCooperative is set.
//
// Bit [2]: This bit is exclusive from bit [1].  It is used only by a
//          non-cooperative lock.  When set, indicates that scheduling was
//          successfully disabled when the lock was acquired.  May be unset,
//          even if non-cooperative, if a ThreadIdentity did not yet exist at
//          time of acquisition.
//
// Bit [3]: If this is the only upper bit ([31..3]) set, then this lock was
//          acquired without contention; however, at least one waiter exists.
//
// Otherwise, bits [31..3] represent the time spent by the current lock
// holder to acquire the lock.  There may be outstanding waiter(s).
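//
// Illustrative (hypothetical) lock-word values implied by the layout above:
//   0x00000001  held, non-cooperative, acquired without contention, no waiters
//   0x00000009  held, with only kSpinLockSleeper (bit [3]) set: the holder did
//               not wait, but at least one waiter is (or was) sleeping
//   0x00001f43  held, cooperative, and bits [31..3] == 1000: the holder waited
//               roughly 1000 << PROFILE_TIMESTAMP_SHIFT cycles to acquire it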

namespace absl {
namespace base_internal {

ABSL_CONST_INIT static base_internal::AtomicHook<void (*)(const void *lock,
                                                          int64_t wait_cycles)>
    submit_profile_data;

void RegisterSpinLockProfiler(void (*fn)(const void *contendedlock,
                                         int64_t wait_cycles)) {
  submit_profile_data.Store(fn);
}

// Uncommon constructors.
SpinLock::SpinLock(base_internal::SchedulingMode mode)
    : lockword_(IsCooperative(mode) ? kSpinLockCooperative : 0) {
  ABSL_TSAN_MUTEX_CREATE(this, __tsan_mutex_not_static);
}

SpinLock::SpinLock(base_internal::LinkerInitialized,
                   base_internal::SchedulingMode mode) {
  ABSL_TSAN_MUTEX_CREATE(this, 0);
  if (IsCooperative(mode)) {
    InitLinkerInitializedAndCooperative();
  }
  // Otherwise, lockword_ is already initialized.
}

// Static (linker initialized) spinlocks always start life as functional
// non-cooperative locks.  When their static constructor does run, it will call
// this initializer to augment the lockword with the cooperative bit.  By
// actually taking the lock when we do this we avoid the need for an atomic
// operation in the regular unlock path.
//
// SlowLock() must be careful to re-test for this bit so that any outstanding
// waiters may be upgraded to cooperative status.
void SpinLock::InitLinkerInitializedAndCooperative() {
  Lock();
  lockword_.fetch_or(kSpinLockCooperative, std::memory_order_relaxed);
  Unlock();
}
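
// A hedged usage sketch (hypothetical caller; assumes the conventional
// base_internal::kLinkerInitialized enumerator of LinkerInitialized declared
// with this class): a namespace-scope lock that must be usable before dynamic
// initializers run behaves as a plain non-cooperative lock until its
// constructor executes and upgrades it via the path above.
//
//   static absl::base_internal::SpinLock g_early_lock(
//       absl::base_internal::kLinkerInitialized,
//       absl::base_internal::SCHEDULE_COOPERATIVE_AND_KERNEL);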

// Monitor the lock to see if its value changes within some time period
// (adaptive_spin_count loop iterations).  The last value read from the lock
// is returned from the method.
uint32_t SpinLock::SpinLoop() {
  // We are already in the slow path of SpinLock; initialize the
  // adaptive_spin_count here.
  ABSL_CONST_INIT static absl::once_flag init_adaptive_spin_count;
  ABSL_CONST_INIT static int adaptive_spin_count = 0;
  base_internal::LowLevelCallOnce(&init_adaptive_spin_count, []() {
    adaptive_spin_count = base_internal::NumCPUs() > 1 ? 1000 : 1;
  });

  int c = adaptive_spin_count;
  uint32_t lock_value;
  do {
    lock_value = lockword_.load(std::memory_order_relaxed);
  } while ((lock_value & kSpinLockHeld) != 0 && --c > 0);
  return lock_value;
}

void SpinLock::SlowLock() {
  uint32_t lock_value = SpinLoop();
  lock_value = TryLockInternal(lock_value, 0);
  if ((lock_value & kSpinLockHeld) == 0) {
    return;
  }
  // The lock was not obtained initially, so this thread needs to wait for
  // it.  Record the current timestamp in the local variable wait_start_time
  // so the total wait time can be stored in the lockword once this thread
  // obtains the lock.
  int64_t wait_start_time = CycleClock::Now();
  uint32_t wait_cycles = 0;
  int lock_wait_call_count = 0;
  while ((lock_value & kSpinLockHeld) != 0) {
    // If the lock is currently held, but not marked as having a sleeper, mark
    // it as having a sleeper.
    if ((lock_value & kWaitTimeMask) == 0) {
      // Here, just "mark" that the thread is going to sleep.  Don't store the
      // lock wait time in the lock as that will cause the current lock
      // owner to think it experienced contention.
      if (lockword_.compare_exchange_strong(
              lock_value, lock_value | kSpinLockSleeper,
              std::memory_order_relaxed, std::memory_order_relaxed)) {
        // Successfully transitioned to kSpinLockSleeper.  Pass
        // kSpinLockSleeper to the SpinLockWait routine to properly indicate
        // the last lock_value observed.
        lock_value |= kSpinLockSleeper;
      } else if ((lock_value & kSpinLockHeld) == 0) {
        // Lock is free again, so try and acquire it before sleeping.  The
        // new lock state will be the number of cycles this thread waited if
        // this thread obtains the lock.
        lock_value = TryLockInternal(lock_value, wait_cycles);
        continue;  // Skip the delay at the end of the loop.
      }
    }

    base_internal::SchedulingMode scheduling_mode;
    if ((lock_value & kSpinLockCooperative) != 0) {
      scheduling_mode = base_internal::SCHEDULE_COOPERATIVE_AND_KERNEL;
    } else {
      scheduling_mode = base_internal::SCHEDULE_KERNEL_ONLY;
    }
    // SpinLockDelay() calls into the fiber scheduler; we need to see
    // synchronization there to avoid false positives.
    ABSL_TSAN_MUTEX_PRE_DIVERT(this, 0);
    // Wait for an OS-specific delay.
    base_internal::SpinLockDelay(&lockword_, lock_value, ++lock_wait_call_count,
                                 scheduling_mode);
    ABSL_TSAN_MUTEX_POST_DIVERT(this, 0);
    // Spin again after returning from the wait routine to give this thread
    // some chance of obtaining the lock.
    lock_value = SpinLoop();
    wait_cycles = EncodeWaitCycles(wait_start_time, CycleClock::Now());
    lock_value = TryLockInternal(lock_value, wait_cycles);
  }
}
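
// Illustrative usage (a hypothetical caller, not part of this file): when two
// threads contend for the same lock, the loser takes the SlowLock() path
// above, and the winner's Unlock() reaches SlowUnlock() below to wake it.
// SpinLockHolder is the RAII wrapper declared in spinlock.h.
//
//   static absl::base_internal::SpinLock g_lock(
//       absl::base_internal::SCHEDULE_KERNEL_ONLY);
//   void Increment(int *counter) {
//     absl::base_internal::SpinLockHolder h(&g_lock);  // may call SlowLock()
//     ++*counter;
//   }  // ~SpinLockHolder() unlocks; may call SlowUnlock()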

void SpinLock::SlowUnlock(uint32_t lock_value) {
  base_internal::SpinLockWake(&lockword_,
                              false);  // wake waiter if necessary

  // If our acquisition was contended, collect contentionz profile info.  We
  // reserve a unitary wait time to represent that a waiter exists without our
  // own acquisition having been contended.
  if ((lock_value & kWaitTimeMask) != kSpinLockSleeper) {
    const uint64_t wait_cycles = DecodeWaitCycles(lock_value);
    ABSL_TSAN_MUTEX_PRE_DIVERT(this, 0);
    submit_profile_data(this, wait_cycles);
    ABSL_TSAN_MUTEX_POST_DIVERT(this, 0);
  }
}

// We use the upper 29 bits of the lock word to store the time spent waiting to
// acquire this lock.  This is reported by contentionz profiling.  Since the
// lower bits of the cycle counter wrap very quickly on high-frequency
// processors we divide to reduce the granularity to 2^PROFILE_TIMESTAMP_SHIFT
// sized units.  On a 4 GHz machine this will lose track of wait times greater
// than (2^29/4 GHz)*128 =~ 17.2 seconds.  Such waits should be extremely rare.
enum { PROFILE_TIMESTAMP_SHIFT = 7 };
enum { LOCKWORD_RESERVED_SHIFT = 3 };  // We currently reserve the lower 3 bits.
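
// Worked out with these constants (an arithmetic check, not new behavior): the
// wait field holds at most 2^29 - 1 units of 2^PROFILE_TIMESTAMP_SHIFT = 128
// cycles each, i.e. roughly 2^36 ~= 6.9e10 cycles; at 4e9 cycles per second
// that is the ~17.2 seconds quoted above.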

uint32_t SpinLock::EncodeWaitCycles(int64_t wait_start_time,
                                    int64_t wait_end_time) {
  static const int64_t kMaxWaitTime =
      std::numeric_limits<uint32_t>::max() >> LOCKWORD_RESERVED_SHIFT;
  int64_t scaled_wait_time =
      (wait_end_time - wait_start_time) >> PROFILE_TIMESTAMP_SHIFT;

  // Return a representation of the time spent waiting that can be stored in
  // the lock word's upper bits.
  uint32_t clamped = static_cast<uint32_t>(
      std::min(scaled_wait_time, kMaxWaitTime) << LOCKWORD_RESERVED_SHIFT);

  if (clamped == 0) {
    return kSpinLockSleeper;  // Just wake waiters, but don't record contention.
  }
  // Bump up value if necessary to avoid returning kSpinLockSleeper.
  const uint32_t kMinWaitTime =
      kSpinLockSleeper + (1 << LOCKWORD_RESERVED_SHIFT);
  if (clamped == kSpinLockSleeper) {
    return kMinWaitTime;
  }
  return clamped;
}
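
// Illustrative example (hypothetical cycle counts): a wait of 1,000,000 cycles
// scales to 1,000,000 >> 7 = 7812 profile units, stored as 7812 << 3 = 62496
// in the upper bits of the lock word; the low three bits of the result are
// left zero for the flag bits.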

uint64_t SpinLock::DecodeWaitCycles(uint32_t lock_value) {
  // Cast to uint32_t first to ensure bits [63:32] are cleared.
  const uint64_t scaled_wait_time =
      static_cast<uint32_t>(lock_value & kWaitTimeMask);
  return scaled_wait_time
         << (PROFILE_TIMESTAMP_SHIFT - LOCKWORD_RESERVED_SHIFT);
}
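
// Continuing the encode example above: DecodeWaitCycles(62496) masks off the
// flag bits and shifts left by PROFILE_TIMESTAMP_SHIFT - LOCKWORD_RESERVED_SHIFT
// = 4, yielding 62496 << 4 = 999,936 cycles, i.e. the original 1,000,000-cycle
// wait rounded down to a multiple of 2^PROFILE_TIMESTAMP_SHIFT = 128 cycles.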

}  // namespace base_internal
}  // namespace absl