Austin Schuh | 745610d | 2015-09-06 18:19:50 -0700 | [diff] [blame] | 1 | // -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- |
| 2 | /* Copyright (c) 2006, Google Inc. |
| 3 | * All rights reserved. |
Brian Silverman | 20350ac | 2021-11-17 18:19:55 -0800 | [diff] [blame] | 4 | * |
Austin Schuh | 745610d | 2015-09-06 18:19:50 -0700 | [diff] [blame] | 5 | * Redistribution and use in source and binary forms, with or without |
| 6 | * modification, are permitted provided that the following conditions are |
| 7 | * met: |
Brian Silverman | 20350ac | 2021-11-17 18:19:55 -0800 | [diff] [blame] | 8 | * |
Austin Schuh | 745610d | 2015-09-06 18:19:50 -0700 | [diff] [blame] | 9 | * * Redistributions of source code must retain the above copyright |
| 10 | * notice, this list of conditions and the following disclaimer. |
| 11 | * * Redistributions in binary form must reproduce the above |
| 12 | * copyright notice, this list of conditions and the following disclaimer |
| 13 | * in the documentation and/or other materials provided with the |
| 14 | * distribution. |
| 15 | * * Neither the name of Google Inc. nor the names of its |
| 16 | * contributors may be used to endorse or promote products derived from |
| 17 | * this software without specific prior written permission. |
Brian Silverman | 20350ac | 2021-11-17 18:19:55 -0800 | [diff] [blame] | 18 | * |
Austin Schuh | 745610d | 2015-09-06 18:19:50 -0700 | [diff] [blame] | 19 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
| 20 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
| 21 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
| 22 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
| 23 | * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
| 24 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
| 25 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
| 26 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
| 27 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| 28 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
| 29 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| 30 | * |
| 31 | * --- |
| 32 | * Author: Sanjay Ghemawat |
| 33 | */ |
| 34 | |
| 35 | // Implementation of atomic operations for x86. This file should not |
| 36 | // be included directly. Clients should instead include |
| 37 | // "base/atomicops.h". |
| 38 | |
| 39 | #ifndef BASE_ATOMICOPS_INTERNALS_X86_H_ |
| 40 | #define BASE_ATOMICOPS_INTERNALS_X86_H_ |
| 41 | #include "base/basictypes.h" |
| 42 | |
// 32-bit signed integer type that the 32-bit atomic primitives in this file
// operate on.
typedef int32_t Atomic32;
// Signals that this implementation also provides the 64-bit (Atomic64)
// operations defined further down in this file.
#define BASE_HAS_ATOMIC64 1  // Use only in tests and base/atomic*
| 45 | |
| 46 | |
| 47 | // NOTE(vchen): x86 does not need to define AtomicWordCastType, because it |
| 48 | // already matches Atomic32 or Atomic64, depending on the platform. |
| 49 | |
| 50 | |
// CPU capability flags for the x86 processor this code is running on.
//
// This struct is not part of the public API of this module; clients may not
// use it.  Values may not be correct before main() is run, but are set
// conservatively.
struct AtomicOps_x86CPUFeatureStruct {
  bool has_sse2;        // Processor has SSE2.  The 32-bit MemoryBarrier()
                        // checks this flag before issuing mfence.
  bool has_cmpxchg16b;  // Processor supports cmpxchg16b instruction.
};
| 59 | |
// Declaration of the feature record read by the code in this header (the
// 32-bit MemoryBarrier() consults has_sse2).  It is only declared here; the
// definition lives outside this header.
ATTRIBUTE_VISIBILITY_HIDDEN
extern struct AtomicOps_x86CPUFeatureStruct AtomicOps_Internalx86CPUFeatures;
| 62 | |
| 63 | |
// Compiler-level barrier: an empty asm statement with a "memory" clobber.
// It emits no instruction; it only tells the compiler not to move memory
// accesses across this point.  The macro is #undef'd at the end of this file.
#define ATOMICOPS_COMPILER_BARRIER() __asm__ __volatile__("" : : : "memory")
| 65 | |
| 66 | |
| 67 | namespace base { |
| 68 | namespace subtle { |
| 69 | |
// 64-bit signed integer type that the 64-bit atomic primitives below
// operate on.
typedef int64_t Atomic64;
| 71 | |
| 72 | // 32-bit low-level operations on any platform. |
| 73 | |
// Atomically compares *ptr with old_value and, if they are equal, stores
// new_value into *ptr.  Returns the value *ptr held before the operation, so
// the swap took place exactly when the result equals old_value.
inline Atomic32 NoBarrier_CompareAndSwap(volatile Atomic32* ptr,
                                         Atomic32 old_value,
                                         Atomic32 new_value) {
  Atomic32 prev;
  // The "0" constraint preloads eax with old_value; cmpxchgl compares eax
  // with *ptr and leaves the previous contents of *ptr in eax ("=a").
  __asm__ __volatile__("lock; cmpxchgl %1,%2"
                       : "=a" (prev)
                       : "q" (new_value), "m" (*ptr), "0" (old_value)
                       : "memory");
  return prev;
}
| 84 | |
// Atomically stores new_value into *ptr and returns the value that *ptr held
// immediately before the store.
inline Atomic32 NoBarrier_AtomicExchange(volatile Atomic32* ptr,
                                         Atomic32 new_value) {
  // new_value is both the input register ("0") and the output ("=r"): after
  // the swap it holds what used to be in memory.
  __asm__ __volatile__("xchgl %1,%0"  // The lock prefix is implicit for xchg.
                       : "=r" (new_value)
                       : "m" (*ptr), "0" (new_value)
                       : "memory");
  return new_value;  // Now it's the previous value.
}
| 93 | |
| 94 | inline Atomic32 Acquire_AtomicExchange(volatile Atomic32* ptr, |
| 95 | Atomic32 new_value) { |
| 96 | Atomic32 old_val = NoBarrier_AtomicExchange(ptr, new_value); |
| 97 | return old_val; |
| 98 | } |
| 99 | |
| 100 | inline Atomic32 Release_AtomicExchange(volatile Atomic32* ptr, |
| 101 | Atomic32 new_value) { |
| 102 | // xchgl already has release memory barrier semantics. |
| 103 | return NoBarrier_AtomicExchange(ptr, new_value); |
| 104 | } |
| 105 | |
| 106 | inline Atomic32 Acquire_CompareAndSwap(volatile Atomic32* ptr, |
| 107 | Atomic32 old_value, |
| 108 | Atomic32 new_value) { |
| 109 | Atomic32 x = NoBarrier_CompareAndSwap(ptr, old_value, new_value); |
| 110 | return x; |
| 111 | } |
| 112 | |
| 113 | inline Atomic32 Release_CompareAndSwap(volatile Atomic32* ptr, |
| 114 | Atomic32 old_value, |
| 115 | Atomic32 new_value) { |
| 116 | return NoBarrier_CompareAndSwap(ptr, old_value, new_value); |
| 117 | } |
| 118 | |
// Stores value into *ptr with a plain volatile write; no compiler or
// hardware barrier is added.
inline void NoBarrier_Store(volatile Atomic32* ptr, Atomic32 value) {
  *ptr = value;
}
| 122 | |
| 123 | #if defined(__x86_64__) |
| 124 | |
// The 64-bit implementation of the memory barrier can be simpler, because
// "mfence" is guaranteed to exist.
//
// Issues an mfence instruction; the "memory" clobber also keeps the compiler
// from moving memory accesses across it.
inline void MemoryBarrier() {
  __asm__ __volatile__("mfence" : : : "memory");
}
| 130 | |
Austin Schuh | 745610d | 2015-09-06 18:19:50 -0700 | [diff] [blame] | 131 | #else |
| 132 | |
| 133 | inline void MemoryBarrier() { |
| 134 | if (AtomicOps_Internalx86CPUFeatures.has_sse2) { |
| 135 | __asm__ __volatile__("mfence" : : : "memory"); |
| 136 | } else { // mfence is faster but not present on PIII |
| 137 | Atomic32 x = 0; |
| 138 | Acquire_AtomicExchange(&x, 0); |
| 139 | } |
| 140 | } |
| 141 | |
Austin Schuh | 745610d | 2015-09-06 18:19:50 -0700 | [diff] [blame] | 142 | #endif |
| 143 | |
// Stores value into *ptr with release ordering.  The compiler barrier comes
// first so the compiler cannot sink earlier memory accesses below the store.
inline void Release_Store(volatile Atomic32* ptr, Atomic32 value) {
  ATOMICOPS_COMPILER_BARRIER();
  *ptr = value;  // An x86 store acts as a release barrier.
  // See comments in Atomic64 version of Release_Store(), below.
}
| 149 | |
| 150 | inline Atomic32 NoBarrier_Load(volatile const Atomic32* ptr) { |
| 151 | return *ptr; |
| 152 | } |
| 153 | |
// Reads *ptr with acquire ordering.  The compiler barrier follows the load so
// the compiler cannot hoist later memory accesses above it.
inline Atomic32 Acquire_Load(volatile const Atomic32* ptr) {
  Atomic32 value = *ptr;  // An x86 load acts as an acquire barrier.
  // See comments in Atomic64 version of Release_Store(), below.
  ATOMICOPS_COMPILER_BARRIER();
  return value;
}
| 160 | |
Austin Schuh | 745610d | 2015-09-06 18:19:50 -0700 | [diff] [blame] | 161 | #if defined(__x86_64__) |
| 162 | |
| 163 | // 64-bit low-level operations on 64-bit platform. |
| 164 | |
// 64-bit counterpart of the Atomic32 compare-and-swap: atomically compares
// *ptr with old_value and, if they are equal, stores new_value into *ptr.
// Returns the value *ptr held before the operation.
inline Atomic64 NoBarrier_CompareAndSwap(volatile Atomic64* ptr,
                                         Atomic64 old_value,
                                         Atomic64 new_value) {
  Atomic64 prev;
  // The "0" constraint preloads rax with old_value; cmpxchgq compares rax
  // with *ptr and leaves the previous contents of *ptr in rax ("=a").
  __asm__ __volatile__("lock; cmpxchgq %1,%2"
                       : "=a" (prev)
                       : "q" (new_value), "m" (*ptr), "0" (old_value)
                       : "memory");
  return prev;
}
| 175 | |
// Atomically stores new_value into *ptr and returns the value that *ptr held
// immediately before the store (64-bit version).
inline Atomic64 NoBarrier_AtomicExchange(volatile Atomic64* ptr,
                                         Atomic64 new_value) {
  // new_value is both the input register ("0") and the output ("=r"): after
  // the swap it holds what used to be in memory.
  __asm__ __volatile__("xchgq %1,%0"  // The lock prefix is implicit for xchg.
                       : "=r" (new_value)
                       : "m" (*ptr), "0" (new_value)
                       : "memory");
  return new_value;  // Now it's the previous value.
}
| 184 | |
| 185 | inline Atomic64 Acquire_AtomicExchange(volatile Atomic64* ptr, |
| 186 | Atomic64 new_value) { |
| 187 | Atomic64 old_val = NoBarrier_AtomicExchange(ptr, new_value); |
| 188 | return old_val; |
| 189 | } |
| 190 | |
| 191 | inline Atomic64 Release_AtomicExchange(volatile Atomic64* ptr, |
| 192 | Atomic64 new_value) { |
| 193 | // xchgq already has release memory barrier semantics. |
| 194 | return NoBarrier_AtomicExchange(ptr, new_value); |
| 195 | } |
| 196 | |
// Stores value into *ptr with a plain volatile 64-bit write; no compiler or
// hardware barrier is added.
inline void NoBarrier_Store(volatile Atomic64* ptr, Atomic64 value) {
  *ptr = value;
}
| 200 | |
// Stores value into *ptr with release ordering.  The compiler barrier comes
// first so the compiler cannot sink earlier memory accesses below the store;
// the hardware ordering argument is given in the comments inside the body.
inline void Release_Store(volatile Atomic64* ptr, Atomic64 value) {
  ATOMICOPS_COMPILER_BARRIER();

  *ptr = value;  // An x86 store acts as a release barrier
                 // for current AMD/Intel chips as of Jan 2008.
                 // See also Acquire_Load(), below.

  // When new chips come out, check:
  //  IA-32 Intel Architecture Software Developer's Manual, Volume 3:
  //  System Programming Guide, Chapter 7: Multiple-processor management,
  //  Section 7.2, Memory Ordering.
  // Last seen at:
  //   http://developer.intel.com/design/pentium4/manuals/index_new.htm
  //
  // x86 stores/loads fail to act as barriers for a few instructions (clflush
  // maskmovdqu maskmovq movntdq movnti movntpd movntps movntq) but these are
  // not generated by the compiler, and are rare.  Users of these instructions
  // need to know about cache behaviour in any case since all of these involve
  // either flushing cache lines or non-temporal cache hints.
}
| 221 | |
| 222 | inline Atomic64 NoBarrier_Load(volatile const Atomic64* ptr) { |
| 223 | return *ptr; |
| 224 | } |
| 225 | |
// Reads *ptr with acquire ordering.  The compiler barrier follows the load so
// the compiler cannot hoist later memory accesses above it.
inline Atomic64 Acquire_Load(volatile const Atomic64* ptr) {
  Atomic64 value = *ptr;  // An x86 load acts as an acquire barrier,
                          // for current AMD/Intel chips as of Jan 2008.
                          // See also Release_Store(), above.
  ATOMICOPS_COMPILER_BARRIER();
  return value;
}
| 233 | |
Austin Schuh | 745610d | 2015-09-06 18:19:50 -0700 | [diff] [blame] | 234 | #else // defined(__x86_64__) |
| 235 | |
| 236 | // 64-bit low-level operations on 32-bit platform. |
| 237 | |
| 238 | #if !((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 1)) |
| 239 | // For compilers older than gcc 4.1, we use inline asm. |
| 240 | // |
| 241 | // Potential pitfalls: |
| 242 | // |
| 243 | // 1. %ebx points to Global offset table (GOT) with -fPIC. |
| 244 | // We need to preserve this register. |
| 245 | // 2. When explicit registers are used in inline asm, the |
| 246 | // compiler may not be aware of it and might try to reuse |
| 247 | // the same register for another argument which has constraints |
| 248 | // that allow it ("r" for example). |
| 249 | |
// Fallback 64-bit compare-and-swap built on cmpxchg8b, compiled only when the
// surrounding #if selects compilers older than gcc 4.1.
//
// Compares the 64-bit value at ptr with old_value; if they match, new_value
// is written to *ptr.  Returns the value *ptr held before the operation.
//
// ebx is saved and restored by hand around the instruction (see pitfall 1 in
// the comment preceding this function), and ptr / &new_value are pinned to
// edi / esi so they cannot collide with the registers cmpxchg8b uses.
inline Atomic64 __sync_val_compare_and_swap(volatile Atomic64* ptr,
                                            Atomic64 old_value,
                                            Atomic64 new_value) {
  Atomic64 prev;
  __asm__ __volatile__("push %%ebx\n\t"
                       "movl (%3), %%ebx\n\t"     // Move 64-bit new_value into
                       "movl 4(%3), %%ecx\n\t"    // ecx:ebx
                       "lock; cmpxchg8b (%1)\n\t" // If edx:eax (old_value) same
                       "pop %%ebx\n\t"
                       : "=A" (prev)              // as contents of ptr:
                       : "D" (ptr),               //   ecx:ebx => ptr
                         "0" (old_value),         // else:
                         "S" (&new_value)         //   old *ptr => edx:eax
                       : "memory", "%ecx");
  return prev;
}
| 266 | #endif // Compiler < gcc-4.1 |
| 267 | |
| 268 | inline Atomic64 NoBarrier_CompareAndSwap(volatile Atomic64* ptr, |
| 269 | Atomic64 old_val, |
| 270 | Atomic64 new_val) { |
| 271 | return __sync_val_compare_and_swap(ptr, old_val, new_val); |
| 272 | } |
| 273 | |
| 274 | inline Atomic64 NoBarrier_AtomicExchange(volatile Atomic64* ptr, |
| 275 | Atomic64 new_val) { |
| 276 | Atomic64 old_val; |
| 277 | |
| 278 | do { |
| 279 | old_val = *ptr; |
| 280 | } while (__sync_val_compare_and_swap(ptr, old_val, new_val) != old_val); |
| 281 | |
| 282 | return old_val; |
| 283 | } |
| 284 | |
| 285 | inline Atomic64 Acquire_AtomicExchange(volatile Atomic64* ptr, |
| 286 | Atomic64 new_val) { |
| 287 | Atomic64 old_val = NoBarrier_AtomicExchange(ptr, new_val); |
| 288 | return old_val; |
| 289 | } |
| 290 | |
| 291 | inline Atomic64 Release_AtomicExchange(volatile Atomic64* ptr, |
| 292 | Atomic64 new_val) { |
| 293 | return NoBarrier_AtomicExchange(ptr, new_val); |
| 294 | } |
| 295 | |
// Stores the 64-bit value into *ptr on a 32-bit build by routing it through
// the mm0 register, then issues emms to reset the MMX/FP state.  No compiler
// or hardware barrier is added beyond what the asm statement itself implies.
inline void NoBarrier_Store(volatile Atomic64* ptr, Atomic64 value) {
  __asm__ __volatile__("movq %1, %%mm0\n\t"  // Use mmx reg for 64-bit atomic
                       "movq %%mm0, %0\n\t"  // moves (ptr could be read-only)
                       "emms\n\t"            // Empty mmx state/Reset FP regs
                       : "=m" (*ptr)
                       : "m" (value)
                       : // mark the FP stack and mmx registers as clobbered
                         "st", "st(1)", "st(2)", "st(3)", "st(4)",
                         "st(5)", "st(6)", "st(7)", "mm0", "mm1",
                         "mm2", "mm3", "mm4", "mm5", "mm6", "mm7");
}
| 307 | |
// Stores value into *ptr with release ordering on a 32-bit build: a compiler
// barrier first, then the 64-bit store performed by NoBarrier_Store().
inline void Release_Store(volatile Atomic64* ptr, Atomic64 value) {
  ATOMICOPS_COMPILER_BARRIER();
  NoBarrier_Store(ptr, value);
}
| 312 | |
// Reads the 64-bit value at *ptr on a 32-bit build by routing it through the
// mm0 register into a local, then issues emms to reset the MMX/FP state.
// Only a read of *ptr is performed, which is why this also works when ptr
// refers to read-only memory.
inline Atomic64 NoBarrier_Load(volatile const Atomic64* ptr) {
  Atomic64 value;
  __asm__ __volatile__("movq %1, %%mm0\n\t"  // Use mmx reg for 64-bit atomic
                       "movq %%mm0, %0\n\t"  // moves (ptr could be read-only)
                       "emms\n\t"            // Empty mmx state/Reset FP regs
                       : "=m" (value)
                       : "m" (*ptr)
                       : // mark the FP stack and mmx registers as clobbered
                         "st", "st(1)", "st(2)", "st(3)", "st(4)",
                         "st(5)", "st(6)", "st(7)", "mm0", "mm1",
                         "mm2", "mm3", "mm4", "mm5", "mm6", "mm7");
  return value;
}
| 326 | |
// Reads *ptr with acquire ordering on a 32-bit build: the 64-bit load is done
// by NoBarrier_Load(), followed by a compiler barrier so later memory
// accesses cannot be hoisted above it by the compiler.
inline Atomic64 Acquire_Load(volatile const Atomic64* ptr) {
  Atomic64 value = NoBarrier_Load(ptr);
  ATOMICOPS_COMPILER_BARRIER();
  return value;
}
| 332 | |
Austin Schuh | 745610d | 2015-09-06 18:19:50 -0700 | [diff] [blame] | 333 | #endif // defined(__x86_64__) |
| 334 | |
| 335 | inline Atomic64 Acquire_CompareAndSwap(volatile Atomic64* ptr, |
| 336 | Atomic64 old_value, |
| 337 | Atomic64 new_value) { |
| 338 | Atomic64 x = NoBarrier_CompareAndSwap(ptr, old_value, new_value); |
| 339 | return x; |
| 340 | } |
| 341 | |
| 342 | inline Atomic64 Release_CompareAndSwap(volatile Atomic64* ptr, |
| 343 | Atomic64 old_value, |
| 344 | Atomic64 new_value) { |
| 345 | return NoBarrier_CompareAndSwap(ptr, old_value, new_value); |
| 346 | } |
| 347 | |
| 348 | } // namespace base::subtle |
| 349 | } // namespace base |
| 350 | |
| 351 | #undef ATOMICOPS_COMPILER_BARRIER |
| 352 | |
| 353 | #endif // BASE_ATOMICOPS_INTERNALS_X86_H_ |