blob: 94c7aac77784d9153d7c7179caa87b1827c7ecdd [file] [log] [blame]
// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
/* Copyright (c) 2006, Google Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met:
 *
 *     * Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above
 * copyright notice, this list of conditions and the following disclaimer
 * in the documentation and/or other materials provided with the
 * distribution.
 *     * Neither the name of Google Inc. nor the names of its
 * contributors may be used to endorse or promote products derived from
 * this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * ---
 * Author: Sanjay Ghemawat
 */
34
// Implementation of atomic operations for x86.  This file should not
// be included directly.  Clients should instead include
// "base/atomicops.h".
38
#ifndef BASE_ATOMICOPS_INTERNALS_X86_H_
#define BASE_ATOMICOPS_INTERNALS_X86_H_
#include "base/basictypes.h"
42
// 32-bit signed integer type operated on by the 32-bit atomic primitives.
typedef int32_t Atomic32;
// Signals that 64-bit atomic operations are provided by this header.
#define BASE_HAS_ATOMIC64 1  // Use only in tests and base/atomic*
45
46
// NOTE(vchen): x86 does not need to define AtomicWordCastType, because it
// already matches Atomic32 or Atomic64, depending on the platform.
49
50
// This struct is not part of the public API of this module; clients may not
// use it.
// Features of this x86.  Values may not be correct before main() is run,
// but are set conservatively.
struct AtomicOps_x86CPUFeatureStruct {
  bool has_sse2;        // Processor has SSE2.
  bool has_cmpxchg16b;  // Processor supports cmpxchg16b instruction.
};
59
60ATTRIBUTE_VISIBILITY_HIDDEN
61extern struct AtomicOps_x86CPUFeatureStruct AtomicOps_Internalx86CPUFeatures;
62
63
// Compiler-only barrier: an empty asm statement with a "memory" clobber.  It
// emits no instruction but stops the compiler from moving memory accesses
// across it.
#define ATOMICOPS_COMPILER_BARRIER() __asm__ __volatile__("" : : : "memory")
65
66
namespace base {
namespace subtle {
69
// 64-bit signed integer type operated on by the 64-bit atomic primitives.
typedef int64_t Atomic64;
71
// 32-bit low-level operations on any platform.
73
74inline Atomic32 NoBarrier_CompareAndSwap(volatile Atomic32* ptr,
75 Atomic32 old_value,
76 Atomic32 new_value) {
77 Atomic32 prev;
78 __asm__ __volatile__("lock; cmpxchgl %1,%2"
79 : "=a" (prev)
80 : "q" (new_value), "m" (*ptr), "0" (old_value)
81 : "memory");
82 return prev;
83}
84
85inline Atomic32 NoBarrier_AtomicExchange(volatile Atomic32* ptr,
86 Atomic32 new_value) {
87 __asm__ __volatile__("xchgl %1,%0" // The lock prefix is implicit for xchg.
88 : "=r" (new_value)
89 : "m" (*ptr), "0" (new_value)
90 : "memory");
91 return new_value; // Now it's the previous value.
92}
93
94inline Atomic32 Acquire_AtomicExchange(volatile Atomic32* ptr,
95 Atomic32 new_value) {
96 Atomic32 old_val = NoBarrier_AtomicExchange(ptr, new_value);
97 return old_val;
98}
99
100inline Atomic32 Release_AtomicExchange(volatile Atomic32* ptr,
101 Atomic32 new_value) {
102 // xchgl already has release memory barrier semantics.
103 return NoBarrier_AtomicExchange(ptr, new_value);
104}
105
106inline Atomic32 Acquire_CompareAndSwap(volatile Atomic32* ptr,
107 Atomic32 old_value,
108 Atomic32 new_value) {
109 Atomic32 x = NoBarrier_CompareAndSwap(ptr, old_value, new_value);
110 return x;
111}
112
113inline Atomic32 Release_CompareAndSwap(volatile Atomic32* ptr,
114 Atomic32 old_value,
115 Atomic32 new_value) {
116 return NoBarrier_CompareAndSwap(ptr, old_value, new_value);
117}
118
119inline void NoBarrier_Store(volatile Atomic32* ptr, Atomic32 value) {
120 *ptr = value;
121}
122
#if defined(__x86_64__)

// 64-bit implementations of memory barrier can be simpler, because
// "mfence" is guaranteed to exist.
inline void MemoryBarrier() {
  __asm__ __volatile__("mfence" : : : "memory");
}

#else

// 32-bit memory barrier.  Uses "mfence" when the CPU feature flags report
// SSE2; otherwise falls back to an atomic exchange on a local dummy variable.
inline void MemoryBarrier() {
  if (AtomicOps_Internalx86CPUFeatures.has_sse2) {
    __asm__ __volatile__("mfence" : : : "memory");
  } else {  // mfence is faster but not present on PIII
    Atomic32 x = 0;
    Acquire_AtomicExchange(&x, 0);
  }
}

#endif
143
144inline void Release_Store(volatile Atomic32* ptr, Atomic32 value) {
145 ATOMICOPS_COMPILER_BARRIER();
146 *ptr = value; // An x86 store acts as a release barrier.
147 // See comments in Atomic64 version of Release_Store(), below.
148}
149
150inline Atomic32 NoBarrier_Load(volatile const Atomic32* ptr) {
151 return *ptr;
152}
153
154inline Atomic32 Acquire_Load(volatile const Atomic32* ptr) {
155 Atomic32 value = *ptr; // An x86 load acts as a acquire barrier.
156 // See comments in Atomic64 version of Release_Store(), below.
157 ATOMICOPS_COMPILER_BARRIER();
158 return value;
159}
160
#if defined(__x86_64__)

// 64-bit low-level operations on 64-bit platform.
164
165inline Atomic64 NoBarrier_CompareAndSwap(volatile Atomic64* ptr,
166 Atomic64 old_value,
167 Atomic64 new_value) {
168 Atomic64 prev;
169 __asm__ __volatile__("lock; cmpxchgq %1,%2"
170 : "=a" (prev)
171 : "q" (new_value), "m" (*ptr), "0" (old_value)
172 : "memory");
173 return prev;
174}
175
176inline Atomic64 NoBarrier_AtomicExchange(volatile Atomic64* ptr,
177 Atomic64 new_value) {
178 __asm__ __volatile__("xchgq %1,%0" // The lock prefix is implicit for xchg.
179 : "=r" (new_value)
180 : "m" (*ptr), "0" (new_value)
181 : "memory");
182 return new_value; // Now it's the previous value.
183}
184
185inline Atomic64 Acquire_AtomicExchange(volatile Atomic64* ptr,
186 Atomic64 new_value) {
187 Atomic64 old_val = NoBarrier_AtomicExchange(ptr, new_value);
188 return old_val;
189}
190
191inline Atomic64 Release_AtomicExchange(volatile Atomic64* ptr,
192 Atomic64 new_value) {
193 // xchgq already has release memory barrier semantics.
194 return NoBarrier_AtomicExchange(ptr, new_value);
195}
196
197inline void NoBarrier_Store(volatile Atomic64* ptr, Atomic64 value) {
198 *ptr = value;
199}
200
Austin Schuh745610d2015-09-06 18:19:50 -0700201inline void Release_Store(volatile Atomic64* ptr, Atomic64 value) {
202 ATOMICOPS_COMPILER_BARRIER();
203
204 *ptr = value; // An x86 store acts as a release barrier
205 // for current AMD/Intel chips as of Jan 2008.
206 // See also Acquire_Load(), below.
207
208 // When new chips come out, check:
209 // IA-32 Intel Architecture Software Developer's Manual, Volume 3:
210 // System Programming Guide, Chatper 7: Multiple-processor management,
211 // Section 7.2, Memory Ordering.
212 // Last seen at:
213 // http://developer.intel.com/design/pentium4/manuals/index_new.htm
214 //
215 // x86 stores/loads fail to act as barriers for a few instructions (clflush
216 // maskmovdqu maskmovq movntdq movnti movntpd movntps movntq) but these are
217 // not generated by the compiler, and are rare. Users of these instructions
218 // need to know about cache behaviour in any case since all of these involve
219 // either flushing cache lines or non-temporal cache hints.
220}
221
222inline Atomic64 NoBarrier_Load(volatile const Atomic64* ptr) {
223 return *ptr;
224}
225
226inline Atomic64 Acquire_Load(volatile const Atomic64* ptr) {
227 Atomic64 value = *ptr; // An x86 load acts as a acquire barrier,
228 // for current AMD/Intel chips as of Jan 2008.
229 // See also Release_Store(), above.
230 ATOMICOPS_COMPILER_BARRIER();
231 return value;
232}
233
#else  // defined(__x86_64__)

// 64-bit low-level operations on 32-bit platform.
237
#if !((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 1))
// For compilers older than gcc 4.1, we use inline asm.
//
// Potential pitfalls:
//
// 1. %ebx points to Global offset table (GOT) with -fPIC.
//    We need to preserve this register.
// 2. When explicit registers are used in inline asm, the
//    compiler may not be aware of it and might try to reuse
//    the same register for another argument which has constraints
//    that allow it ("r" for example).

// 64-bit compare-and-swap for 32-bit x86 built on "lock; cmpxchg8b".
// Returns the value *ptr held before the instruction.  ebx is saved and
// restored by hand around the instruction (see pitfall 1 above), and
// new_value is passed by address so its halves can be loaded into ecx:ebx
// inside the asm.
inline Atomic64 __sync_val_compare_and_swap(volatile Atomic64* ptr,
                                            Atomic64 old_value,
                                            Atomic64 new_value) {
  Atomic64 prev;
  __asm__ __volatile__("push %%ebx\n\t"
                       "movl (%3), %%ebx\n\t"     // Move 64-bit new_value into
                       "movl 4(%3), %%ecx\n\t"    // ecx:ebx
                       "lock; cmpxchg8b (%1)\n\t" // If edx:eax (old_value) same
                       "pop %%ebx\n\t"
                       : "=A" (prev)              // as contents of ptr:
                       : "D" (ptr),               //   ecx:ebx => ptr
                         "0" (old_value),         // else:
                         "S" (&new_value)         //   old *ptr => edx:eax
                       : "memory", "%ecx");
  return prev;
}
#endif  // Compiler < gcc-4.1
267
268inline Atomic64 NoBarrier_CompareAndSwap(volatile Atomic64* ptr,
269 Atomic64 old_val,
270 Atomic64 new_val) {
271 return __sync_val_compare_and_swap(ptr, old_val, new_val);
272}
273
274inline Atomic64 NoBarrier_AtomicExchange(volatile Atomic64* ptr,
275 Atomic64 new_val) {
276 Atomic64 old_val;
277
278 do {
279 old_val = *ptr;
280 } while (__sync_val_compare_and_swap(ptr, old_val, new_val) != old_val);
281
282 return old_val;
283}
284
285inline Atomic64 Acquire_AtomicExchange(volatile Atomic64* ptr,
286 Atomic64 new_val) {
287 Atomic64 old_val = NoBarrier_AtomicExchange(ptr, new_val);
288 return old_val;
289}
290
291inline Atomic64 Release_AtomicExchange(volatile Atomic64* ptr,
292 Atomic64 new_val) {
293 return NoBarrier_AtomicExchange(ptr, new_val);
294}
295
296inline void NoBarrier_Store(volatile Atomic64* ptr, Atomic64 value) {
297 __asm__ __volatile__("movq %1, %%mm0\n\t" // Use mmx reg for 64-bit atomic
298 "movq %%mm0, %0\n\t" // moves (ptr could be read-only)
299 "emms\n\t" // Empty mmx state/Reset FP regs
300 : "=m" (*ptr)
301 : "m" (value)
302 : // mark the FP stack and mmx registers as clobbered
303 "st", "st(1)", "st(2)", "st(3)", "st(4)",
304 "st(5)", "st(6)", "st(7)", "mm0", "mm1",
305 "mm2", "mm3", "mm4", "mm5", "mm6", "mm7");
306}
307
Austin Schuh745610d2015-09-06 18:19:50 -0700308inline void Release_Store(volatile Atomic64* ptr, Atomic64 value) {
309 ATOMICOPS_COMPILER_BARRIER();
310 NoBarrier_Store(ptr, value);
311}
312
313inline Atomic64 NoBarrier_Load(volatile const Atomic64* ptr) {
314 Atomic64 value;
315 __asm__ __volatile__("movq %1, %%mm0\n\t" // Use mmx reg for 64-bit atomic
316 "movq %%mm0, %0\n\t" // moves (ptr could be read-only)
317 "emms\n\t" // Empty mmx state/Reset FP regs
318 : "=m" (value)
319 : "m" (*ptr)
320 : // mark the FP stack and mmx registers as clobbered
321 "st", "st(1)", "st(2)", "st(3)", "st(4)",
322 "st(5)", "st(6)", "st(7)", "mm0", "mm1",
323 "mm2", "mm3", "mm4", "mm5", "mm6", "mm7");
324 return value;
325}
326
327inline Atomic64 Acquire_Load(volatile const Atomic64* ptr) {
328 Atomic64 value = NoBarrier_Load(ptr);
329 ATOMICOPS_COMPILER_BARRIER();
330 return value;
331}
332
#endif  // defined(__x86_64__)
334
335inline Atomic64 Acquire_CompareAndSwap(volatile Atomic64* ptr,
336 Atomic64 old_value,
337 Atomic64 new_value) {
338 Atomic64 x = NoBarrier_CompareAndSwap(ptr, old_value, new_value);
339 return x;
340}
341
342inline Atomic64 Release_CompareAndSwap(volatile Atomic64* ptr,
343 Atomic64 old_value,
344 Atomic64 new_value) {
345 return NoBarrier_CompareAndSwap(ptr, old_value, new_value);
346}
347
} // namespace base::subtle
} // namespace base

#undef ATOMICOPS_COMPILER_BARRIER

#endif  // BASE_ATOMICOPS_INTERNALS_X86_H_