blob: e441ac7e67340ca55706ec91e47d029d631cac4e [file] [log] [blame]
Austin Schuh745610d2015-09-06 18:19:50 -07001// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
2/* Copyright (c) 2006, Google Inc.
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are
7 * met:
8 *
9 * * Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * * Redistributions in binary form must reproduce the above
12 * copyright notice, this list of conditions and the following disclaimer
13 * in the documentation and/or other materials provided with the
14 * distribution.
15 * * Neither the name of Google Inc. nor the names of its
16 * contributors may be used to endorse or promote products derived from
17 * this software without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 *
31 * ---
32 * Author: Sanjay Ghemawat
33 */
34
35// Implementation of atomic operations for x86. This file should not
36// be included directly. Clients should instead include
37// "base/atomicops.h".
38
39#ifndef BASE_ATOMICOPS_INTERNALS_X86_H_
40#define BASE_ATOMICOPS_INTERNALS_X86_H_
41#include "base/basictypes.h"
42
// 32-bit atomic word type used by every operation in this header.
typedef int32_t Atomic32;

// Signals that 64-bit atomic operations are provided.
#define BASE_HAS_ATOMIC64 1  // Use only in tests and base/atomic*
45
46
47// NOTE(vchen): x86 does not need to define AtomicWordCastType, because it
48// already matches Atomic32 or Atomic64, depending on the platform.
49
50
// Feature flags describing the x86 processor in use.
//
// Internal to this module: clients may not use this struct.  Values may
// not be correct before main() is run, but are set conservatively.
struct AtomicOps_x86CPUFeatureStruct {
  bool has_sse2;        // Processor has SSE2.
  bool has_cmpxchg16b;  // Processor supports cmpxchg16b instruction.
};
59
60ATTRIBUTE_VISIBILITY_HIDDEN
61extern struct AtomicOps_x86CPUFeatureStruct AtomicOps_Internalx86CPUFeatures;
62
63
// Empty asm statement with a "memory" clobber: emits no instruction but
// stops the compiler from moving memory accesses across it.
#define ATOMICOPS_COMPILER_BARRIER() \
  __asm__ __volatile__("" : : : "memory")
65
66
67namespace base {
68namespace subtle {
69
// 64-bit atomic word type.
typedef int64_t Atomic64;
71
72// 32-bit low-level operations on any platform.
73
74inline Atomic32 NoBarrier_CompareAndSwap(volatile Atomic32* ptr,
75 Atomic32 old_value,
76 Atomic32 new_value) {
77 Atomic32 prev;
78 __asm__ __volatile__("lock; cmpxchgl %1,%2"
79 : "=a" (prev)
80 : "q" (new_value), "m" (*ptr), "0" (old_value)
81 : "memory");
82 return prev;
83}
84
85inline Atomic32 NoBarrier_AtomicExchange(volatile Atomic32* ptr,
86 Atomic32 new_value) {
87 __asm__ __volatile__("xchgl %1,%0" // The lock prefix is implicit for xchg.
88 : "=r" (new_value)
89 : "m" (*ptr), "0" (new_value)
90 : "memory");
91 return new_value; // Now it's the previous value.
92}
93
94inline Atomic32 Acquire_AtomicExchange(volatile Atomic32* ptr,
95 Atomic32 new_value) {
96 Atomic32 old_val = NoBarrier_AtomicExchange(ptr, new_value);
97 return old_val;
98}
99
100inline Atomic32 Release_AtomicExchange(volatile Atomic32* ptr,
101 Atomic32 new_value) {
102 // xchgl already has release memory barrier semantics.
103 return NoBarrier_AtomicExchange(ptr, new_value);
104}
105
106inline Atomic32 Acquire_CompareAndSwap(volatile Atomic32* ptr,
107 Atomic32 old_value,
108 Atomic32 new_value) {
109 Atomic32 x = NoBarrier_CompareAndSwap(ptr, old_value, new_value);
110 return x;
111}
112
113inline Atomic32 Release_CompareAndSwap(volatile Atomic32* ptr,
114 Atomic32 old_value,
115 Atomic32 new_value) {
116 return NoBarrier_CompareAndSwap(ptr, old_value, new_value);
117}
118
119inline void NoBarrier_Store(volatile Atomic32* ptr, Atomic32 value) {
120 *ptr = value;
121}
122
123#if defined(__x86_64__)
124
// 64-bit implementations of memory barrier can be simpler, because
// "mfence" is guaranteed to exist.
// Full memory barrier: issues an mfence instruction, with a "memory"
// clobber so the compiler does not reorder accesses around it either.
inline void MemoryBarrier() {
  __asm__ __volatile__("mfence" : : : "memory");
}
130
131inline void Acquire_Store(volatile Atomic32* ptr, Atomic32 value) {
132 *ptr = value;
133 MemoryBarrier();
134}
135
136#else
137
138inline void MemoryBarrier() {
139 if (AtomicOps_Internalx86CPUFeatures.has_sse2) {
140 __asm__ __volatile__("mfence" : : : "memory");
141 } else { // mfence is faster but not present on PIII
142 Atomic32 x = 0;
143 Acquire_AtomicExchange(&x, 0);
144 }
145}
146
147inline void Acquire_Store(volatile Atomic32* ptr, Atomic32 value) {
148 if (AtomicOps_Internalx86CPUFeatures.has_sse2) {
149 *ptr = value;
150 __asm__ __volatile__("mfence" : : : "memory");
151 } else {
152 Acquire_AtomicExchange(ptr, value);
153 }
154}
155#endif
156
157inline void Release_Store(volatile Atomic32* ptr, Atomic32 value) {
158 ATOMICOPS_COMPILER_BARRIER();
159 *ptr = value; // An x86 store acts as a release barrier.
160 // See comments in Atomic64 version of Release_Store(), below.
161}
162
163inline Atomic32 NoBarrier_Load(volatile const Atomic32* ptr) {
164 return *ptr;
165}
166
167inline Atomic32 Acquire_Load(volatile const Atomic32* ptr) {
168 Atomic32 value = *ptr; // An x86 load acts as a acquire barrier.
169 // See comments in Atomic64 version of Release_Store(), below.
170 ATOMICOPS_COMPILER_BARRIER();
171 return value;
172}
173
174inline Atomic32 Release_Load(volatile const Atomic32* ptr) {
175 MemoryBarrier();
176 return *ptr;
177}
178
179#if defined(__x86_64__)
180
181// 64-bit low-level operations on 64-bit platform.
182
183inline Atomic64 NoBarrier_CompareAndSwap(volatile Atomic64* ptr,
184 Atomic64 old_value,
185 Atomic64 new_value) {
186 Atomic64 prev;
187 __asm__ __volatile__("lock; cmpxchgq %1,%2"
188 : "=a" (prev)
189 : "q" (new_value), "m" (*ptr), "0" (old_value)
190 : "memory");
191 return prev;
192}
193
194inline Atomic64 NoBarrier_AtomicExchange(volatile Atomic64* ptr,
195 Atomic64 new_value) {
196 __asm__ __volatile__("xchgq %1,%0" // The lock prefix is implicit for xchg.
197 : "=r" (new_value)
198 : "m" (*ptr), "0" (new_value)
199 : "memory");
200 return new_value; // Now it's the previous value.
201}
202
203inline Atomic64 Acquire_AtomicExchange(volatile Atomic64* ptr,
204 Atomic64 new_value) {
205 Atomic64 old_val = NoBarrier_AtomicExchange(ptr, new_value);
206 return old_val;
207}
208
209inline Atomic64 Release_AtomicExchange(volatile Atomic64* ptr,
210 Atomic64 new_value) {
211 // xchgq already has release memory barrier semantics.
212 return NoBarrier_AtomicExchange(ptr, new_value);
213}
214
215inline void NoBarrier_Store(volatile Atomic64* ptr, Atomic64 value) {
216 *ptr = value;
217}
218
219inline void Acquire_Store(volatile Atomic64* ptr, Atomic64 value) {
220 *ptr = value;
221 MemoryBarrier();
222}
223
224inline void Release_Store(volatile Atomic64* ptr, Atomic64 value) {
225 ATOMICOPS_COMPILER_BARRIER();
226
227 *ptr = value; // An x86 store acts as a release barrier
228 // for current AMD/Intel chips as of Jan 2008.
229 // See also Acquire_Load(), below.
230
231 // When new chips come out, check:
232 // IA-32 Intel Architecture Software Developer's Manual, Volume 3:
233 // System Programming Guide, Chatper 7: Multiple-processor management,
234 // Section 7.2, Memory Ordering.
235 // Last seen at:
236 // http://developer.intel.com/design/pentium4/manuals/index_new.htm
237 //
238 // x86 stores/loads fail to act as barriers for a few instructions (clflush
239 // maskmovdqu maskmovq movntdq movnti movntpd movntps movntq) but these are
240 // not generated by the compiler, and are rare. Users of these instructions
241 // need to know about cache behaviour in any case since all of these involve
242 // either flushing cache lines or non-temporal cache hints.
243}
244
245inline Atomic64 NoBarrier_Load(volatile const Atomic64* ptr) {
246 return *ptr;
247}
248
249inline Atomic64 Acquire_Load(volatile const Atomic64* ptr) {
250 Atomic64 value = *ptr; // An x86 load acts as a acquire barrier,
251 // for current AMD/Intel chips as of Jan 2008.
252 // See also Release_Store(), above.
253 ATOMICOPS_COMPILER_BARRIER();
254 return value;
255}
256
257inline Atomic64 Release_Load(volatile const Atomic64* ptr) {
258 MemoryBarrier();
259 return *ptr;
260}
261
262#else // defined(__x86_64__)
263
264// 64-bit low-level operations on 32-bit platform.
265
266#if !((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 1))
267// For compilers older than gcc 4.1, we use inline asm.
268//
269// Potential pitfalls:
270//
271// 1. %ebx points to Global offset table (GOT) with -fPIC.
272// We need to preserve this register.
273// 2. When explicit registers are used in inline asm, the
274// compiler may not be aware of it and might try to reuse
275// the same register for another argument which has constraints
276// that allow it ("r" for example).
277
278inline Atomic64 __sync_val_compare_and_swap(volatile Atomic64* ptr,
279 Atomic64 old_value,
280 Atomic64 new_value) {
281 Atomic64 prev;
282 __asm__ __volatile__("push %%ebx\n\t"
283 "movl (%3), %%ebx\n\t" // Move 64-bit new_value into
284 "movl 4(%3), %%ecx\n\t" // ecx:ebx
285 "lock; cmpxchg8b (%1)\n\t"// If edx:eax (old_value) same
286 "pop %%ebx\n\t"
287 : "=A" (prev) // as contents of ptr:
288 : "D" (ptr), // ecx:ebx => ptr
289 "0" (old_value), // else:
290 "S" (&new_value) // old *ptr => edx:eax
291 : "memory", "%ecx");
292 return prev;
293}
294#endif // Compiler < gcc-4.1
295
296inline Atomic64 NoBarrier_CompareAndSwap(volatile Atomic64* ptr,
297 Atomic64 old_val,
298 Atomic64 new_val) {
299 return __sync_val_compare_and_swap(ptr, old_val, new_val);
300}
301
302inline Atomic64 NoBarrier_AtomicExchange(volatile Atomic64* ptr,
303 Atomic64 new_val) {
304 Atomic64 old_val;
305
306 do {
307 old_val = *ptr;
308 } while (__sync_val_compare_and_swap(ptr, old_val, new_val) != old_val);
309
310 return old_val;
311}
312
313inline Atomic64 Acquire_AtomicExchange(volatile Atomic64* ptr,
314 Atomic64 new_val) {
315 Atomic64 old_val = NoBarrier_AtomicExchange(ptr, new_val);
316 return old_val;
317}
318
319inline Atomic64 Release_AtomicExchange(volatile Atomic64* ptr,
320 Atomic64 new_val) {
321 return NoBarrier_AtomicExchange(ptr, new_val);
322}
323
324inline void NoBarrier_Store(volatile Atomic64* ptr, Atomic64 value) {
325 __asm__ __volatile__("movq %1, %%mm0\n\t" // Use mmx reg for 64-bit atomic
326 "movq %%mm0, %0\n\t" // moves (ptr could be read-only)
327 "emms\n\t" // Empty mmx state/Reset FP regs
328 : "=m" (*ptr)
329 : "m" (value)
330 : // mark the FP stack and mmx registers as clobbered
331 "st", "st(1)", "st(2)", "st(3)", "st(4)",
332 "st(5)", "st(6)", "st(7)", "mm0", "mm1",
333 "mm2", "mm3", "mm4", "mm5", "mm6", "mm7");
334}
335
336inline void Acquire_Store(volatile Atomic64* ptr, Atomic64 value) {
337 NoBarrier_Store(ptr, value);
338 MemoryBarrier();
339}
340
341inline void Release_Store(volatile Atomic64* ptr, Atomic64 value) {
342 ATOMICOPS_COMPILER_BARRIER();
343 NoBarrier_Store(ptr, value);
344}
345
346inline Atomic64 NoBarrier_Load(volatile const Atomic64* ptr) {
347 Atomic64 value;
348 __asm__ __volatile__("movq %1, %%mm0\n\t" // Use mmx reg for 64-bit atomic
349 "movq %%mm0, %0\n\t" // moves (ptr could be read-only)
350 "emms\n\t" // Empty mmx state/Reset FP regs
351 : "=m" (value)
352 : "m" (*ptr)
353 : // mark the FP stack and mmx registers as clobbered
354 "st", "st(1)", "st(2)", "st(3)", "st(4)",
355 "st(5)", "st(6)", "st(7)", "mm0", "mm1",
356 "mm2", "mm3", "mm4", "mm5", "mm6", "mm7");
357 return value;
358}
359
360inline Atomic64 Acquire_Load(volatile const Atomic64* ptr) {
361 Atomic64 value = NoBarrier_Load(ptr);
362 ATOMICOPS_COMPILER_BARRIER();
363 return value;
364}
365
366inline Atomic64 Release_Load(volatile const Atomic64* ptr) {
367 MemoryBarrier();
368 return NoBarrier_Load(ptr);
369}
370
371#endif // defined(__x86_64__)
372
373inline Atomic64 Acquire_CompareAndSwap(volatile Atomic64* ptr,
374 Atomic64 old_value,
375 Atomic64 new_value) {
376 Atomic64 x = NoBarrier_CompareAndSwap(ptr, old_value, new_value);
377 return x;
378}
379
380inline Atomic64 Release_CompareAndSwap(volatile Atomic64* ptr,
381 Atomic64 old_value,
382 Atomic64 new_value) {
383 return NoBarrier_CompareAndSwap(ptr, old_value, new_value);
384}
385
386} // namespace base::subtle
387} // namespace base
388
389#undef ATOMICOPS_COMPILER_BARRIER
390
391#endif // BASE_ATOMICOPS_INTERNALS_X86_H_