Squashed 'third_party/gperftools/' content from commit 54505f1

Change-Id: Id02e833828732b0efe7dac722b8485279e67c5fa
git-subtree-dir: third_party/gperftools
git-subtree-split: 54505f1d50c2d1f4676f5e87090b64a117fd980e
diff --git a/src/base/arm_instruction_set_select.h b/src/base/arm_instruction_set_select.h
new file mode 100644
index 0000000..6fde685
--- /dev/null
+++ b/src/base/arm_instruction_set_select.h
@@ -0,0 +1,84 @@
+// Copyright (c) 2011, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Author: Alexander Levitskiy
+//
+// Generalizes the plethora of available ARM flavors into an easier-to-manage
+// set. Definitions reference: https://wiki.edubuntu.org/ARM/Thumb2PortingHowto
+
+#ifndef ARM_INSTRUCTION_SET_SELECT_H_
+#define ARM_INSTRUCTION_SET_SELECT_H_
+
+#if defined(__ARM_ARCH_8A__)
+# define ARMV8 1
+#endif
+
+#if defined(ARMV8) || \
+    defined(__ARM_ARCH_7__) || \
+    defined(__ARM_ARCH_7R__) || \
+    defined(__ARM_ARCH_7A__)
+# define ARMV7 1
+#endif
+
+#if defined(ARMV7) || \
+    defined(__ARM_ARCH_6__) || \
+    defined(__ARM_ARCH_6J__) || \
+    defined(__ARM_ARCH_6K__) || \
+    defined(__ARM_ARCH_6Z__) || \
+    defined(__ARM_ARCH_6T2__) || \
+    defined(__ARM_ARCH_6ZK__)
+# define ARMV6 1
+#endif
+
+#if defined(ARMV6) || \
+    defined(__ARM_ARCH_5T__) || \
+    defined(__ARM_ARCH_5E__) || \
+    defined(__ARM_ARCH_5TE__) || \
+    defined(__ARM_ARCH_5TEJ__)
+# define ARMV5 1
+#endif
+
+#if defined(ARMV5) || \
+    defined(__ARM_ARCH_4__) || \
+    defined(__ARM_ARCH_4T__)
+# define ARMV4 1
+#endif
+
+#if defined(ARMV4) || \
+    defined(__ARM_ARCH_3__) || \
+    defined(__ARM_ARCH_3M__)
+# define ARMV3 1
+#endif
+
+#if defined(ARMV3) || \
+    defined(__ARM_ARCH_2__)
+# define ARMV2 1
+#endif
+
+#endif  // ARM_INSTRUCTION_SET_SELECT_H_
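For context (not part of the patch): consumers of this header test the coarsest macro that covers their needs, relying on the cascade above (ARMV6 is defined on v6, v7 and v8 alike). A minimal sketch, assuming a GNU toolchain; PlatformMemoryBarrier is a hypothetical name:

// Sketch only: choosing a barrier strategy from the cascading macros.
#include <stdint.h>
#include "base/arm_instruction_set_select.h"

inline void PlatformMemoryBarrier() {
#if defined(ARMV7)
  __asm__ __volatile__("dmb" : : : "memory");  // v7/v8: data memory barrier
#elif defined(ARMV6)
  uint32_t zero = 0;  // v6: CP15 barrier, as in atomicops-internals-arm-v6plus.h
  __asm__ __volatile__("mcr p15,0,%0,c7,c10,5" : : "r"(zero) : "memory");
#else
# error "pre-v6 ARM needs a kernel helper; see atomicops-internals-arm-generic.h"
#endif
}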
diff --git a/src/base/atomicops-internals-arm-generic.h b/src/base/atomicops-internals-arm-generic.h
new file mode 100644
index 0000000..d0f9413
--- /dev/null
+++ b/src/base/atomicops-internals-arm-generic.h
@@ -0,0 +1,228 @@
+// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
+// Copyright (c) 2003, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+// ---
+//
+// Author: Lei Zhang, Sasha Levitskiy
+//
+// This file is an internal atomic implementation, use base/atomicops.h instead.
+//
+// LinuxKernelCmpxchg is from Google Gears.
+
+#ifndef BASE_ATOMICOPS_INTERNALS_ARM_GENERIC_H_
+#define BASE_ATOMICOPS_INTERNALS_ARM_GENERIC_H_
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "base/basictypes.h"
+
+typedef int32_t Atomic32;
+
+namespace base {
+namespace subtle {
+
+typedef int64_t Atomic64;
+
+// 0xffff0fc0 is the hard coded address of a function provided by
+// the kernel which implements an atomic compare-exchange. On older
+// ARM architecture revisions (pre-v6) this may be implemented using
+// a syscall. This address is stable, and in active use (hard coded)
+// by at least glibc-2.7 and the Android C library.
+// pLinuxKernelCmpxchg has both acquire and release barrier semantics.
+typedef Atomic32 (*LinuxKernelCmpxchgFunc)(Atomic32 old_value,
+                                           Atomic32 new_value,
+                                           volatile Atomic32* ptr);
+LinuxKernelCmpxchgFunc pLinuxKernelCmpxchg ATTRIBUTE_WEAK =
+    (LinuxKernelCmpxchgFunc) 0xffff0fc0;
+
+typedef void (*LinuxKernelMemoryBarrierFunc)(void);
+LinuxKernelMemoryBarrierFunc pLinuxKernelMemoryBarrier ATTRIBUTE_WEAK =
+    (LinuxKernelMemoryBarrierFunc) 0xffff0fa0;
+
+
+inline Atomic32 NoBarrier_CompareAndSwap(volatile Atomic32* ptr,
+                                         Atomic32 old_value,
+                                         Atomic32 new_value) {
+  Atomic32 prev_value = *ptr;
+  do {
+    if (!pLinuxKernelCmpxchg(old_value, new_value,
+                             const_cast<Atomic32*>(ptr))) {
+      return old_value;
+    }
+    prev_value = *ptr;
+  } while (prev_value == old_value);
+  return prev_value;
+}
+
+inline Atomic32 NoBarrier_AtomicExchange(volatile Atomic32* ptr,
+                                         Atomic32 new_value) {
+  Atomic32 old_value;
+  do {
+    old_value = *ptr;
+  } while (pLinuxKernelCmpxchg(old_value, new_value,
+                               const_cast<Atomic32*>(ptr)));
+  return old_value;
+}
+
+inline Atomic32 Acquire_AtomicExchange(volatile Atomic32* ptr,
+                                       Atomic32 new_value) {
+  // pLinuxKernelCmpxchg already has acquire and release barrier semantics.
+  return NoBarrier_AtomicExchange(ptr, new_value);
+}
+
+inline Atomic32 Release_AtomicExchange(volatile Atomic32* ptr,
+                                       Atomic32 new_value) {
+  // pLinuxKernelCmpxchg already has acquire and release barrier semantics.
+  return NoBarrier_AtomicExchange(ptr, new_value);
+}
+
+inline Atomic32 Acquire_CompareAndSwap(volatile Atomic32* ptr,
+                                       Atomic32 old_value,
+                                       Atomic32 new_value) {
+  return NoBarrier_CompareAndSwap(ptr, old_value, new_value);
+}
+
+inline Atomic32 Release_CompareAndSwap(volatile Atomic32* ptr,
+                                       Atomic32 old_value,
+                                       Atomic32 new_value) {
+  return NoBarrier_CompareAndSwap(ptr, old_value, new_value);
+}
+
+inline void NoBarrier_Store(volatile Atomic32* ptr, Atomic32 value) {
+  *ptr = value;
+}
+
+inline void MemoryBarrier() {
+  pLinuxKernelMemoryBarrier();
+}
+
+inline void Acquire_Store(volatile Atomic32* ptr, Atomic32 value) {
+  *ptr = value;
+  MemoryBarrier();
+}
+
+inline void Release_Store(volatile Atomic32* ptr, Atomic32 value) {
+  MemoryBarrier();
+  *ptr = value;
+}
+
+inline Atomic32 NoBarrier_Load(volatile const Atomic32* ptr) {
+  return *ptr;
+}
+
+inline Atomic32 Acquire_Load(volatile const Atomic32* ptr) {
+  Atomic32 value = *ptr;
+  MemoryBarrier();
+  return value;
+}
+
+inline Atomic32 Release_Load(volatile const Atomic32* ptr) {
+  MemoryBarrier();
+  return *ptr;
+}
+
+
+// 64-bit versions are not implemented yet.
+
+inline void NotImplementedFatalError(const char *function_name) {
+  fprintf(stderr, "64-bit %s() not implemented on this platform\n",
+          function_name);
+  abort();
+}
+
+inline Atomic64 NoBarrier_CompareAndSwap(volatile Atomic64* ptr,
+                                         Atomic64 old_value,
+                                         Atomic64 new_value) {
+  NotImplementedFatalError("NoBarrier_CompareAndSwap");
+  return 0;
+}
+
+inline Atomic64 NoBarrier_AtomicExchange(volatile Atomic64* ptr,
+                                         Atomic64 new_value) {
+  NotImplementedFatalError("NoBarrier_AtomicExchange");
+  return 0;
+}
+
+inline Atomic64 Acquire_AtomicExchange(volatile Atomic64* ptr,
+                                       Atomic64 new_value) {
+  // pLinuxKernelCmpxchg already has acquire and release barrier semantics.
+  return NoBarrier_AtomicExchange(ptr, new_value);
+}
+
+inline Atomic64 Release_AtomicExchange(volatile Atomic64* ptr,
+                                       Atomic64 new_value) {
+  // pLinuxKernelCmpxchg already has acquire and release barrier semantics.
+  return NoBarrier_AtomicExchange(ptr, new_value);
+}
+
+inline void NoBarrier_Store(volatile Atomic64* ptr, Atomic64 value) {
+  NotImplementedFatalError("NoBarrier_Store");
+}
+
+inline void Acquire_Store(volatile Atomic64* ptr, Atomic64 value) {
+  NotImplementedFatalError("Acquire_Store64");
+}
+
+inline void Release_Store(volatile Atomic64* ptr, Atomic64 value) {
+  NotImplementedFatalError("Release_Store");
+}
+
+inline Atomic64 NoBarrier_Load(volatile const Atomic64* ptr) {
+  NotImplementedFatalError("NoBarrier_Load");
+  return 0;
+}
+
+inline Atomic64 Acquire_Load(volatile const Atomic64* ptr) {
+  NotImplementedFatalError("Atomic64 Acquire_Load");
+  return 0;
+}
+
+inline Atomic64 Release_Load(volatile const Atomic64* ptr) {
+  NotImplementedFatalError("Atomic64 Release_Load");
+  return 0;
+}
+
+inline Atomic64 Acquire_CompareAndSwap(volatile Atomic64* ptr,
+                                       Atomic64 old_value,
+                                       Atomic64 new_value) {
+  NotImplementedFatalError("Atomic64 Acquire_CompareAndSwap");
+  return 0;
+}
+
+inline Atomic64 Release_CompareAndSwap(volatile Atomic64* ptr,
+                                       Atomic64 old_value,
+                                       Atomic64 new_value) {
+  NotImplementedFatalError("Atomic64 Release_CompareAndSwap");
+  return 0;
+}
+
+}  // namespace base::subtle
+}  // namespace base
+
+#endif  // BASE_ATOMICOPS_INTERNALS_ARM_GENERIC_H_
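As a usage illustration (not part of the patch): the Atomic32 primitives above compose into higher-level synchronization. A hypothetical test-and-set spinlock, assuming base/atomicops.h re-exports these functions:

// Sketch only: a spinlock built from the operations defined above.
#include "base/atomicops.h"

class SpinLock {
 public:
  SpinLock() : state_(0) {}
  void Lock() {
    // Acquire semantics: operations after a successful CAS cannot be
    // reordered before it (the kernel helper is a full barrier here anyway).
    while (base::subtle::Acquire_CompareAndSwap(&state_, 0, 1) != 0) {
      // spin until the previous holder stores 0
    }
  }
  void Unlock() {
    // Release semantics: earlier writes become visible before the store.
    base::subtle::Release_Store(&state_, 0);
  }
 private:
  volatile Atomic32 state_;
};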
diff --git a/src/base/atomicops-internals-arm-v6plus.h b/src/base/atomicops-internals-arm-v6plus.h
new file mode 100644
index 0000000..35f1048
--- /dev/null
+++ b/src/base/atomicops-internals-arm-v6plus.h
@@ -0,0 +1,330 @@
+// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
+// Copyright (c) 2011, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+// ---
+//
+// Author: Sasha Levitskiy
+// based on atomicops-internals by Sanjay Ghemawat
+//
+// This file is an internal atomic implementation, use base/atomicops.h instead.
+//
+// This code implements ARM atomics for architectures V6 and newer.
+
+#ifndef BASE_ATOMICOPS_INTERNALS_ARM_V6PLUS_H_
+#define BASE_ATOMICOPS_INTERNALS_ARM_V6PLUS_H_
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "base/basictypes.h"  // For COMPILE_ASSERT
+
+// The LDREXD and STREXD instructions are available in all ARMv7 variants and
+// above.  In v6, only some variants support them.  For simplicity, we only
+// use exclusive 64-bit load/store on v7 or above.
+#if defined(ARMV7)
+# define BASE_ATOMICOPS_HAS_LDREXD_AND_STREXD
+#endif
+
+typedef int32_t Atomic32;
+
+namespace base {
+namespace subtle {
+
+typedef int64_t Atomic64;
+
+// 32-bit low-level ops
+
+inline Atomic32 NoBarrier_CompareAndSwap(volatile Atomic32* ptr,
+                                         Atomic32 old_value,
+                                         Atomic32 new_value) {
+  Atomic32 oldval, res;
+  do {
+    __asm__ __volatile__(
+    "ldrex   %1, [%3]\n"
+    "mov     %0, #0\n"
+    "teq     %1, %4\n"
+    // The following IT (if-then) instruction is needed for the subsequent
+    // conditional instruction STREXEQ when compiling in THUMB mode.
+    // In ARM mode, the compiler/assembler will not generate any code for it.
+    "it      eq\n"
+    "strexeq %0, %5, [%3]\n"
+        : "=&r" (res), "=&r" (oldval), "+Qo" (*ptr)
+        : "r" (ptr), "Ir" (old_value), "r" (new_value)
+        : "cc");
+  } while (res);
+  return oldval;
+}
+
+inline Atomic32 NoBarrier_AtomicExchange(volatile Atomic32* ptr,
+                                         Atomic32 new_value) {
+  Atomic32 tmp, old;
+  __asm__ __volatile__(
+      "1:\n"
+      "ldrex  %1, [%2]\n"
+      "strex  %0, %3, [%2]\n"
+      "teq    %0, #0\n"
+      "bne    1b"
+      : "=&r" (tmp), "=&r" (old)
+      : "r" (ptr), "r" (new_value)
+      : "cc", "memory");
+  return old;
+}
+
+inline void MemoryBarrier() {
+#if !defined(ARMV7)
+  uint32_t dest = 0;
+  __asm__ __volatile__("mcr p15,0,%0,c7,c10,5" :"=&r"(dest) : : "memory");
+#else
+  __asm__ __volatile__("dmb" : : : "memory");
+#endif
+}
+
+inline Atomic32 Acquire_AtomicExchange(volatile Atomic32* ptr,
+                                       Atomic32 new_value) {
+  Atomic32 old_value = NoBarrier_AtomicExchange(ptr, new_value);
+  MemoryBarrier();
+  return old_value;
+}
+
+inline Atomic32 Release_AtomicExchange(volatile Atomic32* ptr,
+                                       Atomic32 new_value) {
+  MemoryBarrier();
+  return NoBarrier_AtomicExchange(ptr, new_value);
+}
+
+inline Atomic32 Acquire_CompareAndSwap(volatile Atomic32* ptr,
+                                       Atomic32 old_value,
+                                       Atomic32 new_value) {
+  Atomic32 value = NoBarrier_CompareAndSwap(ptr, old_value, new_value);
+  MemoryBarrier();
+  return value;
+}
+
+inline Atomic32 Release_CompareAndSwap(volatile Atomic32* ptr,
+                                       Atomic32 old_value,
+                                       Atomic32 new_value) {
+  MemoryBarrier();
+  return NoBarrier_CompareAndSwap(ptr, old_value, new_value);
+}
+
+inline void NoBarrier_Store(volatile Atomic32* ptr, Atomic32 value) {
+  *ptr = value;
+}
+
+inline void Acquire_Store(volatile Atomic32* ptr, Atomic32 value) {
+  *ptr = value;
+  MemoryBarrier();
+}
+
+inline void Release_Store(volatile Atomic32* ptr, Atomic32 value) {
+  MemoryBarrier();
+  *ptr = value;
+}
+
+inline Atomic32 NoBarrier_Load(volatile const Atomic32* ptr) {
+  return *ptr;
+}
+
+inline Atomic32 Acquire_Load(volatile const Atomic32* ptr) {
+  Atomic32 value = *ptr;
+  MemoryBarrier();
+  return value;
+}
+
+inline Atomic32 Release_Load(volatile const Atomic32* ptr) {
+  MemoryBarrier();
+  return *ptr;
+}
+
+// 64-bit versions are only available if LDREXD and STREXD instructions
+// are available.
+#ifdef BASE_ATOMICOPS_HAS_LDREXD_AND_STREXD
+
+#define BASE_HAS_ATOMIC64 1
+
+inline Atomic64 NoBarrier_CompareAndSwap(volatile Atomic64* ptr,
+                                         Atomic64 old_value,
+                                         Atomic64 new_value) {
+  Atomic64 oldval, res;
+  do {
+    __asm__ __volatile__(
+    "ldrexd   %1, [%3]\n"
+    "mov      %0, #0\n"
+    "teq      %Q1, %Q4\n"
+    // The following IT (if-then) instructions are needed for the subsequent
+    // conditional instructions when compiling in THUMB mode.
+    // In ARM mode, the compiler/assembler will not generate any code for them.
+    "it       eq\n"
+    "teqeq    %R1, %R4\n"
+    "it       eq\n"
+    "strexdeq %0, %5, [%3]\n"
+        : "=&r" (res), "=&r" (oldval), "+Q" (*ptr)
+        : "r" (ptr), "Ir" (old_value), "r" (new_value)
+        : "cc");
+  } while (res);
+  return oldval;
+}
+
+inline Atomic64 NoBarrier_AtomicExchange(volatile Atomic64* ptr,
+                                         Atomic64 new_value) {
+  int store_failed;
+  Atomic64 old;
+  __asm__ __volatile__(
+      "1:\n"
+      "ldrexd  %1, [%2]\n"
+      "strexd  %0, %3, [%2]\n"
+      "teq     %0, #0\n"
+      "bne     1b"
+      : "=&r" (store_failed), "=&r" (old)
+      : "r" (ptr), "r" (new_value)
+      : "cc", "memory");
+  return old;
+}
+
+inline Atomic64 Acquire_AtomicExchange(volatile Atomic64* ptr,
+                                       Atomic64 new_value) {
+  Atomic64 old_value = NoBarrier_AtomicExchange(ptr, new_value);
+  MemoryBarrier();
+  return old_value;
+}
+
+inline Atomic64 Release_AtomicExchange(volatile Atomic64* ptr,
+                                       Atomic64 new_value) {
+  MemoryBarrier();
+  return NoBarrier_AtomicExchange(ptr, new_value);
+}
+
+inline void NoBarrier_Store(volatile Atomic64* ptr, Atomic64 value) {
+  int store_failed;
+  Atomic64 dummy;
+  __asm__ __volatile__(
+      "1:\n"
+      // Dummy load to lock cache line.
+      "ldrexd  %1, [%3]\n"
+      "strexd  %0, %2, [%3]\n"
+      "teq     %0, #0\n"
+      "bne     1b"
+      : "=&r" (store_failed), "=&r"(dummy)
+      : "r"(value), "r" (ptr)
+      : "cc", "memory");
+}
+
+inline Atomic64 NoBarrier_Load(volatile const Atomic64* ptr) {
+  Atomic64 res;
+  __asm__ __volatile__(
+  "ldrexd   %0, [%1]\n"
+  "clrex\n"
+      : "=r" (res)
+      : "r"(ptr), "Q"(*ptr));
+  return res;
+}
+
+#else // BASE_ATOMICOPS_HAS_LDREXD_AND_STREXD
+
+inline void NotImplementedFatalError(const char *function_name) {
+  fprintf(stderr, "64-bit %s() not implemented on this platform\n",
+          function_name);
+  abort();
+}
+
+inline Atomic64 NoBarrier_CompareAndSwap(volatile Atomic64* ptr,
+                                         Atomic64 old_value,
+                                         Atomic64 new_value) {
+  NotImplementedFatalError("NoBarrier_CompareAndSwap");
+  return 0;
+}
+
+inline Atomic64 NoBarrier_AtomicExchange(volatile Atomic64* ptr,
+                                         Atomic64 new_value) {
+  NotImplementedFatalError("NoBarrier_AtomicExchange");
+  return 0;
+}
+
+inline Atomic64 Acquire_AtomicExchange(volatile Atomic64* ptr,
+                                       Atomic64 new_value) {
+  NotImplementedFatalError("Acquire_AtomicExchange");
+  return 0;
+}
+
+inline Atomic64 Release_AtomicExchange(volatile Atomic64* ptr,
+                                       Atomic64 new_value) {
+  NotImplementedFatalError("Release_AtomicExchange");
+  return 0;
+}
+
+inline void NoBarrier_Store(volatile Atomic64* ptr, Atomic64 value) {
+  NotImplementedFatalError("NoBarrier_Store");
+}
+
+inline Atomic64 NoBarrier_Load(volatile const Atomic64* ptr) {
+  NotImplementedFatalError("NoBarrier_Load");
+  return 0;
+}
+
+#endif // BASE_ATOMICOPS_HAS_LDREXD_AND_STREXD
+
+inline void Acquire_Store(volatile Atomic64* ptr, Atomic64 value) {
+  NoBarrier_Store(ptr, value);
+  MemoryBarrier();
+}
+
+inline void Release_Store(volatile Atomic64* ptr, Atomic64 value) {
+  MemoryBarrier();
+  NoBarrier_Store(ptr, value);
+}
+
+inline Atomic64 Acquire_Load(volatile const Atomic64* ptr) {
+  Atomic64 value = NoBarrier_Load(ptr);
+  MemoryBarrier();
+  return value;
+}
+
+inline Atomic64 Release_Load(volatile const Atomic64* ptr) {
+  MemoryBarrier();
+  return NoBarrier_Load(ptr);
+}
+
+inline Atomic64 Acquire_CompareAndSwap(volatile Atomic64* ptr,
+                                       Atomic64 old_value,
+                                       Atomic64 new_value) {
+  Atomic64 value = NoBarrier_CompareAndSwap(ptr, old_value, new_value);
+  MemoryBarrier();
+  return value;
+}
+
+inline Atomic64 Release_CompareAndSwap(volatile Atomic64* ptr,
+                                       Atomic64 old_value,
+                                       Atomic64 new_value) {
+  MemoryBarrier();
+  return NoBarrier_CompareAndSwap(ptr, old_value, new_value);
+}
+
+}  // namespace subtle ends
+}  // namespace base ends
+
+#endif  // BASE_ATOMICOPS_INTERNALS_ARM_V6PLUS_H_
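For comparison (a sketch, not part of the patch): other read-modify-write operations follow the same ldrex/strex reservation-retry shape as NoBarrier_AtomicExchange above, modifying the loaded value before the store-exclusive. A hypothetical fetch-add, assuming an ARMv6+ toolchain:

// Sketch only: atomic add in the same style as the exchange above.
inline Atomic32 NoBarrier_AtomicIncrement(volatile Atomic32* ptr,
                                          Atomic32 increment) {
  Atomic32 res, value;
  __asm__ __volatile__(
      "1:\n"
      "ldrex  %1, [%2]\n"      // value = *ptr, opening the exclusive monitor
      "add    %1, %1, %3\n"    // value += increment
      "strex  %0, %1, [%2]\n"  // try *ptr = value; res != 0 if reservation lost
      "teq    %0, #0\n"
      "bne    1b"              // retry until the store-exclusive succeeds
      : "=&r" (res), "=&r" (value)
      : "r" (ptr), "r" (increment)
      : "cc", "memory");
  return value;  // the incremented value
}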
diff --git a/src/base/atomicops-internals-gcc.h b/src/base/atomicops-internals-gcc.h
new file mode 100644
index 0000000..f8d2786
--- /dev/null
+++ b/src/base/atomicops-internals-gcc.h
@@ -0,0 +1,203 @@
+// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
+// Copyright (c) 2014, Linaro
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+// ---
+//
+// Author: Riku Voipio, riku.voipio@linaro.org
+//
+// atomic primitives implemented with gcc atomic intrinsics:
+// http://gcc.gnu.org/onlinedocs/gcc/_005f_005fatomic-Builtins.html
+//
+
+#ifndef BASE_ATOMICOPS_INTERNALS_GCC_GENERIC_H_
+#define BASE_ATOMICOPS_INTERNALS_GCC_GENERIC_H_
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "base/basictypes.h"
+
+typedef int32_t Atomic32;
+
+namespace base {
+namespace subtle {
+
+typedef int64_t Atomic64;
+
+inline void MemoryBarrier() {
+    __sync_synchronize();
+}
+
+inline Atomic32 NoBarrier_CompareAndSwap(volatile Atomic32* ptr,
+                                         Atomic32 old_value,
+                                         Atomic32 new_value) {
+  Atomic32 prev_value = old_value;
+  __atomic_compare_exchange_n(ptr, &prev_value, new_value, 
+          0, __ATOMIC_RELAXED, __ATOMIC_RELAXED);
+  return prev_value;
+}
+
+inline Atomic32 NoBarrier_AtomicExchange(volatile Atomic32* ptr,
+                                         Atomic32 new_value) {
+  return __atomic_exchange_n(const_cast<Atomic32*>(ptr), new_value, __ATOMIC_RELAXED);
+}
+
+inline Atomic32 Acquire_AtomicExchange(volatile Atomic32* ptr,
+                                       Atomic32 new_value) {
+  return __atomic_exchange_n(const_cast<Atomic32*>(ptr), new_value,  __ATOMIC_ACQUIRE);
+}
+
+inline Atomic32 Release_AtomicExchange(volatile Atomic32* ptr,
+                                       Atomic32 new_value) {
+  return __atomic_exchange_n(const_cast<Atomic32*>(ptr), new_value, __ATOMIC_RELEASE);
+}
+
+inline Atomic32 Acquire_CompareAndSwap(volatile Atomic32* ptr,
+                                       Atomic32 old_value,
+                                       Atomic32 new_value) {
+  Atomic32 prev_value = old_value;
+  __atomic_compare_exchange_n(ptr, &prev_value, new_value, 
+          0, __ATOMIC_ACQUIRE, __ATOMIC_RELAXED);
+  return prev_value;
+}
+
+inline Atomic32 Release_CompareAndSwap(volatile Atomic32* ptr,
+                                       Atomic32 old_value,
+                                       Atomic32 new_value) {
+  Atomic32 prev_value = old_value;
+  __atomic_compare_exchange_n(ptr, &prev_value, new_value, 
+          0, __ATOMIC_RELEASE, __ATOMIC_RELAXED);
+  return prev_value;
+}
+
+inline void NoBarrier_Store(volatile Atomic32* ptr, Atomic32 value) {
+  *ptr = value;
+}
+
+inline void Acquire_Store(volatile Atomic32* ptr, Atomic32 value) {
+  *ptr = value;
+  MemoryBarrier();
+}
+
+inline void Release_Store(volatile Atomic32* ptr, Atomic32 value) {
+  MemoryBarrier();
+  *ptr = value;
+}
+
+inline Atomic32 NoBarrier_Load(volatile const Atomic32* ptr) {
+  return *ptr;
+}
+
+inline Atomic32 Acquire_Load(volatile const Atomic32* ptr) {
+  Atomic32 value = *ptr;
+  MemoryBarrier();
+  return value;
+}
+
+inline Atomic32 Release_Load(volatile const Atomic32* ptr) {
+  MemoryBarrier();
+  return *ptr;
+}
+
+// 64-bit versions
+
+inline Atomic64 NoBarrier_CompareAndSwap(volatile Atomic64* ptr,
+                                         Atomic64 old_value,
+                                         Atomic64 new_value) {
+  Atomic64 prev_value = old_value;
+  __atomic_compare_exchange_n(ptr, &prev_value, new_value, 
+          0, __ATOMIC_RELAXED, __ATOMIC_RELAXED);
+  return prev_value;
+}
+
+inline Atomic64 NoBarrier_AtomicExchange(volatile Atomic64* ptr,
+                                         Atomic64 new_value) {
+  return __atomic_exchange_n(const_cast<Atomic64*>(ptr), new_value, __ATOMIC_RELAXED);
+}
+
+inline Atomic64 Acquire_AtomicExchange(volatile Atomic64* ptr,
+                                       Atomic64 new_value) {
+  return __atomic_exchange_n(const_cast<Atomic64*>(ptr), new_value,  __ATOMIC_ACQUIRE);
+}
+
+inline Atomic64 Release_AtomicExchange(volatile Atomic64* ptr,
+                                       Atomic64 new_value) {
+  return __atomic_exchange_n(const_cast<Atomic64*>(ptr), new_value, __ATOMIC_RELEASE);
+}
+
+inline Atomic64 Acquire_CompareAndSwap(volatile Atomic64* ptr,
+                                       Atomic64 old_value,
+                                       Atomic64 new_value) {
+  Atomic64 prev_value = old_value;
+  __atomic_compare_exchange_n(ptr, &prev_value, new_value, 
+          0, __ATOMIC_ACQUIRE, __ATOMIC_RELAXED);
+  return prev_value;
+}
+
+inline Atomic64 Release_CompareAndSwap(volatile Atomic64* ptr,
+                                       Atomic64 old_value,
+                                       Atomic64 new_value) {
+  Atomic64 prev_value = old_value;
+  __atomic_compare_exchange_n(ptr, &prev_value, new_value, 
+          0, __ATOMIC_RELEASE, __ATOMIC_RELAXED);
+  return prev_value;
+}
+
+inline void NoBarrier_Store(volatile Atomic64* ptr, Atomic64 value) {
+  *ptr = value;
+}
+
+inline void Acquire_Store(volatile Atomic64* ptr, Atomic64 value) {
+  *ptr = value;
+  MemoryBarrier();
+}
+
+inline void Release_Store(volatile Atomic64* ptr, Atomic64 value) {
+  MemoryBarrier();
+  *ptr = value;
+}
+
+inline Atomic64 NoBarrier_Load(volatile const Atomic64* ptr) {
+  return *ptr;
+}
+
+inline Atomic64 Acquire_Load(volatile const Atomic64* ptr) {
+  Atomic64 value = *ptr;
+  MemoryBarrier();
+  return value;
+}
+
+inline Atomic64 Release_Load(volatile const Atomic64* ptr) {
+  MemoryBarrier();
+  return *ptr;
+}
+
+}  // namespace base::subtle
+}  // namespace base
+
+#endif  // BASE_ATOMICOPS_INTERNALS_GCC_GENERIC_H_
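Since these wrappers map one-to-one onto the GCC __atomic builtins, the same operation can be written directly against the builtin; a sketch (cas_acquire is a hypothetical name, GCC >= 4.7 assumed):

// Sketch only: the acquire CAS above, expressed directly.
#include <stdint.h>

inline int32_t cas_acquire(volatile int32_t* ptr,
                           int32_t expected, int32_t desired) {
  int32_t prev = expected;
  // On failure, the builtin writes the observed value back into 'prev',
  // which is exactly what the wrappers above return.
  __atomic_compare_exchange_n(ptr, &prev, desired, /*weak=*/false,
                              __ATOMIC_ACQUIRE, __ATOMIC_RELAXED);
  return prev;
}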
diff --git a/src/base/atomicops-internals-linuxppc.h b/src/base/atomicops-internals-linuxppc.h
new file mode 100644
index 0000000..b52fdf0
--- /dev/null
+++ b/src/base/atomicops-internals-linuxppc.h
@@ -0,0 +1,437 @@
+// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
+/* Copyright (c) 2008, Google Inc.
+ * All rights reserved.
+ * 
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ * 
+ *     * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with the
+ * distribution.
+ *     * Neither the name of Google Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ * 
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * ---
+ */
+
+// Implementation of atomic operations for ppc-linux.  This file should not
+// be included directly.  Clients should instead include
+// "base/atomicops.h".
+
+#ifndef BASE_ATOMICOPS_INTERNALS_LINUXPPC_H_
+#define BASE_ATOMICOPS_INTERNALS_LINUXPPC_H_
+
+typedef int32_t Atomic32;
+
+#ifdef __PPC64__
+#define BASE_HAS_ATOMIC64 1
+#endif
+
+namespace base {
+namespace subtle {
+
+static inline void _sync(void) {
+  __asm__ __volatile__("sync": : : "memory");
+}
+
+static inline void _lwsync(void) {
+  // gcc defines __NO_LWSYNC__ when appropriate; see
+  //    http://gcc.gnu.org/ml/gcc-patches/2006-11/msg01238.html
+#ifdef __NO_LWSYNC__
+  __asm__ __volatile__("msync": : : "memory");
+#else
+  __asm__ __volatile__("lwsync": : : "memory");
+#endif
+}
+
+static inline void _isync(void) {
+  __asm__ __volatile__("isync": : : "memory");
+}
+
+static inline Atomic32 OSAtomicAdd32(Atomic32 amount, Atomic32 *value) {
+  Atomic32 t;
+  __asm__ __volatile__(
+"1:		lwarx   %0,0,%3\n\
+		add     %0,%2,%0\n\
+		stwcx.  %0,0,%3 \n\
+		bne-    1b"
+		: "=&r" (t), "+m" (*value)
+		: "r" (amount), "r" (value)
+                : "cc");
+  return t;
+}
+
+static inline Atomic32 OSAtomicAdd32Barrier(Atomic32 amount, Atomic32 *value) {
+  Atomic32 t;
+  _lwsync();
+  t = OSAtomicAdd32(amount, value);
+  // This is based on the code snippet in the architecture manual (Vol
+  // 2, Appendix B).  It's a little tricky: correctness depends on the
+  // fact that the code right before this (in OSAtomicAdd32) has a
+  // conditional branch with a data dependency on the update.
+  // Otherwise, we'd have to use sync.
+  _isync();
+  return t;
+}
+
+static inline bool OSAtomicCompareAndSwap32(Atomic32 old_value,
+                                            Atomic32 new_value,
+                                            Atomic32 *value) {
+  Atomic32 prev;
+  __asm__ __volatile__(
+"1:		lwarx   %0,0,%2\n\
+		cmpw    0,%0,%3\n\
+		bne-    2f\n\
+		stwcx.  %4,0,%2\n\
+		bne-    1b\n\
+2:"
+                : "=&r" (prev), "+m" (*value)
+                : "r" (value), "r" (old_value), "r" (new_value)
+                : "cc");
+  return prev == old_value;
+}
+
+static inline Atomic32 OSAtomicCompareAndSwap32Acquire(Atomic32 old_value,
+                                                       Atomic32 new_value,
+                                                       Atomic32 *value) {
+  Atomic32 t;
+  t = OSAtomicCompareAndSwap32(old_value, new_value, value);
+  // This is based on the code snippet in the architecture manual (Vol
+  // 2, Appendix B).  It's a little tricky: correctness depends on the
+  // fact that the code right before this (in
+  // OSAtomicCompareAndSwap32) has a conditional branch with a data
+  // dependency on the update.  Otherwise, we'd have to use sync.
+  _isync();
+  return t;
+}
+
+static inline Atomic32 OSAtomicCompareAndSwap32Release(Atomic32 old_value,
+                                                       Atomic32 new_value,
+                                                       Atomic32 *value) {
+  _lwsync();
+  return OSAtomicCompareAndSwap32(old_value, new_value, value);
+}
+
+typedef int64_t Atomic64;
+
+inline void MemoryBarrier() {
+  // This can't be _lwsync(); we need to order the immediately
+  // preceding stores against any load that may follow, but lwsync
+  // doesn't guarantee that.
+  _sync();
+}
+
+// 32-bit Versions.
+
+inline Atomic32 NoBarrier_CompareAndSwap(volatile Atomic32 *ptr,
+                                         Atomic32 old_value,
+                                         Atomic32 new_value) {
+  Atomic32 prev_value;
+  do {
+    if (OSAtomicCompareAndSwap32(old_value, new_value,
+                                 const_cast<Atomic32*>(ptr))) {
+      return old_value;
+    }
+    prev_value = *ptr;
+  } while (prev_value == old_value);
+  return prev_value;
+}
+
+inline Atomic32 NoBarrier_AtomicExchange(volatile Atomic32 *ptr,
+                                         Atomic32 new_value) {
+  Atomic32 old_value;
+  do {
+    old_value = *ptr;
+  } while (!OSAtomicCompareAndSwap32(old_value, new_value,
+                                     const_cast<Atomic32*>(ptr)));
+  return old_value;
+}
+
+inline Atomic32 Acquire_AtomicExchange(volatile Atomic32 *ptr,
+                                       Atomic32 new_value) {
+  Atomic32 old_value;
+  do {
+    old_value = *ptr;
+  } while (!OSAtomicCompareAndSwap32Acquire(old_value, new_value,
+                                            const_cast<Atomic32*>(ptr)));
+  return old_value;
+}
+
+inline Atomic32 Release_AtomicExchange(volatile Atomic32 *ptr,
+                                       Atomic32 new_value) {
+  Atomic32 old_value;
+  do {
+    old_value = *ptr;
+  } while (!OSAtomicCompareAndSwap32Release(old_value, new_value,
+                                            const_cast<Atomic32*>(ptr)));
+  return old_value;
+}
+
+inline Atomic32 Acquire_CompareAndSwap(volatile Atomic32 *ptr,
+                                       Atomic32 old_value,
+                                       Atomic32 new_value) {
+  Atomic32 prev_value;
+  do {
+    if (OSAtomicCompareAndSwap32Acquire(old_value, new_value,
+                                        const_cast<Atomic32*>(ptr))) {
+      return old_value;
+    }
+    prev_value = *ptr;
+  } while (prev_value == old_value);
+  return prev_value;
+}
+
+inline Atomic32 Release_CompareAndSwap(volatile Atomic32 *ptr,
+                                       Atomic32 old_value,
+                                       Atomic32 new_value) {
+  Atomic32 prev_value;
+  do {
+    if (OSAtomicCompareAndSwap32Release(old_value, new_value,
+                                        const_cast<Atomic32*>(ptr))) {
+      return old_value;
+    }
+    prev_value = *ptr;
+  } while (prev_value == old_value);
+  return prev_value;
+}
+
+#ifdef __PPC64__
+
+// 64-bit Versions.
+
+static inline Atomic64 OSAtomicAdd64(Atomic64 amount, Atomic64 *value) {
+  Atomic64 t;
+  __asm__ __volatile__(
+"1:		ldarx   %0,0,%3\n\
+		add     %0,%2,%0\n\
+		stdcx.  %0,0,%3 \n\
+		bne-    1b"
+		: "=&r" (t), "+m" (*value)
+		: "r" (amount), "r" (value)
+                : "cc");
+  return t;
+}
+
+static inline Atomic64 OSAtomicAdd64Barrier(Atomic64 amount, Atomic64 *value) {
+  Atomic64 t;
+  _lwsync();
+  t = OSAtomicAdd64(amount, value);
+  // This is based on the code snippet in the architecture manual (Vol
+  // 2, Appendix B).  It's a little tricky: correctness depends on the
+  // fact that the code right before this (in OSAtomicAdd64) has a
+  // conditional branch with a data dependency on the update.
+  // Otherwise, we'd have to use sync.
+  _isync();
+  return t;
+}
+
+static inline bool OSAtomicCompareAndSwap64(Atomic64 old_value,
+                                            Atomic64 new_value,
+                                            Atomic64 *value) {
+  Atomic64 prev;
+  __asm__ __volatile__(
+"1:		ldarx   %0,0,%2\n\
+		cmpd    0,%0,%3\n\
+		bne-    2f\n\
+		stdcx.  %4,0,%2\n\
+		bne-    1b\n\
+2:"
+                : "=&r" (prev), "+m" (*value)
+                : "r" (value), "r" (old_value), "r" (new_value)
+                : "cc");
+  return prev == old_value;
+}
+
+static inline Atomic64 OSAtomicCompareAndSwap64Acquire(Atomic64 old_value,
+                                                       Atomic64 new_value,
+                                                       Atomic64 *value) {
+  Atomic64 t;
+  t = OSAtomicCompareAndSwap64(old_value, new_value, value);
+  // This is based on the code snippet in the architecture manual (Vol
+  // 2, Appendix B).  It's a little tricky: correctness depends on the
+  // fact that the code right before this (in
+  // OSAtomicCompareAndSwap64) has a conditional branch with a data
+  // dependency on the update.  Otherwise, we'd have to use sync.
+  _isync();
+  return t;
+}
+
+static inline Atomic64 OSAtomicCompareAndSwap64Release(Atomic64 old_value,
+                                                       Atomic64 new_value,
+                                                       Atomic64 *value) {
+  _lwsync();
+  return OSAtomicCompareAndSwap64(old_value, new_value, value);
+}
+
+
+inline Atomic64 NoBarrier_CompareAndSwap(volatile Atomic64 *ptr,
+                                         Atomic64 old_value,
+                                         Atomic64 new_value) {
+  Atomic64 prev_value;
+  do {
+    if (OSAtomicCompareAndSwap64(old_value, new_value,
+                                 const_cast<Atomic64*>(ptr))) {
+      return old_value;
+    }
+    prev_value = *ptr;
+  } while (prev_value == old_value);
+  return prev_value;
+}
+
+inline Atomic64 NoBarrier_AtomicExchange(volatile Atomic64 *ptr,
+                                         Atomic64 new_value) {
+  Atomic64 old_value;
+  do {
+    old_value = *ptr;
+  } while (!OSAtomicCompareAndSwap64(old_value, new_value,
+                                     const_cast<Atomic64*>(ptr)));
+  return old_value;
+}
+
+inline Atomic64 Acquire_AtomicExchange(volatile Atomic64 *ptr,
+                                       Atomic64 new_value) {
+  Atomic64 old_value;
+  do {
+    old_value = *ptr;
+  } while (!OSAtomicCompareAndSwap64Acquire(old_value, new_value,
+                                            const_cast<Atomic64*>(ptr)));
+  return old_value;
+}
+
+inline Atomic64 Release_AtomicExchange(volatile Atomic64 *ptr,
+                                       Atomic64 new_value) {
+  Atomic64 old_value;
+  do {
+    old_value = *ptr;
+  } while (!OSAtomicCompareAndSwap64Release(old_value, new_value,
+                                            const_cast<Atomic64*>(ptr)));
+  return old_value;
+}
+
+inline Atomic64 Acquire_CompareAndSwap(volatile Atomic64 *ptr,
+                                       Atomic64 old_value,
+                                       Atomic64 new_value) {
+  Atomic64 prev_value;
+  do {
+    if (OSAtomicCompareAndSwap64Acquire(old_value, new_value,
+                                        const_cast<Atomic64*>(ptr))) {
+      return old_value;
+    }
+    prev_value = *ptr;
+  } while (prev_value == old_value);
+  return prev_value;
+}
+
+inline Atomic64 Release_CompareAndSwap(volatile Atomic64 *ptr,
+                                       Atomic64 old_value,
+                                       Atomic64 new_value) {
+  Atomic64 prev_value;
+  do {
+    if (OSAtomicCompareAndSwap64Release(old_value, new_value,
+                                        const_cast<Atomic64*>(ptr))) {
+      return old_value;
+    }
+    prev_value = *ptr;
+  } while (prev_value == old_value);
+  return prev_value;
+}
+
+#endif
+
+inline void NoBarrier_Store(volatile Atomic32 *ptr, Atomic32 value) {
+  *ptr = value;
+}
+
+inline void Acquire_Store(volatile Atomic32 *ptr, Atomic32 value) {
+  *ptr = value;
+  // This can't be _lwsync(); we need to order the immediately
+  // preceding stores against any load that may follow, but lwsync
+  // doesn't guarantee that.
+  _sync();
+}
+
+inline void Release_Store(volatile Atomic32 *ptr, Atomic32 value) {
+  _lwsync();
+  *ptr = value;
+}
+
+inline Atomic32 NoBarrier_Load(volatile const Atomic32 *ptr) {
+  return *ptr;
+}
+
+inline Atomic32 Acquire_Load(volatile const Atomic32 *ptr) {
+  Atomic32 value = *ptr;
+  _lwsync();
+  return value;
+}
+
+inline Atomic32 Release_Load(volatile const Atomic32 *ptr) {
+  // This can't be _lwsync(); we need to order the immediately
+  // preceding stores against any load that may follow, but lwsync
+  // doesn't guarantee that.
+  _sync();
+  return *ptr;
+}
+
+#ifdef __PPC64__
+
+// 64-bit Versions.
+
+inline void NoBarrier_Store(volatile Atomic64 *ptr, Atomic64 value) {
+  *ptr = value;
+}
+
+inline void Acquire_Store(volatile Atomic64 *ptr, Atomic64 value) {
+  *ptr = value;
+  // This can't be _lwsync(); we need to order the immediately
+  // preceding stores against any load that may follow, but lwsync
+  // doesn't guarantee that.
+  _sync();
+}
+
+inline void Release_Store(volatile Atomic64 *ptr, Atomic64 value) {
+  _lwsync();
+  *ptr = value;
+}
+
+inline Atomic64 NoBarrier_Load(volatile const Atomic64 *ptr) {
+  return *ptr;
+}
+
+inline Atomic64 Acquire_Load(volatile const Atomic64 *ptr) {
+  Atomic64 value = *ptr;
+  _lwsync();
+  return value;
+}
+
+inline Atomic64 Release_Load(volatile const Atomic64 *ptr) {
+  // This can't be _lwsync(); we need to order the immediately
+  // preceding stores against any load that may follow, but lwsync
+  // doesn't guarantee that.
+  _sync();
+  return *ptr;
+}
+
+#endif
+
+}   // namespace base::subtle
+}   // namespace base
+
+#endif  // BASE_ATOMICOPS_INTERNALS_LINUXPPC_H_
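A usage sketch (not part of the patch) of where the Barrier variants matter, assuming a Linux/PPC build where base/atomicops.h selects this header; Ref, AddRef and ReleaseRef are hypothetical names:

// Sketch only: a reference count whose final decrement must order the
// object's prior writes before deletion.
#include "base/atomicops.h"

struct Ref {
  Atomic32 count;
};

inline void AddRef(Ref* r) {
  // A plain add suffices: taking a reference publishes nothing by itself.
  base::subtle::OSAtomicAdd32(1, &r->count);
}

inline bool ReleaseRef(Ref* r) {
  // lwsync before / isync after the add (see OSAtomicAdd32Barrier) give the
  // decrement acquire+release ordering, so the thread that drops the count
  // to zero also sees all prior writes to *r before destroying it.
  return base::subtle::OSAtomicAdd32Barrier(-1, &r->count) == 0;
}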
diff --git a/src/base/atomicops-internals-macosx.h b/src/base/atomicops-internals-macosx.h
new file mode 100644
index 0000000..b5130d4
--- /dev/null
+++ b/src/base/atomicops-internals-macosx.h
@@ -0,0 +1,370 @@
+// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
+/* Copyright (c) 2006, Google Inc.
+ * All rights reserved.
+ * 
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ * 
+ *     * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with the
+ * distribution.
+ *     * Neither the name of Google Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ * 
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+// Implementation of atomic operations for Mac OS X.  This file should not
+// be included directly.  Clients should instead include
+// "base/atomicops.h".
+
+#ifndef BASE_ATOMICOPS_INTERNALS_MACOSX_H_
+#define BASE_ATOMICOPS_INTERNALS_MACOSX_H_
+
+typedef int32_t Atomic32;
+
+// MacOS uses long for intptr_t, so AtomicWord and Atomic32 are always
+// different types on the Mac, even when they are the same size.  Similarly,
+// on __ppc64__, AtomicWord and Atomic64 are always different.  Thus, we need
+// explicit casting.
+#ifdef __LP64__
+#define AtomicWordCastType base::subtle::Atomic64
+#else
+#define AtomicWordCastType Atomic32
+#endif
+
+#if defined(__LP64__) || defined(__i386__)
+#define BASE_HAS_ATOMIC64 1  // Use only in tests and base/atomic*
+#endif
+
+#include <libkern/OSAtomic.h>
+
+namespace base {
+namespace subtle {
+
+#if !defined(__LP64__) && defined(__ppc__)
+
+// The Mac 64-bit OSAtomic implementations are not available for 32-bit PowerPC,
+// while the underlying assembly instructions are available on only some
+// implementations of PowerPC.
+
+// The following inline functions will fail with the error message at compile
+// time ONLY IF they are called.  So it is safe to use this header if user
+// code only calls AtomicWord and Atomic32 operations.
+//
+// NOTE(vchen): Implementation notes to implement the atomic ops below may
+// be found in "PowerPC Virtual Environment Architecture, Book II,
+// Version 2.02", January 28, 2005, Appendix B, page 46.  Unfortunately,
+// extra care must be taken to ensure data are properly 8-byte aligned, and
+// that data are returned correctly according to Mac OS X ABI specs.
+
+inline int64_t OSAtomicCompareAndSwap64(
+    int64_t oldValue, int64_t newValue, int64_t *theValue) {
+  __asm__ __volatile__(
+      "_OSAtomicCompareAndSwap64_not_supported_for_32_bit_ppc\n\t");
+  return 0;
+}
+
+inline int64_t OSAtomicAdd64(int64_t theAmount, int64_t *theValue) {
+  __asm__ __volatile__(
+      "_OSAtomicAdd64_not_supported_for_32_bit_ppc\n\t");
+  return 0;
+}
+
+inline int64_t OSAtomicCompareAndSwap64Barrier(
+    int64_t oldValue, int64_t newValue, int64_t *theValue) {
+  int64_t prev = OSAtomicCompareAndSwap64(oldValue, newValue, theValue);
+  OSMemoryBarrier();
+  return prev;
+}
+
+inline int64_t OSAtomicAdd64Barrier(
+    int64_t theAmount, int64_t *theValue) {
+  int64_t new_val = OSAtomicAdd64(theAmount, theValue);
+  OSMemoryBarrier();
+  return new_val;
+}
+#endif
+
+typedef int64_t Atomic64;
+
+inline void MemoryBarrier() {
+  OSMemoryBarrier();
+}
+
+// 32-bit Versions.
+
+inline Atomic32 NoBarrier_CompareAndSwap(volatile Atomic32 *ptr,
+                                         Atomic32 old_value,
+                                         Atomic32 new_value) {
+  Atomic32 prev_value;
+  do {
+    if (OSAtomicCompareAndSwap32(old_value, new_value,
+                                 const_cast<Atomic32*>(ptr))) {
+      return old_value;
+    }
+    prev_value = *ptr;
+  } while (prev_value == old_value);
+  return prev_value;
+}
+
+inline Atomic32 NoBarrier_AtomicExchange(volatile Atomic32 *ptr,
+                                         Atomic32 new_value) {
+  Atomic32 old_value;
+  do {
+    old_value = *ptr;
+  } while (!OSAtomicCompareAndSwap32(old_value, new_value,
+                                     const_cast<Atomic32*>(ptr)));
+  return old_value;
+}
+
+inline Atomic32 Acquire_AtomicExchange(volatile Atomic32 *ptr,
+                                       Atomic32 new_value) {
+  Atomic32 old_value;
+  do {
+    old_value = *ptr;
+  } while (!OSAtomicCompareAndSwap32Barrier(old_value, new_value,
+                                            const_cast<Atomic32*>(ptr)));
+  return old_value;
+}
+
+inline Atomic32 Release_AtomicExchange(volatile Atomic32 *ptr,
+                                       Atomic32 new_value) {
+  return Acquire_AtomicExchange(ptr, new_value);
+}
+
+inline Atomic32 Acquire_CompareAndSwap(volatile Atomic32 *ptr,
+                                       Atomic32 old_value,
+                                       Atomic32 new_value) {
+  Atomic32 prev_value;
+  do {
+    if (OSAtomicCompareAndSwap32Barrier(old_value, new_value,
+                                        const_cast<Atomic32*>(ptr))) {
+      return old_value;
+    }
+    prev_value = *ptr;
+  } while (prev_value == old_value);
+  return prev_value;
+}
+
+inline Atomic32 Release_CompareAndSwap(volatile Atomic32 *ptr,
+                                       Atomic32 old_value,
+                                       Atomic32 new_value) {
+  return Acquire_CompareAndSwap(ptr, old_value, new_value);
+}
+
+inline void NoBarrier_Store(volatile Atomic32* ptr, Atomic32 value) {
+  *ptr = value;
+}
+
+inline void Acquire_Store(volatile Atomic32 *ptr, Atomic32 value) {
+  *ptr = value;
+  MemoryBarrier();
+}
+
+inline void Release_Store(volatile Atomic32 *ptr, Atomic32 value) {
+  MemoryBarrier();
+  *ptr = value;
+}
+
+inline Atomic32 NoBarrier_Load(volatile const Atomic32* ptr) {
+  return *ptr;
+}
+
+inline Atomic32 Acquire_Load(volatile const Atomic32 *ptr) {
+  Atomic32 value = *ptr;
+  MemoryBarrier();
+  return value;
+}
+
+inline Atomic32 Release_Load(volatile const Atomic32 *ptr) {
+  MemoryBarrier();
+  return *ptr;
+}
+
+// 64-bit Versions.
+
+inline Atomic64 NoBarrier_CompareAndSwap(volatile Atomic64 *ptr,
+                                         Atomic64 old_value,
+                                         Atomic64 new_value) {
+  Atomic64 prev_value;
+  do {
+    if (OSAtomicCompareAndSwap64(old_value, new_value,
+                                 const_cast<Atomic64*>(ptr))) {
+      return old_value;
+    }
+    prev_value = *ptr;
+  } while (prev_value == old_value);
+  return prev_value;
+}
+
+inline Atomic64 NoBarrier_AtomicExchange(volatile Atomic64 *ptr,
+                                         Atomic64 new_value) {
+  Atomic64 old_value;
+  do {
+    old_value = *ptr;
+  } while (!OSAtomicCompareAndSwap64(old_value, new_value,
+                                     const_cast<Atomic64*>(ptr)));
+  return old_value;
+}
+
+inline Atomic64 Acquire_AtomicExchange(volatile Atomic64 *ptr,
+                                       Atomic64 new_value) {
+  Atomic64 old_value;
+  do {
+    old_value = *ptr;
+  } while (!OSAtomicCompareAndSwap64Barrier(old_value, new_value,
+                                            const_cast<Atomic64*>(ptr)));
+  return old_value;
+}
+
+inline Atomic64 Release_AtomicExchange(volatile Atomic64 *ptr,
+                                       Atomic64 new_value) {
+  return Acquire_AtomicExchange(ptr, new_value);
+}
+
+inline Atomic64 Acquire_CompareAndSwap(volatile Atomic64 *ptr,
+                                       Atomic64 old_value,
+                                       Atomic64 new_value) {
+  Atomic64 prev_value;
+  do {
+    if (OSAtomicCompareAndSwap64Barrier(old_value, new_value,
+                                        const_cast<Atomic64*>(ptr))) {
+      return old_value;
+    }
+    prev_value = *ptr;
+  } while (prev_value == old_value);
+  return prev_value;
+}
+
+inline Atomic64 Release_CompareAndSwap(volatile Atomic64 *ptr,
+                                       Atomic64 old_value,
+                                       Atomic64 new_value) {
+  // The libkern interface does not distinguish between
+  // Acquire and Release memory barriers; they are equivalent.
+  return Acquire_CompareAndSwap(ptr, old_value, new_value);
+}
+
+#ifdef __LP64__
+
+// 64-bit implementation on 64-bit platform
+
+inline void NoBarrier_Store(volatile Atomic64* ptr, Atomic64 value) {
+  *ptr = value;
+}
+
+inline void Acquire_Store(volatile Atomic64 *ptr, Atomic64 value) {
+  *ptr = value;
+  MemoryBarrier();
+}
+
+inline void Release_Store(volatile Atomic64 *ptr, Atomic64 value) {
+  MemoryBarrier();
+  *ptr = value;
+}
+
+inline Atomic64 NoBarrier_Load(volatile const Atomic64* ptr) {
+  return *ptr;
+}
+
+inline Atomic64 Acquire_Load(volatile const Atomic64 *ptr) {
+  Atomic64 value = *ptr;
+  MemoryBarrier();
+  return value;
+}
+
+inline Atomic64 Release_Load(volatile const Atomic64 *ptr) {
+  MemoryBarrier();
+  return *ptr;
+}
+
+#else
+
+// 64-bit implementation on 32-bit platform
+
+#if defined(__ppc__)
+
+inline void NoBarrier_Store(volatile Atomic64* ptr, Atomic64 value) {
+   __asm__ __volatile__(
+       "_NoBarrier_Store_not_supported_for_32_bit_ppc\n\t");
+}
+
+inline Atomic64 NoBarrier_Load(volatile const Atomic64* ptr) {
+   __asm__ __volatile__(
+       "_NoBarrier_Load_not_supported_for_32_bit_ppc\n\t");
+   return 0;
+}
+
+#elif defined(__i386__)
+
+inline void NoBarrier_Store(volatile Atomic64* ptr, Atomic64 value) {
+  __asm__ __volatile__("movq %1, %%mm0\n\t"    // Use mmx reg for 64-bit atomic
+                       "movq %%mm0, %0\n\t"  // moves (ptr could be read-only)
+                       "emms\n\t"              // Reset FP registers
+                       : "=m" (*ptr)
+                       : "m" (value)
+                       : // mark the FP stack and mmx registers as clobbered
+                         "st", "st(1)", "st(2)", "st(3)", "st(4)",
+                         "st(5)", "st(6)", "st(7)", "mm0", "mm1",
+                         "mm2", "mm3", "mm4", "mm5", "mm6", "mm7");
+
+}
+
+inline Atomic64 NoBarrier_Load(volatile const Atomic64* ptr) {
+  Atomic64 value;
+  __asm__ __volatile__("movq %1, %%mm0\n\t"  // Use mmx reg for 64-bit atomic
+                       "movq %%mm0, %0\n\t"  // moves (ptr could be read-only)
+                       "emms\n\t"            // Reset FP registers
+                       : "=m" (value)
+                       : "m" (*ptr)
+                       : // mark the FP stack and mmx registers as clobbered
+                         "st", "st(1)", "st(2)", "st(3)", "st(4)",
+                         "st(5)", "st(6)", "st(7)", "mm0", "mm1",
+                         "mm2", "mm3", "mm4", "mm5", "mm6", "mm7");
+
+  return value;
+}
+#endif
+
+
+inline void Acquire_Store(volatile Atomic64 *ptr, Atomic64 value) {
+  NoBarrier_Store(ptr, value);
+  MemoryBarrier();
+}
+
+inline void Release_Store(volatile Atomic64 *ptr, Atomic64 value) {
+  MemoryBarrier();
+  NoBarrier_Store(ptr, value);
+}
+
+inline Atomic64 Acquire_Load(volatile const Atomic64 *ptr) {
+  Atomic64 value = NoBarrier_Load(ptr);
+  MemoryBarrier();
+  return value;
+}
+
+inline Atomic64 Release_Load(volatile const Atomic64 *ptr) {
+  MemoryBarrier();
+  return NoBarrier_Load(ptr);
+}
+#endif  // __LP64__
+
+}   // namespace base::subtle
+}   // namespace base
+
+#endif  // BASE_ATOMICOPS_INTERNALS_MACOSX_H_
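An illustration (not part of the patch) of the acquire/release pairing these load/store primitives exist for; g_payload and g_ready are hypothetical globals:

// Sketch only: release-publish / acquire-consume with the ops above.
#include "base/atomicops.h"

Atomic32 g_payload = 0;  // shared data
Atomic32 g_ready = 0;    // publication flag

void Publish() {
  base::subtle::NoBarrier_Store(&g_payload, 42);
  base::subtle::Release_Store(&g_ready, 1);  // barrier, then store the flag
}

bool TryConsume(Atomic32* out) {
  if (base::subtle::Acquire_Load(&g_ready) == 0)  // load flag, then barrier
    return false;
  *out = base::subtle::NoBarrier_Load(&g_payload);  // guaranteed to see 42
  return true;
}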
diff --git a/src/base/atomicops-internals-mips.h b/src/base/atomicops-internals-mips.h
new file mode 100644
index 0000000..4bfd7f6
--- /dev/null
+++ b/src/base/atomicops-internals-mips.h
@@ -0,0 +1,323 @@
+// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
+/* Copyright (c) 2013, Google Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with the
+ * distribution.
+ *     * Neither the name of Google Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+// Author: Jovan Zelincevic <jovan.zelincevic@imgtec.com>
+// based on atomicops-internals by Sanjay Ghemawat
+
+// This file is an internal atomic implementation, use base/atomicops.h instead.
+//
+// This code implements MIPS atomics.
+
+#ifndef BASE_ATOMICOPS_INTERNALS_MIPS_H_
+#define BASE_ATOMICOPS_INTERNALS_MIPS_H_
+
+#if (_MIPS_ISA == _MIPS_ISA_MIPS64)
+#define BASE_HAS_ATOMIC64 1
+#endif
+
+typedef int32_t Atomic32;
+
+namespace base {
+namespace subtle {
+
+// Atomically execute:
+// result = *ptr;
+// if (*ptr == old_value)
+// *ptr = new_value;
+// return result;
+//
+// I.e., replace "*ptr" with "new_value" if "*ptr" used to be "old_value".
+// Always return the old value of "*ptr"
+//
+// This routine implies no memory barriers.
+inline Atomic32 NoBarrier_CompareAndSwap(volatile Atomic32* ptr,
+                                         Atomic32 old_value,
+                                         Atomic32 new_value)
+{
+    Atomic32 prev, tmp;
+    __asm__ volatile(
+        ".set   push                \n"
+        ".set   noreorder           \n"
+
+    "1:                             \n"
+        "ll     %0,     %5          \n" // prev = *ptr
+        "bne    %0,     %3,     2f  \n" // if (prev != old_value) goto 2
+        " move  %2,     %4          \n" // tmp = new_value
+        "sc     %2,     %1          \n" // *ptr = tmp (with atomic check)
+        "beqz   %2,     1b          \n" // start again on atomic error
+        " nop                       \n" // delay slot nop
+    "2:                             \n"
+
+        ".set   pop                 \n"
+        : "=&r" (prev), "=m" (*ptr),
+          "=&r" (tmp)
+        : "Ir" (old_value), "r" (new_value),
+          "m" (*ptr)
+        : "memory"
+    );
+    return prev;
+}
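+
+// Illustrative sketch (hypothetical helper, not part of this file):
+// callers typically build other read-modify-write operations out of this
+// CAS with a retry loop, e.g. an atomic add:
+//
+//   Atomic32 AtomicAddSketch(volatile Atomic32* ptr, Atomic32 delta) {
+//     Atomic32 old_val;
+//     do {
+//       old_val = *ptr;
+//     } while (NoBarrier_CompareAndSwap(ptr, old_val, old_val + delta)
+//              != old_val);
+//     return old_val + delta;
+//   }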
+
+// Atomically store new_value into *ptr, returning the previous value held in
+// *ptr. This routine implies no memory barriers.
+inline Atomic32 NoBarrier_AtomicExchange(volatile Atomic32* ptr,
+                                         Atomic32 new_value)
+{
+    Atomic32 temp, old;
+    __asm__ volatile(
+        ".set   push                \n"
+        ".set   noreorder           \n"
+
+    "1:                             \n"
+        "ll     %1,     %2          \n" // old = *ptr
+        "move   %0,     %3          \n" // temp = new_value
+        "sc     %0,     %2          \n" // *ptr = temp (with atomic check)
+        "beqz   %0,     1b          \n" // start again on atomic error
+        " nop                       \n" // delay slot nop
+
+        ".set   pop                 \n"
+        : "=&r" (temp), "=&r" (old),
+          "=m" (*ptr)
+        : "r" (new_value), "m" (*ptr)
+        : "memory"
+    );
+    return old;
+}
+
+inline void MemoryBarrier()
+{
+    __asm__ volatile("sync" : : : "memory");
+}
+
+// "Acquire" operations
+// ensure that no later memory access can be reordered ahead of the operation.
+// "Release" operations ensure that no previous memory access can be reordered
+// after the operation. "Barrier" operations have both "Acquire" and "Release"
+// semantics. A MemoryBarrier() has "Barrier" semantics, but does no memory
+// access.
+inline Atomic32 Acquire_CompareAndSwap(volatile Atomic32* ptr,
+                                       Atomic32 old_value,
+                                       Atomic32 new_value)
+{
+    Atomic32 res = NoBarrier_CompareAndSwap(ptr, old_value, new_value);
+    MemoryBarrier();
+    return res;
+}
+
+inline Atomic32 Release_CompareAndSwap(volatile Atomic32* ptr,
+                                       Atomic32 old_value,
+                                       Atomic32 new_value)
+{
+    MemoryBarrier();
+    Atomic32 res = NoBarrier_CompareAndSwap(ptr, old_value, new_value);
+    return res;
+}
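+
+// Illustrative sketch (hypothetical, not part of this file): these acquire
+// and release variants are the usual building blocks for locks.  A minimal
+// spinlock over a word initialized to 0 could look like:
+//
+//   void LockSketch(volatile Atomic32* lock) {
+//     while (Acquire_CompareAndSwap(lock, 0, 1) != 0) { /* spin */ }
+//   }
+//   void UnlockSketch(volatile Atomic32* lock) {
+//     Release_Store(lock, 0);  // Release_Store is defined below
+//   }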
+
+inline void NoBarrier_Store(volatile Atomic32* ptr, Atomic32 value)
+{
+    *ptr = value;
+}
+
+inline Atomic32 Acquire_AtomicExchange(volatile Atomic32* ptr,
+                                       Atomic32 new_value)
+{
+    Atomic32 old_value = NoBarrier_AtomicExchange(ptr, new_value);
+    MemoryBarrier();
+    return old_value;
+}
+
+inline Atomic32 Release_AtomicExchange(volatile Atomic32* ptr,
+                                       Atomic32 new_value)
+{
+    MemoryBarrier();
+    return NoBarrier_AtomicExchange(ptr, new_value);
+}
+
+inline void Acquire_Store(volatile Atomic32* ptr, Atomic32 value)
+{
+    *ptr = value;
+    MemoryBarrier();
+}
+
+inline void Release_Store(volatile Atomic32* ptr, Atomic32 value)
+{
+    MemoryBarrier();
+    *ptr = value;
+}
+
+inline Atomic32 NoBarrier_Load(volatile const Atomic32* ptr)
+{
+    return *ptr;
+}
+
+inline Atomic32 Acquire_Load(volatile const Atomic32* ptr)
+{
+    Atomic32 value = *ptr;
+    MemoryBarrier();
+    return value;
+}
+
+inline Atomic32 Release_Load(volatile const Atomic32* ptr)
+{
+    MemoryBarrier();
+    return *ptr;
+}
+
+#if (_MIPS_ISA == _MIPS_ISA_MIPS64) || (_MIPS_SIM == _MIPS_SIM_ABI64)
+
+typedef int64_t Atomic64;
+
+inline Atomic64 NoBarrier_CompareAndSwap(volatile Atomic64* ptr,
+                                         Atomic64 old_value,
+                                         Atomic64 new_value)
+{
+    Atomic64 prev, tmp;
+    __asm__ volatile(
+        ".set   push                \n"
+        ".set   noreorder           \n"
+
+    "1:                             \n"
+        "lld    %0,     %5          \n" // prev = *ptr
+        "bne    %0,     %3,     2f  \n" // if (prev != old_value) goto 2
+        " move  %2,     %4          \n" // tmp = new_value
+        "scd    %2,     %1          \n" // *ptr = tmp (with atomic check)
+        "beqz   %2,     1b          \n" // start again on atomic error
+        " nop                       \n" // delay slot nop
+    "2:                             \n"
+
+        ".set   pop                 \n"
+        : "=&r" (prev), "=m" (*ptr),
+          "=&r" (tmp)
+        : "Ir" (old_value), "r" (new_value),
+          "m" (*ptr)
+        : "memory"
+    );
+    return prev;
+}
+
+inline Atomic64 NoBarrier_AtomicExchange(volatile Atomic64* ptr,
+                                         Atomic64 new_value)
+{
+    Atomic64 temp, old;
+    __asm__ volatile(
+        ".set   push                \n"
+        ".set   noreorder           \n"
+
+    "1:                             \n"
+        "lld    %1,     %2          \n" // old = *ptr
+        "move   %0,     %3          \n" // temp = new_value
+        "scd    %0,     %2          \n" // *ptr = temp (with atomic check)
+        "beqz   %0,     1b          \n" // start again on atomic error
+        " nop                       \n" // delay slot nop
+
+        ".set   pop                 \n"
+        : "=&r" (temp), "=&r" (old),
+          "=m" (*ptr)
+        : "r" (new_value), "m" (*ptr)
+        : "memory"
+    );
+    return old;
+}
+
+inline Atomic64 Acquire_AtomicExchange(volatile Atomic64* ptr,
+                                       Atomic64 new_value)
+{
+    Atomic64 old_value = NoBarrier_AtomicExchange(ptr, new_value);
+    MemoryBarrier();
+    return old_value;
+}
+
+inline Atomic64 Acquire_CompareAndSwap(volatile Atomic64* ptr,
+                                       Atomic64 old_value,
+                                       Atomic64 new_value)
+{
+    Atomic64 res = NoBarrier_CompareAndSwap(ptr, old_value, new_value);
+    MemoryBarrier();
+    return res;
+}
+
+inline Atomic64 Release_CompareAndSwap(volatile Atomic64* ptr,
+                                       Atomic64 old_value,
+                                       Atomic64 new_value)
+{
+    MemoryBarrier();
+    Atomic64 res = NoBarrier_CompareAndSwap(ptr, old_value, new_value);
+    return res;
+}
+
+inline void NoBarrier_Store(volatile Atomic64* ptr, Atomic64 value)
+{
+    *ptr = value;
+}
+
+inline Atomic64 Release_AtomicExchange(volatile Atomic64* ptr,
+                                       Atomic64 new_value)
+{
+    MemoryBarrier();
+    return NoBarrier_AtomicExchange(ptr, new_value);
+}
+
+inline void Acquire_Store(volatile Atomic64* ptr, Atomic64 value)
+{
+    *ptr = value;
+    MemoryBarrier();
+}
+
+inline void Release_Store(volatile Atomic64* ptr, Atomic64 value)
+{
+    MemoryBarrier();
+    *ptr = value;
+}
+
+inline Atomic64 NoBarrier_Load(volatile const Atomic64* ptr)
+{
+    return *ptr;
+}
+
+inline Atomic64 Acquire_Load(volatile const Atomic64* ptr)
+{
+    Atomic64 value = *ptr;
+    MemoryBarrier();
+    return value;
+}
+
+inline Atomic64 Release_Load(volatile const Atomic64* ptr)
+{
+    MemoryBarrier();
+    return *ptr;
+}
+
+#endif
+
+}   // namespace base::subtle
+}   // namespace base
+
+#endif  // BASE_ATOMICOPS_INTERNALS_MIPS_H_
diff --git a/src/base/atomicops-internals-windows.h b/src/base/atomicops-internals-windows.h
new file mode 100644
index 0000000..93ced87
--- /dev/null
+++ b/src/base/atomicops-internals-windows.h
@@ -0,0 +1,457 @@
+// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
+/* Copyright (c) 2006, Google Inc.
+ * All rights reserved.
+ * 
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ * 
+ *     * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with the
+ * distribution.
+ *     * Neither the name of Google Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ * 
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * ---
+ * Author: Sanjay Ghemawat
+ */
+
+// Implementation of atomic operations using Windows API
+// functions.  This file should not be included directly.  Clients
+// should instead include "base/atomicops.h".
+
+#ifndef BASE_ATOMICOPS_INTERNALS_WINDOWS_H_
+#define BASE_ATOMICOPS_INTERNALS_WINDOWS_H_
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "base/basictypes.h"  // For COMPILE_ASSERT
+
+typedef int32 Atomic32;
+
+#if defined(_WIN64)
+#define BASE_HAS_ATOMIC64 1  // Use only in tests and base/atomic*
+#endif
+
+namespace base {
+namespace subtle {
+
+typedef int64 Atomic64;
+
+// 32-bit low-level operations on any platform
+
+extern "C" {
+// We use windows intrinsics when we can (they seem to be supported
+// well on MSVC 8.0 and above).  Unfortunately, in some
+// environments, <windows.h> and <intrin.h> have conflicting
+// declarations of some other intrinsics, breaking compilation:
+//   http://connect.microsoft.com/VisualStudio/feedback/details/262047
+// Therefore, we simply declare the relevant intrinsics ourselves.
+
+// MinGW has a bug in the header files where it doesn't indicate the
+// first argument is volatile -- they're not up to date.  See
+//   http://readlist.com/lists/lists.sourceforge.net/mingw-users/0/3861.html
+// We have to const_cast away the volatile to avoid compiler warnings.
+// TODO(csilvers): remove this once MinGW has updated MinGW/include/winbase.h
+#if defined(__MINGW32__)
+inline LONG FastInterlockedCompareExchange(volatile LONG* ptr,
+                                           LONG newval, LONG oldval) {
+  return ::InterlockedCompareExchange(const_cast<LONG*>(ptr), newval, oldval);
+}
+inline LONG FastInterlockedExchange(volatile LONG* ptr, LONG newval) {
+  return ::InterlockedExchange(const_cast<LONG*>(ptr), newval);
+}
+inline LONG FastInterlockedExchangeAdd(volatile LONG* ptr, LONG increment) {
+  return ::InterlockedExchangeAdd(const_cast<LONG*>(ptr), increment);
+}
+
+#elif _MSC_VER >= 1400   // intrinsics didn't work so well before MSVC 8.0
+// Unfortunately, in some environments, <windows.h> and <intrin.h>
+// have conflicting declarations of some intrinsics, breaking
+// compilation.  So we declare the intrinsics we need ourselves.  See
+//   http://connect.microsoft.com/VisualStudio/feedback/details/262047
+LONG _InterlockedCompareExchange(volatile LONG* ptr, LONG newval, LONG oldval);
+#pragma intrinsic(_InterlockedCompareExchange)
+inline LONG FastInterlockedCompareExchange(volatile LONG* ptr,
+                                           LONG newval, LONG oldval) {
+  return _InterlockedCompareExchange(ptr, newval, oldval);
+}
+
+LONG _InterlockedExchange(volatile LONG* ptr, LONG newval);
+#pragma intrinsic(_InterlockedExchange)
+inline LONG FastInterlockedExchange(volatile LONG* ptr, LONG newval) {
+  return _InterlockedExchange(ptr, newval);
+}
+
+LONG _InterlockedExchangeAdd(volatile LONG* ptr, LONG increment);
+#pragma intrinsic(_InterlockedExchangeAdd)
+inline LONG FastInterlockedExchangeAdd(volatile LONG* ptr, LONG increment) {
+  return _InterlockedExchangeAdd(ptr, increment);
+}
+
+#else
+inline LONG FastInterlockedCompareExchange(volatile LONG* ptr,
+                                           LONG newval, LONG oldval) {
+  return ::InterlockedCompareExchange(ptr, newval, oldval);
+}
+inline LONG FastInterlockedExchange(volatile LONG* ptr, LONG newval) {
+  return ::InterlockedExchange(ptr, newval);
+}
+inline LONG FastInterlockedExchangeAdd(volatile LONG* ptr, LONG increment) {
+  return ::InterlockedExchangeAdd(ptr, increment);
+}
+
+#endif  // ifdef __MINGW32__
+}  // extern "C"
+
+inline Atomic32 NoBarrier_CompareAndSwap(volatile Atomic32* ptr,
+                                         Atomic32 old_value,
+                                         Atomic32 new_value) {
+  LONG result = FastInterlockedCompareExchange(
+      reinterpret_cast<volatile LONG*>(ptr),
+      static_cast<LONG>(new_value),
+      static_cast<LONG>(old_value));
+  return static_cast<Atomic32>(result);
+}
+
+inline Atomic32 NoBarrier_AtomicExchange(volatile Atomic32* ptr,
+                                         Atomic32 new_value) {
+  LONG result = FastInterlockedExchange(
+      reinterpret_cast<volatile LONG*>(ptr),
+      static_cast<LONG>(new_value));
+  return static_cast<Atomic32>(result);
+}
+
+inline Atomic32 Acquire_AtomicExchange(volatile Atomic32* ptr,
+                                       Atomic32 new_value) {
+  // FastInterlockedExchange has both acquire and release memory barriers.
+  return NoBarrier_AtomicExchange(ptr, new_value);
+}
+
+inline Atomic32 Release_AtomicExchange(volatile Atomic32* ptr,
+                                       Atomic32 new_value) {
+  // FastInterlockedExchange has both acquire and release memory barriers.
+  return NoBarrier_AtomicExchange(ptr, new_value);
+}
+
+}  // namespace base::subtle
+}  // namespace base
+
+
+// In msvc8/vs2005, winnt.h already contains a definition for
+// MemoryBarrier in the global namespace.  Add it there for earlier
+// versions and forward to it from within the namespace.
+#if !(defined(_MSC_VER) && _MSC_VER >= 1400)
+inline void MemoryBarrier() {
+  Atomic32 value = 0;
+  base::subtle::NoBarrier_AtomicExchange(&value, 0);
+                        // actually acts as a barrier in this implementation
+}
+#endif
+
+namespace base {
+namespace subtle {
+
+inline void MemoryBarrier() {
+  ::MemoryBarrier();
+}
+
+inline Atomic32 Acquire_CompareAndSwap(volatile Atomic32* ptr,
+                                       Atomic32 old_value,
+                                       Atomic32 new_value) {
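+  // FastInterlockedCompareExchange already provides a full memory barrier,
+  // which covers the acquire semantics required here.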
+  return NoBarrier_CompareAndSwap(ptr, old_value, new_value);
+}
+
+inline Atomic32 Release_CompareAndSwap(volatile Atomic32* ptr,
+                                       Atomic32 old_value,
+                                       Atomic32 new_value) {
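+  // FastInterlockedCompareExchange already provides a full memory barrier,
+  // which covers the release semantics required here.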
+  return NoBarrier_CompareAndSwap(ptr, old_value, new_value);
+}
+
+inline void NoBarrier_Store(volatile Atomic32* ptr, Atomic32 value) {
+  *ptr = value;
+}
+
+inline void Acquire_Store(volatile Atomic32* ptr, Atomic32 value) {
+  Acquire_AtomicExchange(ptr, value);
+}
+
+inline void Release_Store(volatile Atomic32* ptr, Atomic32 value) {
+  *ptr = value; // works w/o barrier for current Intel chips as of June 2005
+  // See comments in Atomic64 version of Release_Store() below.
+}
+
+inline Atomic32 NoBarrier_Load(volatile const Atomic32* ptr) {
+  return *ptr;
+}
+
+inline Atomic32 Acquire_Load(volatile const Atomic32* ptr) {
+  Atomic32 value = *ptr;
+  return value;
+}
+
+inline Atomic32 Release_Load(volatile const Atomic32* ptr) {
+  MemoryBarrier();
+  return *ptr;
+}
+
+// 64-bit operations
+
+#if defined(_WIN64) || defined(__MINGW64__)
+
+// 64-bit low-level operations on 64-bit platform.
+
+COMPILE_ASSERT(sizeof(Atomic64) == sizeof(PVOID), atomic_word_is_atomic);
+
+// These are the intrinsics needed for 64-bit operations.  Similar to the
+// 32-bit case above.
+
+extern "C" {
+#if defined(__MINGW64__)
+inline PVOID FastInterlockedCompareExchangePointer(volatile PVOID* ptr,
+                                                   PVOID newval, PVOID oldval) {
+  return ::InterlockedCompareExchangePointer(const_cast<PVOID*>(ptr),
+                                             newval, oldval);
+}
+inline PVOID FastInterlockedExchangePointer(volatile PVOID* ptr, PVOID newval) {
+  return ::InterlockedExchangePointer(const_cast<PVOID*>(ptr), newval);
+}
+inline LONGLONG FastInterlockedExchangeAdd64(volatile LONGLONG* ptr,
+                                             LONGLONG increment) {
+  return ::InterlockedExchangeAdd64(const_cast<LONGLONG*>(ptr), increment);
+}
+
+#elif _MSC_VER >= 1400   // intrinsics didn't work so well before MSVC 8.0
+// Like above, we need to declare the intrinsics ourselves.
+PVOID _InterlockedCompareExchangePointer(volatile PVOID* ptr,
+                                         PVOID newval, PVOID oldval);
+#pragma intrinsic(_InterlockedCompareExchangePointer)
+inline PVOID FastInterlockedCompareExchangePointer(volatile PVOID* ptr,
+                                                   PVOID newval, PVOID oldval) {
+  return _InterlockedCompareExchangePointer(const_cast<PVOID*>(ptr),
+                                            newval, oldval);
+}
+
+PVOID _InterlockedExchangePointer(volatile PVOID* ptr, PVOID newval);
+#pragma intrinsic(_InterlockedExchangePointer)
+inline PVOID FastInterlockedExchangePointer(volatile PVOID* ptr, PVOID newval) {
+  return _InterlockedExchangePointer(const_cast<PVOID*>(ptr), newval);
+}
+
+LONGLONG _InterlockedExchangeAdd64(volatile LONGLONG* ptr, LONGLONG increment);
+#pragma intrinsic(_InterlockedExchangeAdd64)
+inline LONGLONG FastInterlockedExchangeAdd64(volatile LONGLONG* ptr,
+                                             LONGLONG increment) {
+  return _InterlockedExchangeAdd64(const_cast<LONGLONG*>(ptr), increment);
+}
+
+#else
+inline PVOID FastInterlockedCompareExchangePointer(volatile PVOID* ptr,
+                                                   PVOID newval, PVOID oldval) {
+  return ::InterlockedCompareExchangePointer(ptr, newval, oldval);
+}
+inline PVOID FastInterlockedExchangePointer(volatile PVOID* ptr, PVOID newval) {
+  return ::InterlockedExchangePointer(ptr, newval);
+}
+inline LONGLONG FastInterlockedExchangeAdd64(volatile LONGLONG* ptr,
+                                         LONGLONG increment) {
+  return ::InterlockedExchangeAdd64(ptr, increment);
+}
+
+#endif  // ifdef __MINGW64__
+}  // extern "C"
+
+inline Atomic64 NoBarrier_CompareAndSwap(volatile Atomic64* ptr,
+                                         Atomic64 old_value,
+                                         Atomic64 new_value) {
+  PVOID result = FastInterlockedCompareExchangePointer(
+    reinterpret_cast<volatile PVOID*>(ptr),
+    reinterpret_cast<PVOID>(new_value), reinterpret_cast<PVOID>(old_value));
+  return reinterpret_cast<Atomic64>(result);
+}
+
+inline Atomic64 NoBarrier_AtomicExchange(volatile Atomic64* ptr,
+                                         Atomic64 new_value) {
+  PVOID result = FastInterlockedExchangePointer(
+    reinterpret_cast<volatile PVOID*>(ptr),
+    reinterpret_cast<PVOID>(new_value));
+  return reinterpret_cast<Atomic64>(result);
+}
+
+inline void NoBarrier_Store(volatile Atomic64* ptr, Atomic64 value) {
+  *ptr = value;
+}
+
+inline void Acquire_Store(volatile Atomic64* ptr, Atomic64 value) {
+  NoBarrier_AtomicExchange(ptr, value);
+              // acts as a barrier in this implementation
+}
+
+inline void Release_Store(volatile Atomic64* ptr, Atomic64 value) {
+  *ptr = value; // works w/o barrier for current Intel chips as of June 2005
+
+  // When new chips come out, check:
+  //  IA-32 Intel Architecture Software Developer's Manual, Volume 3:
+  //  System Programming Guide, Chapter 7: Multiple-processor management,
+  //  Section 7.2, Memory Ordering.
+  // Last seen at:
+  //   http://developer.intel.com/design/pentium4/manuals/index_new.htm
+}
+
+inline Atomic64 NoBarrier_Load(volatile const Atomic64* ptr) {
+  return *ptr;
+}
+
+inline Atomic64 Acquire_Load(volatile const Atomic64* ptr) {
+  Atomic64 value = *ptr;
+  return value;
+}
+
+inline Atomic64 Release_Load(volatile const Atomic64* ptr) {
+  MemoryBarrier();
+  return *ptr;
+}
+
+#else  // defined(_WIN64) || defined(__MINGW64__)
+
+// 64-bit low-level operations on 32-bit platform
+
+// TODO(vchen): The GNU assembly below must be converted to MSVC inline
+// assembly.  Then the file should be renamed to ...-x86-msvc.h, probably.
+
+inline void NotImplementedFatalError(const char *function_name) {
+  fprintf(stderr, "64-bit %s() not implemented on this platform\n",
+          function_name);
+  abort();
+}
+
+inline Atomic64 NoBarrier_CompareAndSwap(volatile Atomic64* ptr,
+                                         Atomic64 old_value,
+                                         Atomic64 new_value) {
+#if 0 // Not implemented
+  Atomic64 prev;
+  __asm__ __volatile__("movl (%3), %%ebx\n\t"    // Move 64-bit new_value into
+                       "movl 4(%3), %%ecx\n\t"   // ecx:ebx
+                       "lock; cmpxchg8b %1\n\t"  // If edx:eax (old_value) same
+                       : "=A" (prev)             // as contents of ptr:
+                       : "m" (*ptr),             //   ecx:ebx => ptr
+                         "0" (old_value),        // else:
+                         "r" (&new_value)        //   old *ptr => edx:eax
+                       : "memory", "%ebx", "%ecx");
+  return prev;
+#else
+  NotImplementedFatalError("NoBarrier_CompareAndSwap");
+  return 0;
+#endif
+}
+
+inline Atomic64 NoBarrier_AtomicExchange(volatile Atomic64* ptr,
+                                         Atomic64 new_value) {
+#if 0 // Not implemented
+  __asm__ __volatile__(
+                       "movl (%2), %%ebx\n\t"    // Move 64-bit new_value into
+                       "movl 4(%2), %%ecx\n\t"   // ecx:ebx
+                       "0:\n\t"
+                       "movl %1, %%eax\n\t"      // Read contents of ptr into
+                       "movl 4%1, %%edx\n\t"     // edx:eax
+                       "lock; cmpxchg8b %1\n\t"  // Attempt cmpxchg; if *ptr
+                       "jnz 0b\n\t"              // is no longer edx:eax, loop
+                       : "=A" (new_value)
+                       : "m" (*ptr),
+                         "r" (&new_value)
+                       : "memory", "%ebx", "%ecx");
+  return new_value;  // Now it's the previous value.
+#else
+  NotImplementedFatalError("NoBarrier_AtomicExchange");
+  return 0;
+#endif
+}
+
+inline void NoBarrier_Store(volatile Atomic64* ptrValue, Atomic64 value)
+{
+  __asm {
+    movq mm0, value;   // Use mmx reg for 64-bit atomic moves
+    mov eax, ptrValue;
+    movq [eax], mm0;
+    emms;              // Empty mmx state to enable FP registers
+  }
+}
+
+inline void Acquire_Store(volatile Atomic64* ptr, Atomic64 value) {
+  NoBarrier_AtomicExchange(ptr, value);
+              // acts as a barrier in this implementation
+}
+
+inline void Release_Store(volatile Atomic64* ptr, Atomic64 value) {
+  NoBarrier_Store(ptr, value);
+}
+
+inline Atomic64 NoBarrier_Load(volatile const Atomic64* ptrValue)
+{
+  Atomic64 value;
+  __asm {
+    mov eax, ptrValue;
+    movq mm0, [eax];   // Use mmx reg for 64-bit atomic moves
+    movq value, mm0;
+    emms;              // Empty mmx state to enable FP registers
+  }
+  return value;
+}
+
+inline Atomic64 Acquire_Load(volatile const Atomic64* ptr) {
+  Atomic64 value = NoBarrier_Load(ptr);
+  return value;
+}
+
+inline Atomic64 Release_Load(volatile const Atomic64* ptr) {
+  MemoryBarrier();
+  return NoBarrier_Load(ptr);
+}
+
+#endif  // defined(_WIN64) || defined(__MINGW64__)
+
+
+inline Atomic64 Acquire_AtomicExchange(volatile Atomic64* ptr,
+                                       Atomic64 new_value) {
+  // The underlying interlocked exchange provides both acquire and release
+  // memory barriers.
+  return NoBarrier_AtomicExchange(ptr, new_value);
+}
+
+inline Atomic64 Release_AtomicExchange(volatile Atomic64* ptr,
+                                       Atomic64 new_value) {
+  // The underlying interlocked exchange provides both acquire and release
+  // memory barriers.
+  return NoBarrier_AtomicExchange(ptr, new_value);
+}
+
+inline Atomic64 Acquire_CompareAndSwap(volatile Atomic64* ptr,
+                                       Atomic64 old_value,
+                                       Atomic64 new_value) {
+  return NoBarrier_CompareAndSwap(ptr, old_value, new_value);
+}
+
+inline Atomic64 Release_CompareAndSwap(volatile Atomic64* ptr,
+                                       Atomic64 old_value,
+                                       Atomic64 new_value) {
+  return NoBarrier_CompareAndSwap(ptr, old_value, new_value);
+}
+
+}  // namespace base::subtle
+}  // namespace base
+
+#endif  // BASE_ATOMICOPS_INTERNALS_WINDOWS_H_
diff --git a/src/base/atomicops-internals-x86.cc b/src/base/atomicops-internals-x86.cc
new file mode 100644
index 0000000..c3391e7
--- /dev/null
+++ b/src/base/atomicops-internals-x86.cc
@@ -0,0 +1,112 @@
+// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
+/* Copyright (c) 2007, Google Inc.
+ * All rights reserved.
+ * 
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ * 
+ *     * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with the
+ * distribution.
+ *     * Neither the name of Google Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ * 
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * ---
+ * This module gets enough CPU information to optimize the
+ * atomicops module on x86.
+ */
+
+#include "base/atomicops.h"
+#include "base/basictypes.h"
+#include "base/googleinit.h"
+#include "base/logging.h"
+#include <string.h>
+
+// This file only makes sense with atomicops-internals-x86.h -- it
+// depends on structs that are defined in that file.  If atomicops.h
+// doesn't sub-include that file, then we aren't needed, and shouldn't
+// try to do anything.
+#ifdef BASE_ATOMICOPS_INTERNALS_X86_H_
+
+// Inline cpuid instruction.  In PIC compilations, %ebx contains the address
+// of the global offset table.  To avoid breaking such executables, this code
+// must preserve that register's value across cpuid instructions.
+#if defined(__i386__)
+#define cpuid(a, b, c, d, inp) \
+  asm ("mov %%ebx, %%edi\n"    \
+       "cpuid\n"               \
+       "xchg %%edi, %%ebx\n"   \
+       : "=a" (a), "=D" (b), "=c" (c), "=d" (d) : "a" (inp))
+#elif defined (__x86_64__)
+#define cpuid(a, b, c, d, inp) \
+  asm ("mov %%rbx, %%rdi\n"    \
+       "cpuid\n"               \
+       "xchg %%rdi, %%rbx\n"   \
+       : "=a" (a), "=D" (b), "=c" (c), "=d" (d) : "a" (inp))
+#endif
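+
+// Usage sketch (illustrative): the macro takes four output lvalues and an
+// input leaf number, e.g.
+//   uint32 a, b, c, d;
+//   cpuid(a, b, c, d, 0);  // leaf 0: highest basic leaf in a; vendor in b,d,c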
+
+#if defined(cpuid)        // initialize the struct only on x86
+
+// Set the flags so that code will run correctly and conservatively
+// until InitGoogle() is called.
+struct AtomicOps_x86CPUFeatureStruct AtomicOps_Internalx86CPUFeatures = {
+  false,          // no SSE2
+  false           // no cmpxchg16b
+};
+
+// Initialize the AtomicOps_Internalx86CPUFeatures struct.
+static void AtomicOps_Internalx86CPUFeaturesInit() {
+  uint32 eax;
+  uint32 ebx;
+  uint32 ecx;
+  uint32 edx;
+
+  // Get vendor string (issue CPUID with eax = 0)
+  cpuid(eax, ebx, ecx, edx, 0);
+  char vendor[13];
+  memcpy(vendor, &ebx, 4);
+  memcpy(vendor + 4, &edx, 4);
+  memcpy(vendor + 8, &ecx, 4);
+  vendor[12] = 0;
+
+  // get feature flags in ecx/edx, and family/model in eax
+  cpuid(eax, ebx, ecx, edx, 1);
+
+  int family = (eax >> 8) & 0xf;        // family and model fields
+  int model = (eax >> 4) & 0xf;
+  if (family == 0xf) {                  // use extended family and model fields
+    family += (eax >> 20) & 0xff;
+    model += ((eax >> 16) & 0xf) << 4;
+  }
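+  // Illustrative example: eax == 0x00100f22 has base family 0xf, so the
+  // extended fields apply, giving family 0xf + 0x1 = 0x10 and model 0x02.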
+
+  // edx bit 26 is SSE2, which tells us whether we can use mfence
+  AtomicOps_Internalx86CPUFeatures.has_sse2 = ((edx >> 26) & 1);
+
+  // ecx bit 13 indicates whether the cmpxchg16b instruction is supported
+  AtomicOps_Internalx86CPUFeatures.has_cmpxchg16b = ((ecx >> 13) & 1);
+}
+
+REGISTER_MODULE_INITIALIZER(atomicops_x86, {
+  AtomicOps_Internalx86CPUFeaturesInit();
+});
+
+#endif
+
+#endif  /* ifdef BASE_ATOMICOPS_INTERNALS_X86_H_ */
diff --git a/src/base/atomicops-internals-x86.h b/src/base/atomicops-internals-x86.h
new file mode 100644
index 0000000..e441ac7
--- /dev/null
+++ b/src/base/atomicops-internals-x86.h
@@ -0,0 +1,391 @@
+// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
+/* Copyright (c) 2006, Google Inc.
+ * All rights reserved.
+ * 
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ * 
+ *     * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with the
+ * distribution.
+ *     * Neither the name of Google Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ * 
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * ---
+ * Author: Sanjay Ghemawat
+ */
+
+// Implementation of atomic operations for x86.  This file should not
+// be included directly.  Clients should instead include
+// "base/atomicops.h".
+
+#ifndef BASE_ATOMICOPS_INTERNALS_X86_H_
+#define BASE_ATOMICOPS_INTERNALS_X86_H_
+#include "base/basictypes.h"
+
+typedef int32_t Atomic32;
+#define BASE_HAS_ATOMIC64 1  // Use only in tests and base/atomic*
+
+
+// NOTE(vchen): x86 does not need to define AtomicWordCastType, because it
+// already matches Atomic32 or Atomic64, depending on the platform.
+
+
+// This struct is not part of the public API of this module; clients may not
+// use it.
+// Features of this x86 CPU.  Values may not be correct before main() is run,
+// but are set conservatively.
+struct AtomicOps_x86CPUFeatureStruct {
+  bool has_sse2;            // Processor has SSE2.
+  bool has_cmpxchg16b;      // Processor supports cmpxchg16b instruction.
+};
+
+ATTRIBUTE_VISIBILITY_HIDDEN
+extern struct AtomicOps_x86CPUFeatureStruct AtomicOps_Internalx86CPUFeatures;
+
+
+#define ATOMICOPS_COMPILER_BARRIER() __asm__ __volatile__("" : : : "memory")
+
+
+namespace base {
+namespace subtle {
+
+typedef int64_t Atomic64;
+
+// 32-bit low-level operations on any platform.
+
+inline Atomic32 NoBarrier_CompareAndSwap(volatile Atomic32* ptr,
+                                         Atomic32 old_value,
+                                         Atomic32 new_value) {
+  Atomic32 prev;
+  __asm__ __volatile__("lock; cmpxchgl %1,%2"
+                       : "=a" (prev)
+                       : "q" (new_value), "m" (*ptr), "0" (old_value)
+                       : "memory");
+  return prev;
+}
+
+inline Atomic32 NoBarrier_AtomicExchange(volatile Atomic32* ptr,
+                                         Atomic32 new_value) {
+  __asm__ __volatile__("xchgl %1,%0"  // The lock prefix is implicit for xchg.
+                       : "=r" (new_value)
+                       : "m" (*ptr), "0" (new_value)
+                       : "memory");
+  return new_value;  // Now it's the previous value.
+}
+
+inline Atomic32 Acquire_AtomicExchange(volatile Atomic32* ptr,
+                                       Atomic32 new_value) {
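+  // On x86, xchgl is implicitly locked and acts as a full barrier, so the
+  // exchange already provides acquire semantics.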
+  Atomic32 old_val = NoBarrier_AtomicExchange(ptr, new_value);
+  return old_val;
+}
+
+inline Atomic32 Release_AtomicExchange(volatile Atomic32* ptr,
+                                       Atomic32 new_value) {
+  // xchgl already has release memory barrier semantics.
+  return NoBarrier_AtomicExchange(ptr, new_value);
+}
+
+inline Atomic32 Acquire_CompareAndSwap(volatile Atomic32* ptr,
+                                       Atomic32 old_value,
+                                       Atomic32 new_value) {
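+  // "lock; cmpxchgl" acts as a full barrier on x86, so no extra fence is
+  // needed for acquire semantics.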
+  Atomic32 x = NoBarrier_CompareAndSwap(ptr, old_value, new_value);
+  return x;
+}
+
+inline Atomic32 Release_CompareAndSwap(volatile Atomic32* ptr,
+                                       Atomic32 old_value,
+                                       Atomic32 new_value) {
+  return NoBarrier_CompareAndSwap(ptr, old_value, new_value);
+}
+
+inline void NoBarrier_Store(volatile Atomic32* ptr, Atomic32 value) {
+  *ptr = value;
+}
+
+#if defined(__x86_64__)
+
+// The 64-bit implementation of MemoryBarrier can be simpler, because the
+// "mfence" instruction is guaranteed to exist.
+inline void MemoryBarrier() {
+  __asm__ __volatile__("mfence" : : : "memory");
+}
+
+inline void Acquire_Store(volatile Atomic32* ptr, Atomic32 value) {
+  *ptr = value;
+  MemoryBarrier();
+}
+
+#else
+
+inline void MemoryBarrier() {
+  if (AtomicOps_Internalx86CPUFeatures.has_sse2) {
+    __asm__ __volatile__("mfence" : : : "memory");
+  } else { // mfence is faster but not present on PIII
+    Atomic32 x = 0;
+    Acquire_AtomicExchange(&x, 0);
+  }
+}
+
+inline void Acquire_Store(volatile Atomic32* ptr, Atomic32 value) {
+  if (AtomicOps_Internalx86CPUFeatures.has_sse2) {
+    *ptr = value;
+    __asm__ __volatile__("mfence" : : : "memory");
+  } else {
+    Acquire_AtomicExchange(ptr, value);
+  }
+}
+#endif
+
+inline void Release_Store(volatile Atomic32* ptr, Atomic32 value) {
+  ATOMICOPS_COMPILER_BARRIER();
+  *ptr = value; // An x86 store acts as a release barrier.
+  // See comments in Atomic64 version of Release_Store(), below.
+}
+
+inline Atomic32 NoBarrier_Load(volatile const Atomic32* ptr) {
+  return *ptr;
+}
+
+inline Atomic32 Acquire_Load(volatile const Atomic32* ptr) {
+  Atomic32 value = *ptr; // An x86 load acts as an acquire barrier.
+  // See comments in Atomic64 version of Release_Store(), below.
+  ATOMICOPS_COMPILER_BARRIER();
+  return value;
+}
+
+inline Atomic32 Release_Load(volatile const Atomic32* ptr) {
+  MemoryBarrier();
+  return *ptr;
+}
+
+#if defined(__x86_64__)
+
+// 64-bit low-level operations on 64-bit platform.
+
+inline Atomic64 NoBarrier_CompareAndSwap(volatile Atomic64* ptr,
+                                         Atomic64 old_value,
+                                         Atomic64 new_value) {
+  Atomic64 prev;
+  __asm__ __volatile__("lock; cmpxchgq %1,%2"
+                       : "=a" (prev)
+                       : "q" (new_value), "m" (*ptr), "0" (old_value)
+                       : "memory");
+  return prev;
+}
+
+inline Atomic64 NoBarrier_AtomicExchange(volatile Atomic64* ptr,
+                                         Atomic64 new_value) {
+  __asm__ __volatile__("xchgq %1,%0"  // The lock prefix is implicit for xchg.
+                       : "=r" (new_value)
+                       : "m" (*ptr), "0" (new_value)
+                       : "memory");
+  return new_value;  // Now it's the previous value.
+}
+
+inline Atomic64 Acquire_AtomicExchange(volatile Atomic64* ptr,
+                                       Atomic64 new_value) {
+  Atomic64 old_val = NoBarrier_AtomicExchange(ptr, new_value);
+  return old_val;
+}
+
+inline Atomic64 Release_AtomicExchange(volatile Atomic64* ptr,
+                                       Atomic64 new_value) {
+  // xchgq already has release memory barrier semantics.
+  return NoBarrier_AtomicExchange(ptr, new_value);
+}
+
+inline void NoBarrier_Store(volatile Atomic64* ptr, Atomic64 value) {
+  *ptr = value;
+}
+
+inline void Acquire_Store(volatile Atomic64* ptr, Atomic64 value) {
+  *ptr = value;
+  MemoryBarrier();
+}
+
+inline void Release_Store(volatile Atomic64* ptr, Atomic64 value) {
+  ATOMICOPS_COMPILER_BARRIER();
+
+  *ptr = value; // An x86 store acts as a release barrier
+                // for current AMD/Intel chips as of Jan 2008.
+                // See also Acquire_Load(), below.
+
+  // When new chips come out, check:
+  //  IA-32 Intel Architecture Software Developer's Manual, Volume 3:
+  //  System Programming Guide, Chapter 7: Multiple-processor management,
+  //  Section 7.2, Memory Ordering.
+  // Last seen at:
+  //   http://developer.intel.com/design/pentium4/manuals/index_new.htm
+  //
+  // x86 stores/loads fail to act as barriers for a few instructions (clflush
+  // maskmovdqu maskmovq movntdq movnti movntpd movntps movntq) but these are
+  // not generated by the compiler, and are rare.  Users of these instructions
+  // need to know about cache behaviour in any case since all of these involve
+  // either flushing cache lines or non-temporal cache hints.
+}
+
+inline Atomic64 NoBarrier_Load(volatile const Atomic64* ptr) {
+  return *ptr;
+}
+
+inline Atomic64 Acquire_Load(volatile const Atomic64* ptr) {
+  Atomic64 value = *ptr; // An x86 load acts as an acquire barrier,
+                         // for current AMD/Intel chips as of Jan 2008.
+                         // See also Release_Store(), above.
+  ATOMICOPS_COMPILER_BARRIER();
+  return value;
+}
+
+inline Atomic64 Release_Load(volatile const Atomic64* ptr) {
+  MemoryBarrier();
+  return *ptr;
+}
+
+#else // defined(__x86_64__)
+
+// 64-bit low-level operations on 32-bit platform.
+
+#if !((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 1))
+// For compilers older than gcc 4.1, we use inline asm.
+//
+// Potential pitfalls:
+//
+// 1. %ebx points to Global offset table (GOT) with -fPIC.
+//    We need to preserve this register.
+// 2. When explicit registers are used in inline asm, the
+//    compiler may not be aware of it and might try to reuse
+//    the same register for another argument which has constraints
+//    that allow it ("r" for example).
+
+inline Atomic64 __sync_val_compare_and_swap(volatile Atomic64* ptr,
+                                            Atomic64 old_value,
+                                            Atomic64 new_value) {
+  Atomic64 prev;
+  __asm__ __volatile__("push %%ebx\n\t"
+                       "movl (%3), %%ebx\n\t"    // Move 64-bit new_value into
+                       "movl 4(%3), %%ecx\n\t"   // ecx:ebx
+                       "lock; cmpxchg8b (%1)\n\t"// If edx:eax (old_value) same
+                       "pop %%ebx\n\t"
+                       : "=A" (prev)             // as contents of ptr:
+                       : "D" (ptr),              //   ecx:ebx => ptr
+                         "0" (old_value),        // else:
+                         "S" (&new_value)        //   old *ptr => edx:eax
+                       : "memory", "%ecx");
+  return prev;
+}
+#endif  // Compiler < gcc-4.1
+
+inline Atomic64 NoBarrier_CompareAndSwap(volatile Atomic64* ptr,
+                                         Atomic64 old_val,
+                                         Atomic64 new_val) {
+  return __sync_val_compare_and_swap(ptr, old_val, new_val);
+}
+
+inline Atomic64 NoBarrier_AtomicExchange(volatile Atomic64* ptr,
+                                         Atomic64 new_val) {
+  Atomic64 old_val;
+
+  do {
+    old_val = *ptr;
+  } while (__sync_val_compare_and_swap(ptr, old_val, new_val) != old_val);
+
+  return old_val;
+}
+
+inline Atomic64 Acquire_AtomicExchange(volatile Atomic64* ptr,
+                                       Atomic64 new_val) {
+  Atomic64 old_val = NoBarrier_AtomicExchange(ptr, new_val);
+  return old_val;
+}
+
+inline Atomic64 Release_AtomicExchange(volatile Atomic64* ptr,
+                                       Atomic64 new_val) {
+  return NoBarrier_AtomicExchange(ptr, new_val);
+}
+
+inline void NoBarrier_Store(volatile Atomic64* ptr, Atomic64 value) {
+  __asm__ __volatile__("movq %1, %%mm0\n\t"  // Use mmx reg for 64-bit atomic
+                       "movq %%mm0, %0\n\t"  // moves (ptr could be read-only)
+                       "emms\n\t"            // Empty mmx state/Reset FP regs
+                       : "=m" (*ptr)
+                       : "m" (value)
+                       : // mark the FP stack and mmx registers as clobbered
+                         "st", "st(1)", "st(2)", "st(3)", "st(4)",
+                         "st(5)", "st(6)", "st(7)", "mm0", "mm1",
+                         "mm2", "mm3", "mm4", "mm5", "mm6", "mm7");
+}
+
+inline void Acquire_Store(volatile Atomic64* ptr, Atomic64 value) {
+  NoBarrier_Store(ptr, value);
+  MemoryBarrier();
+}
+
+inline void Release_Store(volatile Atomic64* ptr, Atomic64 value) {
+  ATOMICOPS_COMPILER_BARRIER();
+  NoBarrier_Store(ptr, value);
+}
+
+inline Atomic64 NoBarrier_Load(volatile const Atomic64* ptr) {
+  Atomic64 value;
+  __asm__ __volatile__("movq %1, %%mm0\n\t"  // Use mmx reg for 64-bit atomic
+                       "movq %%mm0, %0\n\t"  // moves (ptr could be read-only)
+                       "emms\n\t"            // Empty mmx state/Reset FP regs
+                       : "=m" (value)
+                       : "m" (*ptr)
+                       : // mark the FP stack and mmx registers as clobbered
+                         "st", "st(1)", "st(2)", "st(3)", "st(4)",
+                         "st(5)", "st(6)", "st(7)", "mm0", "mm1",
+                         "mm2", "mm3", "mm4", "mm5", "mm6", "mm7");
+  return value;
+}
+
+inline Atomic64 Acquire_Load(volatile const Atomic64* ptr) {
+  Atomic64 value = NoBarrier_Load(ptr);
+  ATOMICOPS_COMPILER_BARRIER();
+  return value;
+}
+
+inline Atomic64 Release_Load(volatile const Atomic64* ptr) {
+  MemoryBarrier();
+  return NoBarrier_Load(ptr);
+}
+
+#endif // defined(__x86_64__)
+
+inline Atomic64 Acquire_CompareAndSwap(volatile Atomic64* ptr,
+                                       Atomic64 old_value,
+                                       Atomic64 new_value) {
+  Atomic64 x = NoBarrier_CompareAndSwap(ptr, old_value, new_value);
+  return x;
+}
+
+inline Atomic64 Release_CompareAndSwap(volatile Atomic64* ptr,
+                                       Atomic64 old_value,
+                                       Atomic64 new_value) {
+  return NoBarrier_CompareAndSwap(ptr, old_value, new_value);
+}
+
+} // namespace base::subtle
+} // namespace base
+
+#undef ATOMICOPS_COMPILER_BARRIER
+
+#endif  // BASE_ATOMICOPS_INTERNALS_X86_H_
diff --git a/src/base/atomicops.h b/src/base/atomicops.h
new file mode 100644
index 0000000..be038f3
--- /dev/null
+++ b/src/base/atomicops.h
@@ -0,0 +1,391 @@
+// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
+/* Copyright (c) 2006, Google Inc.
+ * All rights reserved.
+ * 
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ * 
+ *     * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with the
+ * distribution.
+ *     * Neither the name of Google Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ * 
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * ---
+ * Author: Sanjay Ghemawat
+ */
+
+// For atomic operations on statistics counters, see atomic_stats_counter.h.
+// For atomic operations on sequence numbers, see atomic_sequence_num.h.
+// For atomic operations on reference counts, see atomic_refcount.h.
+
+// Some fast atomic operations -- typically with machine-dependent
+// implementations.  This file may need editing as Google code is
+// ported to different architectures.
+
+// The routines exported by this module are subtle.  If you use them, even if
+// you get the code right, it will depend on careful reasoning about atomicity
+// and memory ordering; it will be less readable, and harder to maintain.  If
+// you plan to use these routines, you should have a good reason, such as solid
+// evidence that performance would otherwise suffer, or there being no
+// alternative.  You should assume only properties explicitly guaranteed by the
+// specifications in this file.  You are almost certainly _not_ writing code
+// just for the x86; if you assume x86 semantics, x86 hardware bugs and
+// implementations on other architectures will cause your code to break.  If you
+// do not know what you are doing, avoid these routines, and use a Mutex.
+//
+// These following lower-level operations are typically useful only to people
+// implementing higher-level synchronization operations like spinlocks,
+// mutexes, and condition-variables.  They combine CompareAndSwap(), a load, or
+// a store with appropriate memory-ordering instructions.  "Acquire" operations
+// ensure that no later memory access can be reordered ahead of the operation.
+// "Release" operations ensure that no previous memory access can be reordered
+// after the operation.  "Barrier" operations have both "Acquire" and "Release"
+// semantics.   A MemoryBarrier() has "Barrier" semantics, but does no memory
+// access.
+//
+// It is incorrect to make direct assignments to/from an atomic variable.
+// You should use one of the Load or Store routines.  The NoBarrier
+// versions are provided when no barriers are needed:
+//   NoBarrier_Store()
+//   NoBarrier_Load()
+// Although there is currently no compiler enforcement, you are encouraged
+// to use these.  Moreover, if you choose to use base::subtle::Atomic64 type,
+// you MUST use one of the Load or Store routines to get correct behavior
+// on 32-bit platforms.
+//
+// The intent is eventually to put all of these routines in namespace
+// base::subtle
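+//
+// Illustrative sketch (hypothetical names, not part of this interface):
+// publishing a payload from one thread to another by pairing a release
+// store with an acquire load:
+//
+//   // Shared state (zero-initialized):
+//   int payload = 0;
+//   AtomicWord ready = 0;
+//
+//   // Producer thread:
+//   payload = 42;                             // plain store
+//   base::subtle::Release_Store(&ready, 1);   // publish
+//
+//   // Consumer thread:
+//   while (base::subtle::Acquire_Load(&ready) == 0) { /* spin */ }
+//   assert(payload == 42);                    // payload is now visible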
+
+#ifndef THREAD_ATOMICOPS_H_
+#define THREAD_ATOMICOPS_H_
+
+#include <config.h>
+#ifdef HAVE_STDINT_H
+#include <stdint.h>
+#endif
+
+// ------------------------------------------------------------------------
+// Include the platform specific implementations of the types
+// and operations listed below.  Implementations are to provide Atomic32
+// and Atomic64 operations. If there is a mismatch between intptr_t and
+// the Atomic32 or Atomic64 types for a platform, the platform-specific header
+// should define the macro, AtomicWordCastType in a clause similar to the
+// following:
+// #if ...pointers are 64 bits...
+// # define AtomicWordCastType base::subtle::Atomic64
+// #else
+// # define AtomicWordCastType Atomic32
+// #endif
+// TODO(csilvers): figure out ARCH_PIII/ARCH_K8 (perhaps via ./configure?)
+// ------------------------------------------------------------------------
+
+#include "base/arm_instruction_set_select.h"
+#define GCC_VERSION (__GNUC__ * 10000                 \
+                     + __GNUC_MINOR__ * 100           \
+                     + __GNUC_PATCHLEVEL__)
+
+#if defined(TCMALLOC_PREFER_GCC_ATOMICS) && defined(__GNUC__) && GCC_VERSION >= 40700
+#include "base/atomicops-internals-gcc.h"
+#elif defined(__MACH__) && defined(__APPLE__)
+#include "base/atomicops-internals-macosx.h"
+#elif defined(__GNUC__) && defined(ARMV6)
+#include "base/atomicops-internals-arm-v6plus.h"
+#elif defined(ARMV3)
+#include "base/atomicops-internals-arm-generic.h"
+#elif defined(__GNUC__) && (defined(__i386) || defined(__x86_64__))
+#include "base/atomicops-internals-x86.h"
+#elif defined(_WIN32)
+#include "base/atomicops-internals-windows.h"
+#elif defined(__linux__) && defined(__PPC__)
+#include "base/atomicops-internals-linuxppc.h"
+#elif defined(__GNUC__) && defined(__mips__)
+#include "base/atomicops-internals-mips.h"
+#elif defined(__GNUC__) && GCC_VERSION >= 40700
+#include "base/atomicops-internals-gcc.h"
+#else
+#error You need to implement atomic operations for this architecture
+#endif
+
+// Signed type that can hold a pointer and supports the atomic ops below, as
+// well as atomic loads and stores.  Instances must be naturally-aligned.
+typedef intptr_t AtomicWord;
+
+#ifdef AtomicWordCastType
+// ------------------------------------------------------------------------
+// This section is needed only when explicit type casting is required to
+// cast AtomicWord to one of the basic atomic types (Atomic64 or Atomic32).
+// It also serves to document the AtomicWord interface.
+// ------------------------------------------------------------------------
+
+namespace base {
+namespace subtle {
+
+// Atomically execute:
+//      result = *ptr;
+//      if (*ptr == old_value)
+//        *ptr = new_value;
+//      return result;
+//
+// I.e., replace "*ptr" with "new_value" if "*ptr" used to be "old_value".
+// Always return the old value of "*ptr"
+//
+// This routine implies no memory barriers.
+inline AtomicWord NoBarrier_CompareAndSwap(volatile AtomicWord* ptr,
+                                           AtomicWord old_value,
+                                           AtomicWord new_value) {
+  return NoBarrier_CompareAndSwap(
+      reinterpret_cast<volatile AtomicWordCastType*>(ptr),
+      old_value, new_value);
+}
+
+// Atomically store new_value into *ptr, returning the previous value held in
+// *ptr.  This routine implies no memory barriers.
+inline AtomicWord NoBarrier_AtomicExchange(volatile AtomicWord* ptr,
+                                           AtomicWord new_value) {
+  return NoBarrier_AtomicExchange(
+      reinterpret_cast<volatile AtomicWordCastType*>(ptr), new_value);
+}
+
+inline AtomicWord Acquire_AtomicExchange(volatile AtomicWord* ptr,
+                                         AtomicWord new_value) {
+  return Acquire_AtomicExchange(
+      reinterpret_cast<volatile AtomicWordCastType*>(ptr), new_value);
+}
+
+inline AtomicWord Release_AtomicExchange(volatile AtomicWord* ptr,
+                                         AtomicWord new_value) {
+  return Release_AtomicExchange(
+      reinterpret_cast<volatile AtomicWordCastType*>(ptr), new_value);
+}
+
+inline AtomicWord Acquire_CompareAndSwap(volatile AtomicWord* ptr,
+                                         AtomicWord old_value,
+                                         AtomicWord new_value) {
+  return base::subtle::Acquire_CompareAndSwap(
+      reinterpret_cast<volatile AtomicWordCastType*>(ptr),
+      old_value, new_value);
+}
+
+inline AtomicWord Release_CompareAndSwap(volatile AtomicWord* ptr,
+                                         AtomicWord old_value,
+                                         AtomicWord new_value) {
+  return base::subtle::Release_CompareAndSwap(
+      reinterpret_cast<volatile AtomicWordCastType*>(ptr),
+      old_value, new_value);
+}
+
+inline void NoBarrier_Store(volatile AtomicWord *ptr, AtomicWord value) {
+  NoBarrier_Store(
+      reinterpret_cast<volatile AtomicWordCastType*>(ptr), value);
+}
+
+inline void Acquire_Store(volatile AtomicWord* ptr, AtomicWord value) {
+  return base::subtle::Acquire_Store(
+      reinterpret_cast<volatile AtomicWordCastType*>(ptr), value);
+}
+
+inline void Release_Store(volatile AtomicWord* ptr, AtomicWord value) {
+  return base::subtle::Release_Store(
+      reinterpret_cast<volatile AtomicWordCastType*>(ptr), value);
+}
+
+inline AtomicWord NoBarrier_Load(volatile const AtomicWord *ptr) {
+  return NoBarrier_Load(
+      reinterpret_cast<volatile const AtomicWordCastType*>(ptr));
+}
+
+inline AtomicWord Acquire_Load(volatile const AtomicWord* ptr) {
+  return base::subtle::Acquire_Load(
+      reinterpret_cast<volatile const AtomicWordCastType*>(ptr));
+}
+
+inline AtomicWord Release_Load(volatile const AtomicWord* ptr) {
+  return base::subtle::Release_Load(
+      reinterpret_cast<volatile const AtomicWordCastType*>(ptr));
+}
+
+}  // namespace base::subtle
+}  // namespace base
+#endif  // AtomicWordCastType
+
+// ------------------------------------------------------------------------
+// Commented out type definitions and method declarations for documentation
+// of the interface provided by this module.
+// ------------------------------------------------------------------------
+
+#if 0
+
+// Signed 32-bit type that supports the atomic ops below, as well as atomic
+// loads and stores.  Instances must be naturally aligned.  This type differs
+// from AtomicWord in 64-bit binaries where AtomicWord is 64-bits.
+typedef int32_t Atomic32;
+
+// Corresponding operations on Atomic32
+namespace base {
+namespace subtle {
+
+// Signed 64-bit type that supports the atomic ops below, as well as atomic
+// loads and stores.  Instances must be naturally aligned.  This type differs
+// from AtomicWord in 32-bit binaries where AtomicWord is 32-bits.
+typedef int64_t Atomic64;
+
+Atomic32 NoBarrier_CompareAndSwap(volatile Atomic32* ptr,
+                                  Atomic32 old_value,
+                                  Atomic32 new_value);
+Atomic32 NoBarrier_AtomicExchange(volatile Atomic32* ptr, Atomic32 new_value);
+Atomic32 Acquire_AtomicExchange(volatile Atomic32* ptr, Atomic32 new_value);
+Atomic32 Release_AtomicExchange(volatile Atomic32* ptr, Atomic32 new_value);
+Atomic32 Acquire_CompareAndSwap(volatile Atomic32* ptr,
+                                Atomic32 old_value,
+                                Atomic32 new_value);
+Atomic32 Release_CompareAndSwap(volatile Atomic32* ptr,
+                                Atomic32 old_value,
+                                Atomic32 new_value);
+void NoBarrier_Store(volatile Atomic32* ptr, Atomic32 value);
+void Acquire_Store(volatile Atomic32* ptr, Atomic32 value);
+void Release_Store(volatile Atomic32* ptr, Atomic32 value);
+Atomic32 NoBarrier_Load(volatile const Atomic32* ptr);
+Atomic32 Acquire_Load(volatile const Atomic32* ptr);
+Atomic32 Release_Load(volatile const Atomic32* ptr);
+
+// Corresponding operations on Atomic64
+Atomic64 NoBarrier_CompareAndSwap(volatile Atomic64* ptr,
+                                  Atomic64 old_value,
+                                  Atomic64 new_value);
+Atomic64 NoBarrier_AtomicExchange(volatile Atomic64* ptr, Atomic64 new_value);
+Atomic64 Acquire_AtomicExchange(volatile Atomic64* ptr, Atomic64 new_value);
+Atomic64 Release_AtomicExchange(volatile Atomic64* ptr, Atomic64 new_value);
+
+Atomic64 Acquire_CompareAndSwap(volatile Atomic64* ptr,
+                                Atomic64 old_value,
+                                Atomic64 new_value);
+Atomic64 Release_CompareAndSwap(volatile Atomic64* ptr,
+                                Atomic64 old_value,
+                                Atomic64 new_value);
+void NoBarrier_Store(volatile Atomic64* ptr, Atomic64 value);
+void Acquire_Store(volatile Atomic64* ptr, Atomic64 value);
+void Release_Store(volatile Atomic64* ptr, Atomic64 value);
+Atomic64 NoBarrier_Load(volatile const Atomic64* ptr);
+Atomic64 Acquire_Load(volatile const Atomic64* ptr);
+Atomic64 Release_Load(volatile const Atomic64* ptr);
+}  // namespace base::subtle
+}  // namespace base
+
+void MemoryBarrier();
+
+#endif  // 0
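+
+// Illustrative sketch of the intended acquire/release pairing ("data" and
+// "flag" are hypothetical variables): a writer publishes with Release_Store,
+// and a reader that observes the flag via Acquire_Load is guaranteed to also
+// observe the writes made before the store.
+//
+//   // Writer thread:
+//   data = 42;                               // plain write
+//   base::subtle::Release_Store(&flag, 1);   // publish
+//
+//   // Reader thread:
+//   if (base::subtle::Acquire_Load(&flag)) { // observes the publication...
+//     assert(data == 42);                    // ...and therefore the data
+//   }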
+
+
+// ------------------------------------------------------------------------
+// The following are to be deprecated when all uses have been changed to
+// use the base::subtle namespace.
+// ------------------------------------------------------------------------
+
+#ifdef AtomicWordCastType
+// AtomicWord versions to be deprecated
+inline AtomicWord Acquire_CompareAndSwap(volatile AtomicWord* ptr,
+                                         AtomicWord old_value,
+                                         AtomicWord new_value) {
+  return base::subtle::Acquire_CompareAndSwap(ptr, old_value, new_value);
+}
+
+inline AtomicWord Release_CompareAndSwap(volatile AtomicWord* ptr,
+                                         AtomicWord old_value,
+                                         AtomicWord new_value) {
+  return base::subtle::Release_CompareAndSwap(ptr, old_value, new_value);
+}
+
+inline void Acquire_Store(volatile AtomicWord* ptr, AtomicWord value) {
+  return base::subtle::Acquire_Store(ptr, value);
+}
+
+inline void Release_Store(volatile AtomicWord* ptr, AtomicWord value) {
+  return base::subtle::Release_Store(ptr, value);
+}
+
+inline AtomicWord Acquire_Load(volatile const AtomicWord* ptr) {
+  return base::subtle::Acquire_Load(ptr);
+}
+
+inline AtomicWord Release_Load(volatile const AtomicWord* ptr) {
+  return base::subtle::Release_Load(ptr);
+}
+#endif  // AtomicWordCastType
+
+// 32-bit Acquire/Release operations to be deprecated.
+
+inline Atomic32 Acquire_CompareAndSwap(volatile Atomic32* ptr,
+                                       Atomic32 old_value,
+                                       Atomic32 new_value) {
+  return base::subtle::Acquire_CompareAndSwap(ptr, old_value, new_value);
+}
+inline Atomic32 Release_CompareAndSwap(volatile Atomic32* ptr,
+                                       Atomic32 old_value,
+                                       Atomic32 new_value) {
+  return base::subtle::Release_CompareAndSwap(ptr, old_value, new_value);
+}
+inline void Acquire_Store(volatile Atomic32* ptr, Atomic32 value) {
+  base::subtle::Acquire_Store(ptr, value);
+}
+inline void Release_Store(volatile Atomic32* ptr, Atomic32 value) {
+  return base::subtle::Release_Store(ptr, value);
+}
+inline Atomic32 Acquire_Load(volatile const Atomic32* ptr) {
+  return base::subtle::Acquire_Load(ptr);
+}
+inline Atomic32 Release_Load(volatile const Atomic32* ptr) {
+  return base::subtle::Release_Load(ptr);
+}
+
+#ifdef BASE_HAS_ATOMIC64
+
+// 64-bit Acquire/Release operations to be deprecated.
+
+inline base::subtle::Atomic64 Acquire_CompareAndSwap(
+    volatile base::subtle::Atomic64* ptr,
+    base::subtle::Atomic64 old_value, base::subtle::Atomic64 new_value) {
+  return base::subtle::Acquire_CompareAndSwap(ptr, old_value, new_value);
+}
+inline base::subtle::Atomic64 Release_CompareAndSwap(
+    volatile base::subtle::Atomic64* ptr,
+    base::subtle::Atomic64 old_value, base::subtle::Atomic64 new_value) {
+  return base::subtle::Release_CompareAndSwap(ptr, old_value, new_value);
+}
+inline void Acquire_Store(
+    volatile base::subtle::Atomic64* ptr, base::subtle::Atomic64 value) {
+  base::subtle::Acquire_Store(ptr, value);
+}
+inline void Release_Store(
+    volatile base::subtle::Atomic64* ptr, base::subtle::Atomic64 value) {
+  return base::subtle::Release_Store(ptr, value);
+}
+inline base::subtle::Atomic64 Acquire_Load(
+    volatile const base::subtle::Atomic64* ptr) {
+  return base::subtle::Acquire_Load(ptr);
+}
+inline base::subtle::Atomic64 Release_Load(
+    volatile const base::subtle::Atomic64* ptr) {
+  return base::subtle::Release_Load(ptr);
+}
+
+#endif  // BASE_HAS_ATOMIC64
+
+#endif  // THREAD_ATOMICOPS_H_
diff --git a/src/base/basictypes.h b/src/base/basictypes.h
new file mode 100644
index 0000000..4779611
--- /dev/null
+++ b/src/base/basictypes.h
@@ -0,0 +1,384 @@
+// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
+// Copyright (c) 2005, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef _BASICTYPES_H_
+#define _BASICTYPES_H_
+
+#include <config.h>
+#include <string.h>       // for memcpy()
+#ifdef HAVE_INTTYPES_H
+#include <inttypes.h>     // gets us PRId64, etc
+#endif
+
+// To use this in an autoconf setting, make sure you run the following
+// autoconf macros:
+//    AC_HEADER_STDC              /* for stdint_h and inttypes_h */
+//    AC_CHECK_TYPES([__int64])   /* defined in some windows platforms */
+
+#ifdef HAVE_INTTYPES_H
+#include <inttypes.h>           // uint16_t might be here; PRId64 too.
+#endif
+#ifdef HAVE_STDINT_H
+#include <stdint.h>             // to get uint16_t (ISO naming madness)
+#endif
+#include <sys/types.h>          // our last best hope for uint16_t
+
+// Standard typedefs
+// All Google code is compiled with -funsigned-char to make "char"
+// unsigned.  Google code therefore doesn't need a "uchar" type.
+// TODO(csilvers): how do we make sure unsigned-char works on non-gcc systems?
+typedef signed char         schar;
+typedef int8_t              int8;
+typedef int16_t             int16;
+typedef int32_t             int32;
+typedef int64_t             int64;
+
+// NOTE: unsigned types are DANGEROUS in loops and other arithmetical
+// places.  Use the signed types unless your variable represents a bit
+// pattern (eg a hash value) or you really need the extra bit.  Do NOT
+// use 'unsigned' to express "this value should always be positive";
+// use assertions for this.
+
+typedef uint8_t            uint8;
+typedef uint16_t           uint16;
+typedef uint32_t           uint32;
+typedef uint64_t           uint64;
+
+const uint16 kuint16max = (   (uint16) 0xFFFF);
+const uint32 kuint32max = (   (uint32) 0xFFFFFFFF);
+const uint64 kuint64max = ( (((uint64) kuint32max) << 32) | kuint32max );
+
+const  int8  kint8max   = (   (  int8) 0x7F);
+const  int16 kint16max  = (   ( int16) 0x7FFF);
+const  int32 kint32max  = (   ( int32) 0x7FFFFFFF);
+const  int64 kint64max =  ( ((( int64) kint32max) << 32) | kuint32max );
+
+const  int8  kint8min   = (   (  int8) 0x80);
+const  int16 kint16min  = (   ( int16) 0x8000);
+const  int32 kint32min  = (   ( int32) 0x80000000);
+const  int64 kint64min =  ( ((( int64) kint32min) << 32) | 0 );
+
+// Define the "portable" printf and scanf macros, if they're not
+// already there (via the inttypes.h we #included above, hopefully).
+// Mostly it's old systems that don't support inttypes.h, so we assume
+// they're 32 bit.
+#ifndef PRIx64
+#define PRIx64 "llx"
+#endif
+#ifndef SCNx64
+#define SCNx64 "llx"
+#endif
+#ifndef PRId64
+#define PRId64 "lld"
+#endif
+#ifndef SCNd64
+#define SCNd64 "lld"
+#endif
+#ifndef PRIu64
+#define PRIu64 "llu"
+#endif
+#ifndef PRIxPTR
+#define PRIxPTR "lx"
+#endif
+
+// Also allow for printing of a pthread_t.
+#define GPRIuPTHREAD "lu"
+#define GPRIxPTHREAD "lx"
+#if defined(__CYGWIN__) || defined(__CYGWIN32__) || defined(__APPLE__) || defined(__FreeBSD__)
+#define PRINTABLE_PTHREAD(pthreadt) reinterpret_cast<uintptr_t>(pthreadt)
+#else
+#define PRINTABLE_PTHREAD(pthreadt) pthreadt
+#endif
+
+// A macro to disallow the evil copy constructor and operator= functions
+// This should be used in the private: declarations for a class
+#define DISALLOW_EVIL_CONSTRUCTORS(TypeName)    \
+  TypeName(const TypeName&);                    \
+  void operator=(const TypeName&)
+
+// An alternate name that leaves out the moral judgment... :-)
+#define DISALLOW_COPY_AND_ASSIGN(TypeName) DISALLOW_EVIL_CONSTRUCTORS(TypeName)
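+
+// For example (illustrative only):
+//   class Foo {
+//    public:
+//     Foo() {}
+//    private:
+//     DISALLOW_COPY_AND_ASSIGN(Foo);
+//   };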
+
+// The COMPILE_ASSERT macro can be used to verify that a compile time
+// expression is true. For example, you could use it to verify the
+// size of a static array:
+//
+//   COMPILE_ASSERT(sizeof(num_content_type_names) == sizeof(int),
+//                  content_type_names_incorrect_size);
+//
+// or to make sure a struct is smaller than a certain size:
+//
+//   COMPILE_ASSERT(sizeof(foo) < 128, foo_too_large);
+//
+// The second argument to the macro is the name of the variable. If
+// the expression is false, most compilers will issue a warning/error
+// containing the name of the variable.
+//
+// Implementation details of COMPILE_ASSERT:
+//
+// - COMPILE_ASSERT works by defining an array type that has -1
+//   elements (and thus is invalid) when the expression is false.
+//
+// - The simpler definition
+//
+//     #define COMPILE_ASSERT(expr, msg) typedef char msg[(expr) ? 1 : -1]
+//
+//   does not work, as gcc supports variable-length arrays whose sizes
+//   are determined at run-time (this is gcc's extension and not part
+//   of the C++ standard).  As a result, gcc fails to reject the
+//   following code with the simple definition:
+//
+//     int foo;
+//     COMPILE_ASSERT(foo, msg); // not supposed to compile as foo is
+//                               // not a compile-time constant.
+//
+// - By using the type CompileAssert<(bool(expr))>, we ensure that
+//   expr is a compile-time constant.  (Template arguments must be
+//   determined at compile-time.)
+//
+// - The outer parentheses in CompileAssert<(bool(expr))> are necessary
+//   to work around a bug in gcc 3.4.4 and 4.0.1.  If we had written
+//
+//     CompileAssert<bool(expr)>
+//
+//   instead, these compilers would refuse to compile
+//
+//     COMPILE_ASSERT(5 > 0, some_message);
+//
+//   (They seem to think the ">" in "5 > 0" marks the end of the
+//   template argument list.)
+//
+// - The array size is (bool(expr) ? 1 : -1), instead of simply
+//
+//     ((expr) ? 1 : -1).
+//
+//   This is to avoid running into a bug in MS VC 7.1, which
+//   causes ((0.0) ? 1 : -1) to incorrectly evaluate to 1.
+
+template <bool>
+struct CompileAssert {
+};
+
+#ifdef HAVE___ATTRIBUTE__
+# define ATTRIBUTE_UNUSED __attribute__((unused))
+#else
+# define ATTRIBUTE_UNUSED
+#endif
+
+#define COMPILE_ASSERT(expr, msg)                               \
+  typedef CompileAssert<(bool(expr))> msg[bool(expr) ? 1 : -1] ATTRIBUTE_UNUSED
+
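+// arraysize(a) yields the number of elements in a statically-sized array,
+// e.g. given "int a[10]", arraysize(a) is 10.  (This is the classic
+// sizeof-based form; it silently misbehaves if handed a pointer rather
+// than an array.)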
+#define arraysize(a)  (sizeof(a) / sizeof(*(a)))
+
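+// OFFSETOF_MEMBER computes the byte offset of "field" within "strct" by
+// pretending an object lives at the arbitrary non-null address 16 and
+// subtracting; the non-null address sidesteps the undefined behavior a
+// null-based offsetof-style computation would invoke.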
+#define OFFSETOF_MEMBER(strct, field)                                   \
+   (reinterpret_cast<char*>(&reinterpret_cast<strct*>(16)->field) -     \
+    reinterpret_cast<char*>(16))
+
+// bit_cast<Dest,Source> implements the equivalent of
+// "*reinterpret_cast<Dest*>(&source)".
+//
+// The reinterpret_cast method would produce undefined behavior
+// according to the ISO C++ specification, section 3.10, paragraph 15.
+// bit_cast<> calls memcpy() which is blessed by the standard,
+// especially by the example in section 3.9.
+//
+// Fortunately memcpy() is very fast.  In optimized mode, with a
+// constant size, gcc 2.95.3, gcc 4.0.1, and msvc 7.1 produce inline
+// code with the minimal amount of data movement.  On a 32-bit system,
+// memcpy(d,s,4) compiles to one load and one store, and memcpy(d,s,8)
+// compiles to two loads and two stores.
+
+template <class Dest, class Source>
+inline Dest bit_cast(const Source& source) {
+  COMPILE_ASSERT(sizeof(Dest) == sizeof(Source), bitcasting_unequal_sizes);
+  Dest dest;
+  memcpy(&dest, &source, sizeof(dest));
+  return dest;
+}
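+
+// Illustrative usage (hypothetical values): inspect the bit pattern of a
+// float without violating aliasing rules.
+//
+//   float f = 1.0f;
+//   uint32 bits = bit_cast<uint32>(f);  // 0x3f800000 for IEEE-754 floats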
+
+#ifdef HAVE___ATTRIBUTE__
+# define ATTRIBUTE_WEAK      __attribute__((weak))
+# define ATTRIBUTE_NOINLINE  __attribute__((noinline))
+#else
+# define ATTRIBUTE_WEAK
+# define ATTRIBUTE_NOINLINE
+#endif
+
+#if defined(HAVE___ATTRIBUTE__) && defined(__ELF__)
+# define ATTRIBUTE_VISIBILITY_HIDDEN __attribute__((visibility("hidden")))
+#else
+# define ATTRIBUTE_VISIBILITY_HIDDEN
+#endif
+
+// Section attributes are supported for both ELF and Mach-O, but in
+// very different ways.  Here's the API we provide:
+// 1) ATTRIBUTE_SECTION: put this with the declaration of all functions
+//    you want to be in the same linker section
+// 2) DEFINE_ATTRIBUTE_SECTION_VARS: must be called once per unique
+//    name.  You want to make sure this is executed before any
+//    DECLARE_ATTRIBUTE_SECTION_VARS; the easiest way is to put them
+//    in the same .cc file.  Put this call at the global level.
+// 3) INIT_ATTRIBUTE_SECTION_VARS: you can scatter calls to this in
+//    multiple places to help ensure execution before any
+//    DECLARE_ATTRIBUTE_SECTION_VARS.  You must have at least one
+//    DEFINE, but you can have many INITs.  Put each in its own scope.
+// 4) DECLARE_ATTRIBUTE_SECTION_VARS: must be called before using
+//    ATTRIBUTE_SECTION_START or ATTRIBUTE_SECTION_STOP on a name.
+//    Put this call at the global level.
+// 5) ATTRIBUTE_SECTION_START/ATTRIBUTE_SECTION_STOP: call this to say
+//    where in memory a given section is.  All functions declared with
+//    ATTRIBUTE_SECTION are guaranteed to be between START and STOP.
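+//
+// Putting these together (an illustrative sketch; "my_section" and the
+// function names are hypothetical):
+//
+//   DEFINE_ATTRIBUTE_SECTION_VARS(my_section);   // once, in one .cc file
+//   DECLARE_ATTRIBUTE_SECTION_VARS(my_section);  // before using START/STOP
+//
+//   void Traced() ATTRIBUTE_SECTION(my_section);
+//   void Traced() { /* ... */ }
+//
+//   bool InTracedSection(void* pc) {
+//     return pc >= ATTRIBUTE_SECTION_START(my_section) &&
+//            pc < ATTRIBUTE_SECTION_STOP(my_section);
+//   }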
+
+#if defined(HAVE___ATTRIBUTE__) && defined(__ELF__)
+# define ATTRIBUTE_SECTION(name) __attribute__ ((section (#name)))
+
+  // Weak section declaration to be used as a global declaration
+  // for ATTRIBUTE_SECTION_START|STOP(name) to compile and link
+  // even without functions with ATTRIBUTE_SECTION(name).
+# define DECLARE_ATTRIBUTE_SECTION_VARS(name) \
+    extern char __start_##name[] ATTRIBUTE_WEAK; \
+    extern char __stop_##name[] ATTRIBUTE_WEAK
+# define INIT_ATTRIBUTE_SECTION_VARS(name)     // no-op for ELF
+# define DEFINE_ATTRIBUTE_SECTION_VARS(name)   // no-op for ELF
+
+  // Return void* pointers to start/end of a section of code with functions
+  // having ATTRIBUTE_SECTION(name), or 0 if no such function exists.
+  // One must DECLARE_ATTRIBUTE_SECTION_VARS(name) for this to compile and link.
+# define ATTRIBUTE_SECTION_START(name) (reinterpret_cast<void*>(__start_##name))
+# define ATTRIBUTE_SECTION_STOP(name) (reinterpret_cast<void*>(__stop_##name))
+# define HAVE_ATTRIBUTE_SECTION_START 1
+
+#elif defined(HAVE___ATTRIBUTE__) && defined(__MACH__)
+# define ATTRIBUTE_SECTION(name) __attribute__ ((section ("__TEXT, " #name)))
+
+#include <mach-o/getsect.h>
+#include <mach-o/dyld.h>
+class AssignAttributeStartEnd {
+ public:
+  AssignAttributeStartEnd(const char* name, char** pstart, char** pend) {
+    // Find out which dynamic library the section "name" is defined in.
+    if (_dyld_present()) {
+      for (int i = _dyld_image_count() - 1; i >= 0; --i) {
+        const mach_header* hdr = _dyld_get_image_header(i);
+#ifdef MH_MAGIC_64
+        if (hdr->magic == MH_MAGIC_64) {
+          uint64_t len;
+          *pstart = getsectdatafromheader_64((mach_header_64*)hdr,
+                                             "__TEXT", name, &len);
+          if (*pstart) {   // NULL if not defined in this dynamic library
+            *pstart += _dyld_get_image_vmaddr_slide(i);   // correct for reloc
+            *pend = *pstart + len;
+            return;
+          }
+        }
+#endif
+        if (hdr->magic == MH_MAGIC) {
+          uint32_t len;
+          *pstart = getsectdatafromheader(hdr, "__TEXT", name, &len);
+          if (*pstart) {   // NULL if not defined in this dynamic library
+            *pstart += _dyld_get_image_vmaddr_slide(i);   // correct for reloc
+            *pend = *pstart + len;
+            return;
+          }
+        }
+      }
+    }
+    // If we get here, not defined in a dll at all.  See if defined statically.
+    unsigned long len;    // don't ask me why this type isn't uint32_t too...
+    *pstart = getsectdata("__TEXT", name, &len);
+    *pend = *pstart + len;
+  }
+};
+
+#define DECLARE_ATTRIBUTE_SECTION_VARS(name)    \
+  extern char* __start_##name;                  \
+  extern char* __stop_##name
+
+#define INIT_ATTRIBUTE_SECTION_VARS(name)               \
+  DECLARE_ATTRIBUTE_SECTION_VARS(name);                 \
+  static const AssignAttributeStartEnd __assign_##name( \
+    #name, &__start_##name, &__stop_##name)
+
+#define DEFINE_ATTRIBUTE_SECTION_VARS(name)     \
+  char* __start_##name, *__stop_##name;         \
+  INIT_ATTRIBUTE_SECTION_VARS(name)
+
+# define ATTRIBUTE_SECTION_START(name) (reinterpret_cast<void*>(__start_##name))
+# define ATTRIBUTE_SECTION_STOP(name) (reinterpret_cast<void*>(__stop_##name))
+# define HAVE_ATTRIBUTE_SECTION_START 1
+
+#else  // not HAVE___ATTRIBUTE__ && __ELF__, nor HAVE___ATTRIBUTE__ && __MACH__
+# define ATTRIBUTE_SECTION(name)
+# define DECLARE_ATTRIBUTE_SECTION_VARS(name)
+# define INIT_ATTRIBUTE_SECTION_VARS(name)
+# define DEFINE_ATTRIBUTE_SECTION_VARS(name)
+# define ATTRIBUTE_SECTION_START(name) (reinterpret_cast<void*>(0))
+# define ATTRIBUTE_SECTION_STOP(name) (reinterpret_cast<void*>(0))
+
+#endif  // HAVE___ATTRIBUTE__ and __ELF__ or __MACH__
+
+#if defined(HAVE___ATTRIBUTE__)
+# if (defined(__i386__) || defined(__x86_64__))
+#   define CACHELINE_ALIGNED __attribute__((aligned(64)))
+# elif (defined(__PPC__) || defined(__PPC64__))
+#   define CACHELINE_ALIGNED __attribute__((aligned(16)))
+# elif (defined(__arm__))
+#   define CACHELINE_ALIGNED __attribute__((aligned(64)))
+    // some ARMs have shorter cache lines (ARM1176JZF-S is 32 bytes for example) but obviously 64-byte aligned implies 32-byte aligned
+# elif (defined(__mips__))
+#   define CACHELINE_ALIGNED __attribute__((aligned(128)))
+# elif (defined(__aarch64__))
+#   define CACHELINE_ALIGNED __attribute__((aligned(64)))
+    // implementation specific, Cortex-A53 and 57 should have 64 bytes
+# else
+#   error Could not determine cache line length - unknown architecture
+# endif
+#else
+# define CACHELINE_ALIGNED
+#endif  // defined(HAVE___ATTRIBUTE__)
+
+
+// The following enum should be used only as a constructor argument to indicate
+// that the variable has static storage class, and that the constructor should
+// do nothing to its state.  It indicates to the reader that it is legal to
+// declare a static instance of the class, provided the constructor is given
+// the base::LINKER_INITIALIZED argument.  Normally, it is unsafe to declare a
+// static variable that has a constructor or a destructor because invocation
+// order is undefined.  However, IF the type can be initialized by filling with
+// zeroes (which the loader does for static variables), AND the destructor also
+// does nothing to the storage, then a constructor declared as
+//       explicit MyClass(base::LinkerInitialized x) {}
+// and invoked as
+//       static MyClass my_variable_name(base::LINKER_INITIALIZED);
+// is safe.
+namespace base {
+enum LinkerInitialized { LINKER_INITIALIZED };
+}
+
+#endif  // _BASICTYPES_H_
diff --git a/src/base/commandlineflags.h b/src/base/commandlineflags.h
new file mode 100644
index 0000000..f54776a
--- /dev/null
+++ b/src/base/commandlineflags.h
@@ -0,0 +1,166 @@
+// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
+// Copyright (c) 2005, Google Inc.
+// All rights reserved.
+// 
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+// 
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+// 
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// ---
+// This file is a compatibility layer that defines Google's version of
+// command line flags that are used for configuration.
+//
+// We put flags into their own namespace.  It is purposefully
+// named in an opaque way that people should have trouble typing
+// directly.  The idea is that DEFINE puts the flag in the weird
+// namespace, and DECLARE imports the flag from there into the
+// current namespace.  The net result is to force people to use
+// DECLARE to get access to a flag, rather than saying
+//   extern bool FLAGS_logtostderr;
+// or some such instead.  We want this so we can put extra
+// functionality (like sanity-checking) in DECLARE if we want,
+// and make sure it is picked up everywhere.
+//
+// We also put the type of the variable in the namespace, so that
+// people can't DECLARE_int32 something that they DEFINE_bool'd
+// elsewhere.
+#ifndef BASE_COMMANDLINEFLAGS_H_
+#define BASE_COMMANDLINEFLAGS_H_
+
+#include <config.h>
+#include <string>
+#include <string.h>               // for memchr
+#include <stdlib.h>               // for getenv
+#include "base/basictypes.h"
+
+#define DECLARE_VARIABLE(type, name)                                          \
+  namespace FLAG__namespace_do_not_use_directly_use_DECLARE_##type##_instead {  \
+  extern PERFTOOLS_DLL_DECL type FLAGS_##name;                                \
+  }                                                                           \
+  using FLAG__namespace_do_not_use_directly_use_DECLARE_##type##_instead::FLAGS_##name
+
+#define DEFINE_VARIABLE(type, name, value, meaning) \
+  namespace FLAG__namespace_do_not_use_directly_use_DECLARE_##type##_instead {  \
+  PERFTOOLS_DLL_DECL type FLAGS_##name(value);                                \
+  char FLAGS_no##name;                                                        \
+  }                                                                           \
+  using FLAG__namespace_do_not_use_directly_use_DECLARE_##type##_instead::FLAGS_##name
+
+// bool specialization
+#define DECLARE_bool(name) \
+  DECLARE_VARIABLE(bool, name)
+#define DEFINE_bool(name, value, meaning) \
+  DEFINE_VARIABLE(bool, name, value, meaning)
+
+// int32 specialization
+#define DECLARE_int32(name) \
+  DECLARE_VARIABLE(int32, name)
+#define DEFINE_int32(name, value, meaning) \
+  DEFINE_VARIABLE(int32, name, value, meaning)
+
+// int64 specialization
+#define DECLARE_int64(name) \
+  DECLARE_VARIABLE(int64, name)
+#define DEFINE_int64(name, value, meaning) \
+  DEFINE_VARIABLE(int64, name, value, meaning)
+
+#define DECLARE_uint64(name) \
+  DECLARE_VARIABLE(uint64, name)
+#define DEFINE_uint64(name, value, meaning) \
+  DEFINE_VARIABLE(uint64, name, value, meaning)
+
+// double specialization
+#define DECLARE_double(name) \
+  DECLARE_VARIABLE(double, name)
+#define DEFINE_double(name, value, meaning) \
+  DEFINE_VARIABLE(double, name, value, meaning)
+
+// Special case for string, because we have to specify the namespace
+// std::string, which doesn't play nicely with our FLAG__namespace hackery.
+#define DECLARE_string(name)                                          \
+  namespace FLAG__namespace_do_not_use_directly_use_DECLARE_string_instead {  \
+  extern std::string FLAGS_##name;                                                   \
+  }                                                                           \
+  using FLAG__namespace_do_not_use_directly_use_DECLARE_string_instead::FLAGS_##name
+#define DEFINE_string(name, value, meaning) \
+  namespace FLAG__namespace_do_not_use_directly_use_DECLARE_string_instead {  \
+  std::string FLAGS_##name(value);                                                   \
+  char FLAGS_no##name;                                                        \
+  }                                                                           \
+  using FLAG__namespace_do_not_use_directly_use_DECLARE_string_instead::FLAGS_##name
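+
+// Illustrative usage (hypothetical flag name): the defining .cc file says
+//   DEFINE_bool(my_feature, EnvToBool("TCMALLOC_MY_FEATURE", false),
+//               "Enables my feature");
+// and any file that reads the flag says
+//   DECLARE_bool(my_feature);
+//   ...
+//   if (FLAGS_my_feature) { ... }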
+
+// implemented in sysinfo.cc
+namespace tcmalloc {
+  namespace commandlineflags {
+
+    inline bool StringToBool(const char *value, bool def) {
+      if (!value) {
+        return def;
+      }
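+      // Accepts values starting with t, T, y, Y or 1 as true.  Note the
+      // pattern string also contains a '\0', so an empty value ("") is
+      // treated as true as well.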
+      return memchr("tTyY1\0", value[0], 6) != NULL;
+    }
+
+    inline int StringToInt(const char *value, int def) {
+      if (!value) {
+        return def;
+      }
+      return strtol(value, NULL, 10);
+    }
+
+    inline long long StringToLongLong(const char *value, long long def) {
+      if (!value) {
+        return def;
+      }
+      return strtoll(value, NULL, 10);
+    }
+
+    inline double StringToDouble(const char *value, double def) {
+      if (!value) {
+        return def;
+      }
+      return strtod(value, NULL);
+    }
+  }
+}
+
+// These macros (they could be functions, but I don't want to bother with a
+// .cc file) make it easier to initialize flags from the environment.
+
+#define EnvToString(envname, dflt)   \
+  (!getenv(envname) ? (dflt) : getenv(envname))
+
+#define EnvToBool(envname, dflt)   \
+  tcmalloc::commandlineflags::StringToBool(getenv(envname), dflt)
+
+#define EnvToInt(envname, dflt)  \
+  tcmalloc::commandlineflags::StringToInt(getenv(envname), dflt)
+
+#define EnvToInt64(envname, dflt)  \
+  tcmalloc::commandlineflags::StringToLongLong(getenv(envname), dflt)
+
+#define EnvToDouble(envname, dflt)  \
+  tcmalloc::commandlineflags::StringToDouble(getenv(envname), dflt)
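+
+// Illustrative usage (hypothetical environment variable name):
+//   static const bool kUseFeature = EnvToBool("TCMALLOC_USE_FEATURE", false);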
+
+#endif  // BASE_COMMANDLINEFLAGS_H_
diff --git a/src/base/cycleclock.h b/src/base/cycleclock.h
new file mode 100644
index 0000000..dc2d569
--- /dev/null
+++ b/src/base/cycleclock.h
@@ -0,0 +1,173 @@
+// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
+// Copyright (c) 2004, Google Inc.
+// All rights reserved.
+// 
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+// 
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+// 
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// ----------------------------------------------------------------------
+// CycleClock
+//    A CycleClock tells you the current time in Cycles.  The "time"
+//    is actually time since power-on.  This is like time() but doesn't
+//    involve a system call and is much more precise.
+//
+// NOTE: Not all cpu/platform/kernel combinations guarantee that this
+// clock increments at a constant rate or is synchronized across all logical
+// cpus in a system.
+//
+// Also, in some out-of-order CPU implementations, the CycleClock is not
+// serializing.  So if you're trying to count at cycle granularity, your
+// data might be inaccurate due to out-of-order instruction execution.
+// ----------------------------------------------------------------------
+
+#ifndef GOOGLE_BASE_CYCLECLOCK_H_
+#define GOOGLE_BASE_CYCLECLOCK_H_
+
+#include "base/basictypes.h"   // make sure we get the def for int64
+#include "base/arm_instruction_set_select.h"
+// base/sysinfo.h is really big and we don't want to include it unless
+// it is necessary.
+#if defined(__arm__) || defined(__mips__) || defined(__aarch64__)
+# include "base/sysinfo.h"
+#endif
+#if defined(__MACH__) && defined(__APPLE__)
+# include <mach/mach_time.h>
+#endif
+// For MSVC, we want to use '_asm rdtsc' when possible (since it works
+// with even ancient MSVC compilers), and when not possible the
+// __rdtsc intrinsic, declared in <intrin.h>.  Unfortunately, in some
+// environments, <windows.h> and <intrin.h> have conflicting
+// declarations of some other intrinsics, breaking compilation.
+// Therefore, we simply declare __rdtsc ourselves. See also
+// http://connect.microsoft.com/VisualStudio/feedback/details/262047
+#if defined(_MSC_VER) && !defined(_M_IX86)
+extern "C" uint64 __rdtsc();
+#pragma intrinsic(__rdtsc)
+#endif
+#if defined(ARMV3) || defined(__mips__) || defined(__aarch64__)
+#include <sys/time.h>
+#endif
+
+// NOTE: only i386 and x86_64 have been well tested.
+// PPC, sparc, alpha, and ia64 are based on
+//    http://peter.kuscsik.com/wordpress/?p=14
+// with modifications by m3b.  See also
+//    https://setisvn.ssl.berkeley.edu/svn/lib/fftw-3.0.1/kernel/cycle.h
+struct CycleClock {
+  // This should return the number of cycles since power-on.  Thread-safe.
+  static inline int64 Now() {
+#if defined(__MACH__) && defined(__APPLE__)
+    // this goes at the top because we need ALL Macs, regardless of
+    // architecture, to return the number of "mach time units" that
+    // have passed since startup.  See sysinfo.cc where
+    // InitializeSystemInfo() sets the supposed cpu clock frequency of
+    // macs to the number of mach time units per second, not actual
+    // CPU clock frequency (which can change in the face of CPU
+    // frequency scaling).  Also note that when the Mac sleeps, this
+    // counter pauses; it does not continue counting, nor does it
+    // reset to zero.
+    return mach_absolute_time();
+#elif defined(__i386__)
+    int64 ret;
+    __asm__ volatile ("rdtsc" : "=A" (ret) );
+    return ret;
+#elif defined(__x86_64__) || defined(__amd64__)
+    uint64 low, high;
+    __asm__ volatile ("rdtsc" : "=a" (low), "=d" (high));
+    return (high << 32) | low;
+#elif defined(__powerpc64__) || defined(__ppc64__)
+    uint64 tb;
+    __asm__ volatile (\
+      "mfspr %0, 268"
+      : "=r" (tb));
+    return tb;
+#elif defined(__powerpc__) || defined(__ppc__)
+    // This returns a time-base, which is not always precisely a cycle-count.
+    uint32 tbu, tbl, tmp;
+    __asm__ volatile (\
+      "0:\n"
+      "mftbu %0\n"
+      "mftbl %1\n"
+      "mftbu %2\n"
+      "cmpw %0, %2\n"
+      "bne- 0b"
+      : "=r" (tbu), "=r" (tbl), "=r" (tmp));
+    return (((uint64) tbu << 32) | tbl);
+#elif defined(__sparc__)
+    int64 tick;
+    asm(".byte 0x83, 0x41, 0x00, 0x00");
+    asm("mov   %%g1, %0" : "=r" (tick));
+    return tick;
+#elif defined(__ia64__)
+    int64 itc;
+    asm("mov %0 = ar.itc" : "=r" (itc));
+    return itc;
+#elif defined(_MSC_VER) && defined(_M_IX86)
+    // Older MSVC compilers (like 7.x) don't seem to support the
+    // __rdtsc intrinsic properly, so I prefer to use _asm instead
+    // when I know it will work.  Otherwise, I'll use __rdtsc and hope
+    // the code is being compiled with a non-ancient compiler.
+    _asm rdtsc
+#elif defined(_MSC_VER)
+    return __rdtsc();
+#elif defined(ARMV3) || defined(__aarch64__)
+#if defined(ARMV7)  // V7 is the earliest arch that has a standard cyclecount
+    uint32 pmccntr;
+    uint32 pmuseren;
+    uint32 pmcntenset;
+    // Read the user mode perf monitor counter access permissions.
+    asm volatile ("mrc p15, 0, %0, c9, c14, 0" : "=r" (pmuseren));
+    if (pmuseren & 1) {  // Allows reading perfmon counters for user mode code.
+      asm volatile ("mrc p15, 0, %0, c9, c12, 1" : "=r" (pmcntenset));
+      if (pmcntenset & 0x80000000ul) {  // Is it counting?
+        asm volatile ("mrc p15, 0, %0, c9, c13, 0" : "=r" (pmccntr));
+        // The counter is set up to count every 64th cycle
+        return static_cast<int64>(pmccntr) * 64;  // Should optimize to << 6
+      }
+    }
+#endif
+    struct timeval tv;
+    gettimeofday(&tv, NULL);
+    return static_cast<int64>((tv.tv_sec + tv.tv_usec * 0.000001)
+                              * CyclesPerSecond());
+#elif defined(__mips__)
+    // mips apparently only allows rdtsc for superusers, so we fall
+    // back to gettimeofday.  It's possible clock_gettime would be better.
+    struct timeval tv;
+    gettimeofday(&tv, NULL);
+    return static_cast<int64>((tv.tv_sec + tv.tv_usec * 0.000001)
+                              * CyclesPerSecond());
+#else
+// The soft failover to a generic implementation is automatic only for ARM.
+// For other platforms the developer is expected to make an attempt to create
+// a fast implementation, falling back to the generic version only if nothing
+// better is available.
+#error You need to define CycleTimer for your O/S and CPU
+#endif
+  }
+};
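+
+// Illustrative usage (a sketch only; "DoWork" is hypothetical, and as noted
+// above the count may not be constant-rate or comparable across CPUs):
+//
+//   int64 start = CycleClock::Now();
+//   DoWork();
+//   int64 elapsed_cycles = CycleClock::Now() - start;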
+
+
+#endif  // GOOGLE_BASE_CYCLECLOCK_H_
diff --git a/src/base/dynamic_annotations.c b/src/base/dynamic_annotations.c
new file mode 100644
index 0000000..87bd2ec
--- /dev/null
+++ b/src/base/dynamic_annotations.c
@@ -0,0 +1,179 @@
+/* Copyright (c) 2008-2009, Google Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with the
+ * distribution.
+ *     * Neither the name of Google Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * ---
+ * Author: Kostya Serebryany
+ */
+
+#ifdef __cplusplus
+# error "This file should be built as pure C to avoid name mangling"
+#endif
+
+#include "config.h"
+#include <stdlib.h>
+#include <string.h>
+
+#include "base/dynamic_annotations.h"
+#include "getenv_safe.h" // for TCMallocGetenvSafe
+
+#ifdef __GNUC__
+/* valgrind.h uses gcc extensions so it won't build with other compilers */
+# ifdef HAVE_VALGRIND_H    /* prefer the user's copy if they have it */
+#  include <valgrind.h>
+# else                     /* otherwise just use the copy that we have */
+#  include "third_party/valgrind.h"
+# endif
+#endif
+
+/* Compiler-based ThreadSanitizer defines
+   DYNAMIC_ANNOTATIONS_EXTERNAL_IMPL = 1
+   and provides its own definitions of the functions. */
+
+#ifndef DYNAMIC_ANNOTATIONS_EXTERNAL_IMPL
+# define DYNAMIC_ANNOTATIONS_EXTERNAL_IMPL 0
+#endif
+
+/* Each function is empty and called (via a macro) only in debug mode.
+   The arguments are captured by dynamic tools at runtime. */
+
+#if DYNAMIC_ANNOTATIONS_ENABLED == 1 \
+    && DYNAMIC_ANNOTATIONS_EXTERNAL_IMPL == 0
+
+void AnnotateRWLockCreate(const char *file, int line,
+                          const volatile void *lock){}
+void AnnotateRWLockDestroy(const char *file, int line,
+                           const volatile void *lock){}
+void AnnotateRWLockAcquired(const char *file, int line,
+                            const volatile void *lock, long is_w){}
+void AnnotateRWLockReleased(const char *file, int line,
+                            const volatile void *lock, long is_w){}
+void AnnotateBarrierInit(const char *file, int line,
+                         const volatile void *barrier, long count,
+                         long reinitialization_allowed) {}
+void AnnotateBarrierWaitBefore(const char *file, int line,
+                               const volatile void *barrier) {}
+void AnnotateBarrierWaitAfter(const char *file, int line,
+                              const volatile void *barrier) {}
+void AnnotateBarrierDestroy(const char *file, int line,
+                            const volatile void *barrier) {}
+
+void AnnotateCondVarWait(const char *file, int line,
+                         const volatile void *cv,
+                         const volatile void *lock){}
+void AnnotateCondVarSignal(const char *file, int line,
+                           const volatile void *cv){}
+void AnnotateCondVarSignalAll(const char *file, int line,
+                              const volatile void *cv){}
+void AnnotatePublishMemoryRange(const char *file, int line,
+                                const volatile void *address,
+                                long size){}
+void AnnotateUnpublishMemoryRange(const char *file, int line,
+                                  const volatile void *address,
+                                  long size){}
+void AnnotatePCQCreate(const char *file, int line,
+                       const volatile void *pcq){}
+void AnnotatePCQDestroy(const char *file, int line,
+                        const volatile void *pcq){}
+void AnnotatePCQPut(const char *file, int line,
+                    const volatile void *pcq){}
+void AnnotatePCQGet(const char *file, int line,
+                    const volatile void *pcq){}
+void AnnotateNewMemory(const char *file, int line,
+                       const volatile void *mem,
+                       long size){}
+void AnnotateExpectRace(const char *file, int line,
+                        const volatile void *mem,
+                        const char *description){}
+void AnnotateBenignRace(const char *file, int line,
+                        const volatile void *mem,
+                        const char *description){}
+void AnnotateBenignRaceSized(const char *file, int line,
+                             const volatile void *mem,
+                             long size,
+                             const char *description) {}
+void AnnotateMutexIsUsedAsCondVar(const char *file, int line,
+                                  const volatile void *mu){}
+void AnnotateTraceMemory(const char *file, int line,
+                         const volatile void *arg){}
+void AnnotateThreadName(const char *file, int line,
+                        const char *name){}
+void AnnotateIgnoreReadsBegin(const char *file, int line){}
+void AnnotateIgnoreReadsEnd(const char *file, int line){}
+void AnnotateIgnoreWritesBegin(const char *file, int line){}
+void AnnotateIgnoreWritesEnd(const char *file, int line){}
+void AnnotateEnableRaceDetection(const char *file, int line, int enable){}
+void AnnotateNoOp(const char *file, int line,
+                  const volatile void *arg){}
+void AnnotateFlushState(const char *file, int line){}
+
+#endif  /* DYNAMIC_ANNOTATIONS_ENABLED == 1
+    && DYNAMIC_ANNOTATIONS_EXTERNAL_IMPL == 0 */
+
+#if DYNAMIC_ANNOTATIONS_EXTERNAL_IMPL == 0
+
+static int GetRunningOnValgrind(void) {
+#ifdef RUNNING_ON_VALGRIND
+  if (RUNNING_ON_VALGRIND) return 1;
+#endif
+  const char *running_on_valgrind_str = TCMallocGetenvSafe("RUNNING_ON_VALGRIND");
+  if (running_on_valgrind_str) {
+    return strcmp(running_on_valgrind_str, "0") != 0;
+  }
+  return 0;
+}
+
+/* See the comments in dynamic_annotations.h */
+int RunningOnValgrind(void) {
+  static volatile int running_on_valgrind = -1;
+  int local_running_on_valgrind = running_on_valgrind;
+  /* C doesn't have thread-safe initialization of statics, and we
+     don't want to depend on pthread_once here, so hack it. */
+  ANNOTATE_BENIGN_RACE(&running_on_valgrind, "safe hack");
+  if (local_running_on_valgrind == -1)
+    running_on_valgrind = local_running_on_valgrind = GetRunningOnValgrind();
+  return local_running_on_valgrind;
+}
+
+#endif  /* DYNAMIC_ANNOTATIONS_EXTERNAL_IMPL == 0 */
+
+/* See the comments in dynamic_annotations.h */
+double ValgrindSlowdown(void) {
+  /* Same initialization hack as in RunningOnValgrind(). */
+  static volatile double slowdown = 0.0;
+  double local_slowdown = slowdown;
+  ANNOTATE_BENIGN_RACE(&slowdown, "safe hack");
+  if (RunningOnValgrind() == 0) {
+    return 1.0;
+  }
+  if (local_slowdown == 0.0) {
+    char *env = getenv("VALGRIND_SLOWDOWN");
+    slowdown = local_slowdown = env ? atof(env) : 50.0;
+  }
+  return local_slowdown;
+}
diff --git a/src/base/dynamic_annotations.h b/src/base/dynamic_annotations.h
new file mode 100644
index 0000000..4669315
--- /dev/null
+++ b/src/base/dynamic_annotations.h
@@ -0,0 +1,627 @@
+/* Copyright (c) 2008, Google Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with the
+ * distribution.
+ *     * Neither the name of Google Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * ---
+ * Author: Kostya Serebryany
+ */
+
+/* This file defines dynamic annotations for use with dynamic analysis
+   tools such as Valgrind, PIN, etc.
+
+   Dynamic annotation is a source code annotation that affects
+   the generated code (that is, the annotation is not a comment).
+   Each such annotation is attached to a particular
+   instruction and/or to a particular object (address) in the program.
+
+   The annotations that should be used by users are macros in all upper-case
+   (e.g., ANNOTATE_NEW_MEMORY).
+
+   Actual implementation of these macros may differ depending on the
+   dynamic analysis tool being used.
+
+   See http://code.google.com/p/data-race-test/  for more information.
+
+   This file supports the following dynamic analysis tools:
+   - None (DYNAMIC_ANNOTATIONS_ENABLED is not defined or zero).
+      Macros are defined empty.
+   - ThreadSanitizer, Helgrind, DRD (DYNAMIC_ANNOTATIONS_ENABLED is 1).
+      Macros are defined as calls to non-inlinable empty functions
+      that are intercepted by Valgrind. */
+
+#ifndef BASE_DYNAMIC_ANNOTATIONS_H_
+#define BASE_DYNAMIC_ANNOTATIONS_H_
+
+#ifndef DYNAMIC_ANNOTATIONS_ENABLED
+# define DYNAMIC_ANNOTATIONS_ENABLED 0
+#endif
+
+#if DYNAMIC_ANNOTATIONS_ENABLED != 0
+
+  /* -------------------------------------------------------------
+     Annotations useful when implementing condition variables such as CondVar,
+     using conditional critical sections (Await/LockWhen) and when constructing
+     user-defined synchronization mechanisms.
+
+     The annotations ANNOTATE_HAPPENS_BEFORE() and ANNOTATE_HAPPENS_AFTER() can
+     be used to define happens-before arcs in user-defined synchronization
+     mechanisms:  the race detector will infer an arc from the former to the
+     latter when they share the same argument pointer.
+
+     Example 1 (reference counting):
+
+     void Unref() {
+       ANNOTATE_HAPPENS_BEFORE(&refcount_);
+       if (AtomicDecrementByOne(&refcount_) == 0) {
+         ANNOTATE_HAPPENS_AFTER(&refcount_);
+         delete this;
+       }
+     }
+
+     Example 2 (message queue):
+
+     void MyQueue::Put(Type *e) {
+       MutexLock lock(&mu_);
+       ANNOTATE_HAPPENS_BEFORE(e);
+       PutElementIntoMyQueue(e);
+     }
+
+     Type *MyQueue::Get() {
+       MutexLock lock(&mu_);
+       Type *e = GetElementFromMyQueue();
+       ANNOTATE_HAPPENS_AFTER(e);
+       return e;
+     }
+
+     Note: when possible, please use the existing reference counting and message
+     queue implementations instead of inventing new ones. */
+
+  /* Report that wait on the condition variable at address "cv" has succeeded
+     and the lock at address "lock" is held. */
+  #define ANNOTATE_CONDVAR_LOCK_WAIT(cv, lock) \
+    AnnotateCondVarWait(__FILE__, __LINE__, cv, lock)
+
+  /* Report that wait on the condition variable at "cv" has succeeded.  Variant
+     w/o lock. */
+  #define ANNOTATE_CONDVAR_WAIT(cv) \
+    AnnotateCondVarWait(__FILE__, __LINE__, cv, NULL)
+
+  /* Report that we are about to signal on the condition variable at address
+     "cv". */
+  #define ANNOTATE_CONDVAR_SIGNAL(cv) \
+    AnnotateCondVarSignal(__FILE__, __LINE__, cv)
+
+  /* Report that we are about to signal_all on the condition variable at "cv". */
+  #define ANNOTATE_CONDVAR_SIGNAL_ALL(cv) \
+    AnnotateCondVarSignalAll(__FILE__, __LINE__, cv)
+
+  /* Annotations for user-defined synchronization mechanisms. */
+  #define ANNOTATE_HAPPENS_BEFORE(obj) ANNOTATE_CONDVAR_SIGNAL(obj)
+  #define ANNOTATE_HAPPENS_AFTER(obj)  ANNOTATE_CONDVAR_WAIT(obj)
+
+  /* Report that the bytes in the range [pointer, pointer+size) are about
+     to be published safely. The race checker will create a happens-before
+     arc from the call ANNOTATE_PUBLISH_MEMORY_RANGE(pointer, size) to
+     subsequent accesses to this memory.
+     Note: this annotation may not work properly if the race detector uses
+     sampling, i.e. does not observe all memory accesses.
+     */
+  #define ANNOTATE_PUBLISH_MEMORY_RANGE(pointer, size) \
+    AnnotatePublishMemoryRange(__FILE__, __LINE__, pointer, size)
+
+  /* DEPRECATED. Don't use it. */
+  #define ANNOTATE_UNPUBLISH_MEMORY_RANGE(pointer, size) \
+    AnnotateUnpublishMemoryRange(__FILE__, __LINE__, pointer, size)
+
+  /* DEPRECATED. Don't use it. */
+  #define ANNOTATE_SWAP_MEMORY_RANGE(pointer, size)   \
+    do {                                              \
+      ANNOTATE_UNPUBLISH_MEMORY_RANGE(pointer, size); \
+      ANNOTATE_PUBLISH_MEMORY_RANGE(pointer, size);   \
+    } while (0)
+
+  /* Instruct the tool to create a happens-before arc between mu->Unlock() and
+     mu->Lock(). This annotation may slow down the race detector and hide real
+     races. Normally it is used only when it would be difficult to annotate each
+     of the mutex's critical sections individually using the annotations above.
+     This annotation makes sense only for hybrid race detectors. For pure
+     happens-before detectors this is a no-op. For more details see
+     http://code.google.com/p/data-race-test/wiki/PureHappensBeforeVsHybrid . */
+  #define ANNOTATE_PURE_HAPPENS_BEFORE_MUTEX(mu) \
+    AnnotateMutexIsUsedAsCondVar(__FILE__, __LINE__, mu)
+
+  /* Deprecated. Use ANNOTATE_PURE_HAPPENS_BEFORE_MUTEX. */
+  #define ANNOTATE_MUTEX_IS_USED_AS_CONDVAR(mu) \
+    AnnotateMutexIsUsedAsCondVar(__FILE__, __LINE__, mu)
+
+  /* -------------------------------------------------------------
+     Annotations useful when defining memory allocators, or when memory that
+     was protected in one way starts to be protected in another. */
+
+  /* Report that new memory at "address" of size "size" has been allocated.
+     This might be used when the memory has been retrieved from a free list and
+     is about to be reused, or when the locking discipline for a variable
+     changes. */
+  #define ANNOTATE_NEW_MEMORY(address, size) \
+    AnnotateNewMemory(__FILE__, __LINE__, address, size)
+
+  /* -------------------------------------------------------------
+     Annotations useful when defining FIFO queues that transfer data between
+     threads. */
+
+  /* Report that the producer-consumer queue (such as ProducerConsumerQueue) at
+     address "pcq" has been created.  The ANNOTATE_PCQ_* annotations
+     should be used only for FIFO queues.  For non-FIFO queues use
+     ANNOTATE_HAPPENS_BEFORE (for put) and ANNOTATE_HAPPENS_AFTER (for get). */
+  #define ANNOTATE_PCQ_CREATE(pcq) \
+    AnnotatePCQCreate(__FILE__, __LINE__, pcq)
+
+  /* Report that the queue at address "pcq" is about to be destroyed. */
+  #define ANNOTATE_PCQ_DESTROY(pcq) \
+    AnnotatePCQDestroy(__FILE__, __LINE__, pcq)
+
+  /* Report that we are about to put an element into a FIFO queue at address
+     "pcq". */
+  #define ANNOTATE_PCQ_PUT(pcq) \
+    AnnotatePCQPut(__FILE__, __LINE__, pcq)
+
+  /* Report that we've just got an element from a FIFO queue at address "pcq". */
+  #define ANNOTATE_PCQ_GET(pcq) \
+    AnnotatePCQGet(__FILE__, __LINE__, pcq)
+
+  /* -------------------------------------------------------------
+     Annotations that suppress errors.  It is usually better to express the
+     program's synchronization using the other annotations, but these can
+     be used when all else fails. */
+
+  /* Report that we may have a benign race at "pointer", with size
+     "sizeof(*(pointer))". "pointer" must be a non-void* pointer.  Insert at the
+     point where "pointer" has been allocated, preferably close to the point
+     where the race happens.  See also ANNOTATE_BENIGN_RACE_STATIC. */
+  #define ANNOTATE_BENIGN_RACE(pointer, description) \
+    AnnotateBenignRaceSized(__FILE__, __LINE__, pointer, \
+                            sizeof(*(pointer)), description)
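+
+  /* Illustrative usage (hypothetical counter): suppress reports about a
+     deliberately unsynchronized statistics counter.
+
+       static int hits = 0;
+       ANNOTATE_BENIGN_RACE(&hits, "approximate hit counter, races are ok");
+   */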
+
+  /* Same as ANNOTATE_BENIGN_RACE(address, description), but applies to
+     the memory range [address, address+size). */
+  #define ANNOTATE_BENIGN_RACE_SIZED(address, size, description) \
+    AnnotateBenignRaceSized(__FILE__, __LINE__, address, size, description)
+
+  /* Request the analysis tool to ignore all reads in the current thread
+     until ANNOTATE_IGNORE_READS_END is called.
+     Useful to ignore intentionally racy reads, while still checking
+     other reads and all writes.
+     See also ANNOTATE_UNPROTECTED_READ. */
+  #define ANNOTATE_IGNORE_READS_BEGIN() \
+    AnnotateIgnoreReadsBegin(__FILE__, __LINE__)
+
+  /* Stop ignoring reads. */
+  #define ANNOTATE_IGNORE_READS_END() \
+    AnnotateIgnoreReadsEnd(__FILE__, __LINE__)
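+
+  /* For example (a sketch; "racy_counter" is a hypothetical variable that
+     is intentionally read without synchronization):
+        ANNOTATE_IGNORE_READS_BEGIN();
+        int approx_value = racy_counter;
+        ANNOTATE_IGNORE_READS_END();
+  */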
+
+  /* Similar to ANNOTATE_IGNORE_READS_BEGIN, but ignore writes. */
+  #define ANNOTATE_IGNORE_WRITES_BEGIN() \
+    AnnotateIgnoreWritesBegin(__FILE__, __LINE__)
+
+  /* Stop ignoring writes. */
+  #define ANNOTATE_IGNORE_WRITES_END() \
+    AnnotateIgnoreWritesEnd(__FILE__, __LINE__)
+
+  /* Start ignoring all memory accesses (reads and writes). */
+  #define ANNOTATE_IGNORE_READS_AND_WRITES_BEGIN() \
+    do {                                           \
+      ANNOTATE_IGNORE_READS_BEGIN();               \
+      ANNOTATE_IGNORE_WRITES_BEGIN();              \
+    } while (0)
+
+  /* Stop ignoring all memory accesses. */
+  #define ANNOTATE_IGNORE_READS_AND_WRITES_END() \
+    do {                                         \
+      ANNOTATE_IGNORE_WRITES_END();              \
+      ANNOTATE_IGNORE_READS_END();               \
+    } while (0)
+
+  /* Enable (enable!=0) or disable (enable==0) race detection for all threads.
+     This annotation could be useful if you want to skip expensive race analysis
+     during some period of program execution, e.g. during initialization. */
+  #define ANNOTATE_ENABLE_RACE_DETECTION(enable) \
+    AnnotateEnableRaceDetection(__FILE__, __LINE__, enable)
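+
+  /* For example (a sketch; "InitializeSharedTables" is hypothetical):
+        ANNOTATE_ENABLE_RACE_DETECTION(0);
+        InitializeSharedTables();   ... races here are deliberately ignored ...
+        ANNOTATE_ENABLE_RACE_DETECTION(1);
+  */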
+
+  /* -------------------------------------------------------------
+     Annotations useful for debugging. */
+
+  /* Request to trace every access to "address". */
+  #define ANNOTATE_TRACE_MEMORY(address) \
+    AnnotateTraceMemory(__FILE__, __LINE__, address)
+
+  /* Report the current thread name to a race detector. */
+  #define ANNOTATE_THREAD_NAME(name) \
+    AnnotateThreadName(__FILE__, __LINE__, name)
+
+  /* -------------------------------------------------------------
+     Annotations useful when implementing locks.  They are not
+     normally needed by modules that merely use locks.
+     The "lock" argument is a pointer to the lock object. */
+
+  /* Report that a lock has been created at address "lock". */
+  #define ANNOTATE_RWLOCK_CREATE(lock) \
+    AnnotateRWLockCreate(__FILE__, __LINE__, lock)
+
+  /* Report that the lock at address "lock" is about to be destroyed. */
+  #define ANNOTATE_RWLOCK_DESTROY(lock) \
+    AnnotateRWLockDestroy(__FILE__, __LINE__, lock)
+
+  /* Report that the lock at address "lock" has been acquired.
+     is_w=1 for writer lock, is_w=0 for reader lock. */
+  #define ANNOTATE_RWLOCK_ACQUIRED(lock, is_w) \
+    AnnotateRWLockAcquired(__FILE__, __LINE__, lock, is_w)
+
+  /* Report that the lock at address "lock" is about to be released. */
+  #define ANNOTATE_RWLOCK_RELEASED(lock, is_w) \
+    AnnotateRWLockReleased(__FILE__, __LINE__, lock, is_w)
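+
+  /* For example, a hypothetical reader-writer lock implementation might be
+     annotated as follows (a sketch, not a complete lock):
+        in my_rwlock_init:      ANNOTATE_RWLOCK_CREATE(&lk);
+        after taking write lock:    ANNOTATE_RWLOCK_ACQUIRED(&lk, 1);
+        before dropping write lock: ANNOTATE_RWLOCK_RELEASED(&lk, 1);
+        in my_rwlock_destroy:   ANNOTATE_RWLOCK_DESTROY(&lk);
+  */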
+
+  /* -------------------------------------------------------------
+     Annotations useful when implementing barriers.  They are not
+     normally needed by modules that merely use barriers.
+     The "barrier" argument is a pointer to the barrier object. */
+
+  /* Report that the "barrier" has been initialized with initial "count".
+   If 'reinitialization_allowed' is true, initialization is allowed to happen
+   multiple times w/o calling barrier_destroy() */
+  #define ANNOTATE_BARRIER_INIT(barrier, count, reinitialization_allowed) \
+    AnnotateBarrierInit(__FILE__, __LINE__, barrier, count, \
+                        reinitialization_allowed)
+
+  /* Report that we are about to enter barrier_wait("barrier"). */
+  #define ANNOTATE_BARRIER_WAIT_BEFORE(barrier) \
+    AnnotateBarrierWaitBefore(__FILE__, __LINE__, barrier)
+
+  /* Report that we just exited barrier_wait("barrier"). */
+  #define ANNOTATE_BARRIER_WAIT_AFTER(barrier) \
+    AnnotateBarrierWaitAfter(__FILE__, __LINE__, barrier)
+
+  /* Report that the "barrier" has been destroyed. */
+  #define ANNOTATE_BARRIER_DESTROY(barrier) \
+    AnnotateBarrierDestroy(__FILE__, __LINE__, barrier)
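+
+  /* For example (a sketch; the "my_barrier_*" functions are hypothetical):
+        my_barrier_init(&b, n);     ANNOTATE_BARRIER_INIT(&b, n, 0);
+        ANNOTATE_BARRIER_WAIT_BEFORE(&b);
+        my_barrier_wait(&b);
+        ANNOTATE_BARRIER_WAIT_AFTER(&b);
+        my_barrier_destroy(&b);     ANNOTATE_BARRIER_DESTROY(&b);
+  */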
+
+  /* -------------------------------------------------------------
+     Annotations useful for testing race detectors. */
+
+  /* Report that we expect a race on the variable at "address".
+     Use only in unit tests for a race detector. */
+  #define ANNOTATE_EXPECT_RACE(address, description) \
+    AnnotateExpectRace(__FILE__, __LINE__, address, description)
+
+  /* A no-op. Insert it wherever you like, to test the interceptors. */
+  #define ANNOTATE_NO_OP(arg) \
+    AnnotateNoOp(__FILE__, __LINE__, arg)
+
+  /* Force the race detector to flush its state. The actual effect depends on
+   * the implementation of the detector. */
+  #define ANNOTATE_FLUSH_STATE() \
+    AnnotateFlushState(__FILE__, __LINE__)
+
+
+#else  /* DYNAMIC_ANNOTATIONS_ENABLED == 0 */
+
+  #define ANNOTATE_RWLOCK_CREATE(lock) /* empty */
+  #define ANNOTATE_RWLOCK_DESTROY(lock) /* empty */
+  #define ANNOTATE_RWLOCK_ACQUIRED(lock, is_w) /* empty */
+  #define ANNOTATE_RWLOCK_RELEASED(lock, is_w) /* empty */
+  #define ANNOTATE_BARRIER_INIT(barrier, count, reinitialization_allowed) /* */
+  #define ANNOTATE_BARRIER_WAIT_BEFORE(barrier) /* empty */
+  #define ANNOTATE_BARRIER_WAIT_AFTER(barrier) /* empty */
+  #define ANNOTATE_BARRIER_DESTROY(barrier) /* empty */
+  #define ANNOTATE_CONDVAR_LOCK_WAIT(cv, lock) /* empty */
+  #define ANNOTATE_CONDVAR_WAIT(cv) /* empty */
+  #define ANNOTATE_CONDVAR_SIGNAL(cv) /* empty */
+  #define ANNOTATE_CONDVAR_SIGNAL_ALL(cv) /* empty */
+  #define ANNOTATE_HAPPENS_BEFORE(obj) /* empty */
+  #define ANNOTATE_HAPPENS_AFTER(obj) /* empty */
+  #define ANNOTATE_PUBLISH_MEMORY_RANGE(address, size) /* empty */
+  #define ANNOTATE_UNPUBLISH_MEMORY_RANGE(address, size)  /* empty */
+  #define ANNOTATE_SWAP_MEMORY_RANGE(address, size)  /* empty */
+  #define ANNOTATE_PCQ_CREATE(pcq) /* empty */
+  #define ANNOTATE_PCQ_DESTROY(pcq) /* empty */
+  #define ANNOTATE_PCQ_PUT(pcq) /* empty */
+  #define ANNOTATE_PCQ_GET(pcq) /* empty */
+  #define ANNOTATE_NEW_MEMORY(address, size) /* empty */
+  #define ANNOTATE_EXPECT_RACE(address, description) /* empty */
+  #define ANNOTATE_BENIGN_RACE(address, description) /* empty */
+  #define ANNOTATE_BENIGN_RACE_SIZED(address, size, description) /* empty */
+  #define ANNOTATE_PURE_HAPPENS_BEFORE_MUTEX(mu) /* empty */
+  #define ANNOTATE_MUTEX_IS_USED_AS_CONDVAR(mu) /* empty */
+  #define ANNOTATE_TRACE_MEMORY(arg) /* empty */
+  #define ANNOTATE_THREAD_NAME(name) /* empty */
+  #define ANNOTATE_IGNORE_READS_BEGIN() /* empty */
+  #define ANNOTATE_IGNORE_READS_END() /* empty */
+  #define ANNOTATE_IGNORE_WRITES_BEGIN() /* empty */
+  #define ANNOTATE_IGNORE_WRITES_END() /* empty */
+  #define ANNOTATE_IGNORE_READS_AND_WRITES_BEGIN() /* empty */
+  #define ANNOTATE_IGNORE_READS_AND_WRITES_END() /* empty */
+  #define ANNOTATE_ENABLE_RACE_DETECTION(enable) /* empty */
+  #define ANNOTATE_NO_OP(arg) /* empty */
+  #define ANNOTATE_FLUSH_STATE() /* empty */
+
+#endif  /* DYNAMIC_ANNOTATIONS_ENABLED */
+
+/* Macro definitions for GCC attributes that allow static thread safety
+   analysis to recognize and use some of the dynamic annotations as
+   escape hatches.
+   TODO(lcwu): remove the check for __SUPPORT_DYN_ANNOTATION__ once the
+   default crosstool/GCC supports these GCC attributes.  */
+
+#define ANNOTALYSIS_STATIC_INLINE
+#define ANNOTALYSIS_SEMICOLON_OR_EMPTY_BODY ;
+#define ANNOTALYSIS_IGNORE_READS_BEGIN
+#define ANNOTALYSIS_IGNORE_READS_END
+#define ANNOTALYSIS_IGNORE_WRITES_BEGIN
+#define ANNOTALYSIS_IGNORE_WRITES_END
+#define ANNOTALYSIS_UNPROTECTED_READ
+
+#if defined(__GNUC__) && (!defined(SWIG)) && (!defined(__clang__)) && \
+    defined(__SUPPORT_TS_ANNOTATION__) && defined(__SUPPORT_DYN_ANNOTATION__)
+
+#if DYNAMIC_ANNOTATIONS_ENABLED == 0
+#define ANNOTALYSIS_ONLY 1
+#undef ANNOTALYSIS_STATIC_INLINE
+#define ANNOTALYSIS_STATIC_INLINE static inline
+#undef ANNOTALYSIS_SEMICOLON_OR_EMPTY_BODY
+#define ANNOTALYSIS_SEMICOLON_OR_EMPTY_BODY { (void)file; (void)line; }
+#endif
+
+/* Only emit attributes when annotalysis is enabled. */
+#if defined(__SUPPORT_TS_ANNOTATION__) && defined(__SUPPORT_DYN_ANNOTATION__)
+#undef  ANNOTALYSIS_IGNORE_READS_BEGIN
+#define ANNOTALYSIS_IGNORE_READS_BEGIN  __attribute__ ((ignore_reads_begin))
+#undef  ANNOTALYSIS_IGNORE_READS_END
+#define ANNOTALYSIS_IGNORE_READS_END    __attribute__ ((ignore_reads_end))
+#undef  ANNOTALYSIS_IGNORE_WRITES_BEGIN
+#define ANNOTALYSIS_IGNORE_WRITES_BEGIN __attribute__ ((ignore_writes_begin))
+#undef  ANNOTALYSIS_IGNORE_WRITES_END
+#define ANNOTALYSIS_IGNORE_WRITES_END   __attribute__ ((ignore_writes_end))
+#undef  ANNOTALYSIS_UNPROTECTED_READ
+#define ANNOTALYSIS_UNPROTECTED_READ    __attribute__ ((unprotected_read))
+#endif
+
+#endif  /* __GNUC__ && !SWIG && !__clang__ && TS/DYN annotation support */
+
+/* Use the macros above rather than using these functions directly. */
+#ifdef __cplusplus
+extern "C" {
+#endif
+void AnnotateRWLockCreate(const char *file, int line,
+                          const volatile void *lock);
+void AnnotateRWLockDestroy(const char *file, int line,
+                           const volatile void *lock);
+void AnnotateRWLockAcquired(const char *file, int line,
+                            const volatile void *lock, long is_w);
+void AnnotateRWLockReleased(const char *file, int line,
+                            const volatile void *lock, long is_w);
+void AnnotateBarrierInit(const char *file, int line,
+                         const volatile void *barrier, long count,
+                         long reinitialization_allowed);
+void AnnotateBarrierWaitBefore(const char *file, int line,
+                               const volatile void *barrier);
+void AnnotateBarrierWaitAfter(const char *file, int line,
+                              const volatile void *barrier);
+void AnnotateBarrierDestroy(const char *file, int line,
+                            const volatile void *barrier);
+void AnnotateCondVarWait(const char *file, int line,
+                         const volatile void *cv,
+                         const volatile void *lock);
+void AnnotateCondVarSignal(const char *file, int line,
+                           const volatile void *cv);
+void AnnotateCondVarSignalAll(const char *file, int line,
+                              const volatile void *cv);
+void AnnotatePublishMemoryRange(const char *file, int line,
+                                const volatile void *address,
+                                long size);
+void AnnotateUnpublishMemoryRange(const char *file, int line,
+                                  const volatile void *address,
+                                  long size);
+void AnnotatePCQCreate(const char *file, int line,
+                       const volatile void *pcq);
+void AnnotatePCQDestroy(const char *file, int line,
+                        const volatile void *pcq);
+void AnnotatePCQPut(const char *file, int line,
+                    const volatile void *pcq);
+void AnnotatePCQGet(const char *file, int line,
+                    const volatile void *pcq);
+void AnnotateNewMemory(const char *file, int line,
+                       const volatile void *address,
+                       long size);
+void AnnotateExpectRace(const char *file, int line,
+                        const volatile void *address,
+                        const char *description);
+void AnnotateBenignRace(const char *file, int line,
+                        const volatile void *address,
+                        const char *description);
+void AnnotateBenignRaceSized(const char *file, int line,
+                        const volatile void *address,
+                        long size,
+                        const char *description);
+void AnnotateMutexIsUsedAsCondVar(const char *file, int line,
+                                  const volatile void *mu);
+void AnnotateTraceMemory(const char *file, int line,
+                         const volatile void *arg);
+void AnnotateThreadName(const char *file, int line,
+                        const char *name);
+ANNOTALYSIS_STATIC_INLINE
+void AnnotateIgnoreReadsBegin(const char *file, int line)
+    ANNOTALYSIS_IGNORE_READS_BEGIN ANNOTALYSIS_SEMICOLON_OR_EMPTY_BODY
+ANNOTALYSIS_STATIC_INLINE
+void AnnotateIgnoreReadsEnd(const char *file, int line)
+    ANNOTALYSIS_IGNORE_READS_END ANNOTALYSIS_SEMICOLON_OR_EMPTY_BODY
+ANNOTALYSIS_STATIC_INLINE
+void AnnotateIgnoreWritesBegin(const char *file, int line)
+    ANNOTALYSIS_IGNORE_WRITES_BEGIN ANNOTALYSIS_SEMICOLON_OR_EMPTY_BODY
+ANNOTALYSIS_STATIC_INLINE
+void AnnotateIgnoreWritesEnd(const char *file, int line)
+    ANNOTALYSIS_IGNORE_WRITES_END ANNOTALYSIS_SEMICOLON_OR_EMPTY_BODY
+void AnnotateEnableRaceDetection(const char *file, int line, int enable);
+void AnnotateNoOp(const char *file, int line,
+                  const volatile void *arg);
+void AnnotateFlushState(const char *file, int line);
+
+/* Return non-zero value if running under valgrind.
+
+  If "valgrind.h" is included into dynamic_annotations.c,
+  the regular valgrind mechanism will be used.
+  See http://valgrind.org/docs/manual/manual-core-adv.html about
+  RUNNING_ON_VALGRIND and other valgrind "client requests".
+  The file "valgrind.h" may be obtained by doing
+     svn co svn://svn.valgrind.org/valgrind/trunk/include
+
+  If for some reason you can't use "valgrind.h" or want to fake valgrind,
+  there are two ways to make this function return non-zero:
+    - Use environment variable: export RUNNING_ON_VALGRIND=1
+    - Make your tool intercept the function RunningOnValgrind() and
+      change its return value.
+ */
+int RunningOnValgrind(void);
+
+/* ValgrindSlowdown returns:
+    * 1.0, if (RunningOnValgrind() == 0)
+    * 50.0, if (RunningOnValgrind() != 0 && getenv("VALGRIND_SLOWDOWN") == NULL)
+    * atof(getenv("VALGRIND_SLOWDOWN")) otherwise
+   This function can be used to scale timeout values:
+   EXAMPLE:
+   for (;;) {
+     DoExpensiveBackgroundTask();
+     SleepForSeconds(5 * ValgrindSlowdown());
+   }
+ */
+double ValgrindSlowdown(void);
+
+#ifdef __cplusplus
+}
+#endif
+
+#if DYNAMIC_ANNOTATIONS_ENABLED != 0 && defined(__cplusplus)
+
+  /* ANNOTATE_UNPROTECTED_READ is the preferred way to annotate racy reads.
+
+     Instead of doing
+        ANNOTATE_IGNORE_READS_BEGIN();
+        ... = x;
+        ANNOTATE_IGNORE_READS_END();
+     one can use
+        ... = ANNOTATE_UNPROTECTED_READ(x); */
+  template <class T>
+  inline T ANNOTATE_UNPROTECTED_READ(const volatile T &x)
+      ANNOTALYSIS_UNPROTECTED_READ {
+    ANNOTATE_IGNORE_READS_BEGIN();
+    T res = x;
+    ANNOTATE_IGNORE_READS_END();
+    return res;
+  }
+  /* Apply ANNOTATE_BENIGN_RACE_SIZED to a static variable. */
+  #define ANNOTATE_BENIGN_RACE_STATIC(static_var, description)        \
+    namespace {                                                       \
+      class static_var ## _annotator {                                \
+       public:                                                        \
+        static_var ## _annotator() {                                  \
+          ANNOTATE_BENIGN_RACE_SIZED(&static_var,                     \
+                                      sizeof(static_var),             \
+            # static_var ": " description);                           \
+        }                                                             \
+      };                                                              \
+      static static_var ## _annotator the ## static_var ## _annotator;\
+    }
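+
+  /* For example (a sketch; "g_hit_count" is a hypothetical statistics
+     counter that is deliberately updated without synchronization):
+        static int g_hit_count;
+        ANNOTATE_BENIGN_RACE_STATIC(g_hit_count, "stats only, races benign")
+  */
+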
+#else /* DYNAMIC_ANNOTATIONS_ENABLED == 0 */
+
+  #define ANNOTATE_UNPROTECTED_READ(x) (x)
+  #define ANNOTATE_BENIGN_RACE_STATIC(static_var, description)  /* empty */
+
+#endif /* DYNAMIC_ANNOTATIONS_ENABLED */
+
+/* Annotalysis, a GCC based static analyzer, is able to understand and use
+   some of the dynamic annotations defined in this file. However, dynamic
+   annotations are usually disabled in the opt mode (to avoid additional
+   runtime overheads) while Annotalysis only works in the opt mode.
+   In order for Annotalysis to use these dynamic annotations when they
+   are disabled, we re-define these annotations here. Note that unlike the
+   original macro definitions above, these macros are expanded to calls to
+   static inline functions so that the compiler will be able to remove the
+   calls after the analysis. */
+
+#ifdef ANNOTALYSIS_ONLY
+
+  #undef ANNOTALYSIS_ONLY
+
+  /* Undefine and re-define the macros that the static analyzer understands. */
+  #undef ANNOTATE_IGNORE_READS_BEGIN
+  #define ANNOTATE_IGNORE_READS_BEGIN()           \
+    AnnotateIgnoreReadsBegin(__FILE__, __LINE__)
+
+  #undef ANNOTATE_IGNORE_READS_END
+  #define ANNOTATE_IGNORE_READS_END()             \
+    AnnotateIgnoreReadsEnd(__FILE__, __LINE__)
+
+  #undef ANNOTATE_IGNORE_WRITES_BEGIN
+  #define ANNOTATE_IGNORE_WRITES_BEGIN()          \
+    AnnotateIgnoreWritesBegin(__FILE__, __LINE__)
+
+  #undef ANNOTATE_IGNORE_WRITES_END
+  #define ANNOTATE_IGNORE_WRITES_END()            \
+    AnnotateIgnoreWritesEnd(__FILE__, __LINE__)
+
+  #undef ANNOTATE_IGNORE_READS_AND_WRITES_BEGIN
+  #define ANNOTATE_IGNORE_READS_AND_WRITES_BEGIN()       \
+    do {                                                 \
+      ANNOTATE_IGNORE_READS_BEGIN();                     \
+      ANNOTATE_IGNORE_WRITES_BEGIN();                    \
+    } while (0)
+
+  #undef ANNOTATE_IGNORE_READS_AND_WRITES_END
+  #define ANNOTATE_IGNORE_READS_AND_WRITES_END()  \
+    do {                                          \
+      ANNOTATE_IGNORE_WRITES_END();               \
+      ANNOTATE_IGNORE_READS_END();                \
+    } while (0)
+
+  #if defined(__cplusplus)
+    #undef ANNOTATE_UNPROTECTED_READ
+    template <class T>
+    inline T ANNOTATE_UNPROTECTED_READ(const volatile T &x)
+         ANNOTALYSIS_UNPROTECTED_READ {
+      ANNOTATE_IGNORE_READS_BEGIN();
+      T res = x;
+      ANNOTATE_IGNORE_READS_END();
+      return res;
+    }
+  #endif /* __cplusplus */
+
+#endif /* ANNOTALYSIS_ONLY */
+
+/* Undefine the macros intended for use only in this file. */
+#undef ANNOTALYSIS_STATIC_INLINE
+#undef ANNOTALYSIS_SEMICOLON_OR_EMPTY_BODY
+
+#endif  /* BASE_DYNAMIC_ANNOTATIONS_H_ */
diff --git a/src/base/elf_mem_image.cc b/src/base/elf_mem_image.cc
new file mode 100644
index 0000000..d2ca1a5
--- /dev/null
+++ b/src/base/elf_mem_image.cc
@@ -0,0 +1,434 @@
+// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
+// Copyright (c) 2008, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// ---
+// Author: Paul Pluzhnikov
+//
+// Allow dynamic symbol lookup in an in-memory Elf image.
+//
+
+#include "base/elf_mem_image.h"
+
+#ifdef HAVE_ELF_MEM_IMAGE  // defined in elf_mem_image.h
+
+#include <stddef.h>   // for size_t, ptrdiff_t
+#include "base/logging.h"
+
+// From binutils/include/elf/common.h (this doesn't appear to be documented
+// anywhere else).
+//
+//   /* This flag appears in a Versym structure.  It means that the symbol
+//      is hidden, and is only visible with an explicit version number.
+//      This is a GNU extension.  */
+//   #define VERSYM_HIDDEN           0x8000
+//
+//   /* This is the mask for the rest of the Versym information.  */
+//   #define VERSYM_VERSION          0x7fff
+
+#define VERSYM_VERSION 0x7fff
+
+namespace base {
+
+namespace {
+template <int N> class ElfClass {
+ public:
+  static const int kElfClass = -1;
+  static int ElfBind(const ElfW(Sym) *) {
+    CHECK(false); // << "Unexpected word size";
+    return 0;
+  }
+  static int ElfType(const ElfW(Sym) *) {
+    CHECK(false); // << "Unexpected word size";
+    return 0;
+  }
+};
+
+template <> class ElfClass<32> {
+ public:
+  static const int kElfClass = ELFCLASS32;
+  static int ElfBind(const ElfW(Sym) *symbol) {
+    return ELF32_ST_BIND(symbol->st_info);
+  }
+  static int ElfType(const ElfW(Sym) *symbol) {
+    return ELF32_ST_TYPE(symbol->st_info);
+  }
+};
+
+template <> class ElfClass<64> {
+ public:
+  static const int kElfClass = ELFCLASS64;
+  static int ElfBind(const ElfW(Sym) *symbol) {
+    return ELF64_ST_BIND(symbol->st_info);
+  }
+  static int ElfType(const ElfW(Sym) *symbol) {
+    return ELF64_ST_TYPE(symbol->st_info);
+  }
+};
+
+typedef ElfClass<__WORDSIZE> CurrentElfClass;
+
+// Extract an element from one of the ELF tables, cast it to desired type.
+// This is just simple arithmetic and a glorified cast.
+// Callers are responsible for bounds checking.
+template <class T>
+const T* GetTableElement(const ElfW(Ehdr) *ehdr,
+                         ElfW(Off) table_offset,
+                         ElfW(Word) element_size,
+                         size_t index) {
+  return reinterpret_cast<const T*>(reinterpret_cast<const char *>(ehdr)
+                                    + table_offset
+                                    + index * element_size);
+}
+}  // namespace
+
+const void *const ElfMemImage::kInvalidBase =
+    reinterpret_cast<const void *>(~0L);
+
+ElfMemImage::ElfMemImage(const void *base) {
+  CHECK(base != kInvalidBase);
+  Init(base);
+}
+
+int ElfMemImage::GetNumSymbols() const {
+  if (!hash_) {
+    return 0;
+  }
+  // See http://www.caldera.com/developers/gabi/latest/ch5.dynamic.html#hash
+  return hash_[1];
+}
+
+const ElfW(Sym) *ElfMemImage::GetDynsym(int index) const {
+  CHECK_LT(index, GetNumSymbols());
+  return dynsym_ + index;
+}
+
+const ElfW(Versym) *ElfMemImage::GetVersym(int index) const {
+  CHECK_LT(index, GetNumSymbols());
+  return versym_ + index;
+}
+
+const ElfW(Phdr) *ElfMemImage::GetPhdr(int index) const {
+  CHECK_LT(index, ehdr_->e_phnum);
+  return GetTableElement<ElfW(Phdr)>(ehdr_,
+                                     ehdr_->e_phoff,
+                                     ehdr_->e_phentsize,
+                                     index);
+}
+
+const char *ElfMemImage::GetDynstr(ElfW(Word) offset) const {
+  CHECK_LT(offset, strsize_);
+  return dynstr_ + offset;
+}
+
+const void *ElfMemImage::GetSymAddr(const ElfW(Sym) *sym) const {
+  if (sym->st_shndx == SHN_UNDEF || sym->st_shndx >= SHN_LORESERVE) {
+    // Symbol corresponds to "special" (e.g. SHN_ABS) section.
+    return reinterpret_cast<const void *>(sym->st_value);
+  }
+  CHECK_LT(link_base_, sym->st_value);
+  return GetTableElement<char>(ehdr_, 0, 1, sym->st_value) - link_base_;
+}
+
+const ElfW(Verdef) *ElfMemImage::GetVerdef(int index) const {
+  CHECK_LE(index, verdefnum_);
+  const ElfW(Verdef) *version_definition = verdef_;
+  while (version_definition->vd_ndx < index && version_definition->vd_next) {
+    const char *const version_definition_as_char =
+        reinterpret_cast<const char *>(version_definition);
+    version_definition =
+        reinterpret_cast<const ElfW(Verdef) *>(version_definition_as_char +
+                                               version_definition->vd_next);
+  }
+  return version_definition->vd_ndx == index ? version_definition : NULL;
+}
+
+const ElfW(Verdaux) *ElfMemImage::GetVerdefAux(
+    const ElfW(Verdef) *verdef) const {
+  return reinterpret_cast<const ElfW(Verdaux) *>(verdef+1);
+}
+
+const char *ElfMemImage::GetVerstr(ElfW(Word) offset) const {
+  CHECK_LT(offset, strsize_);
+  return dynstr_ + offset;
+}
+
+void ElfMemImage::Init(const void *base) {
+  ehdr_      = NULL;
+  dynsym_    = NULL;
+  dynstr_    = NULL;
+  versym_    = NULL;
+  verdef_    = NULL;
+  hash_      = NULL;
+  strsize_   = 0;
+  verdefnum_ = 0;
+  link_base_ = ~0L;  // Sentinel: PT_LOAD .p_vaddr can't possibly be this.
+  if (!base) {
+    return;
+  }
+  const uintptr_t base_as_uintptr_t = reinterpret_cast<uintptr_t>(base);
+  // Fake VDSO has low bit set.
+  const bool fake_vdso = ((base_as_uintptr_t & 1) != 0);
+  base = reinterpret_cast<const void *>(base_as_uintptr_t & ~1);
+  const char *const base_as_char = reinterpret_cast<const char *>(base);
+  if (base_as_char[EI_MAG0] != ELFMAG0 || base_as_char[EI_MAG1] != ELFMAG1 ||
+      base_as_char[EI_MAG2] != ELFMAG2 || base_as_char[EI_MAG3] != ELFMAG3) {
+    RAW_DCHECK(false, "no ELF magic"); // at %p", base);
+    return;
+  }
+  int elf_class = base_as_char[EI_CLASS];
+  if (elf_class != CurrentElfClass::kElfClass) {
+    DCHECK_EQ(elf_class, CurrentElfClass::kElfClass);
+    return;
+  }
+  switch (base_as_char[EI_DATA]) {
+    case ELFDATA2LSB: {
+      if (__LITTLE_ENDIAN != __BYTE_ORDER) {
+        DCHECK_EQ(__LITTLE_ENDIAN, __BYTE_ORDER); // << ": wrong byte order";
+        return;
+      }
+      break;
+    }
+    case ELFDATA2MSB: {
+      if (__BIG_ENDIAN != __BYTE_ORDER) {
+        DCHECK_EQ(__BIG_ENDIAN, __BYTE_ORDER); // << ": wrong byte order";
+        return;
+      }
+      break;
+    }
+    default: {
+      RAW_DCHECK(false, "unexpected data encoding"); // << base_as_char[EI_DATA];
+      return;
+    }
+  }
+
+  ehdr_ = reinterpret_cast<const ElfW(Ehdr) *>(base);
+  const ElfW(Phdr) *dynamic_program_header = NULL;
+  for (int i = 0; i < ehdr_->e_phnum; ++i) {
+    const ElfW(Phdr) *const program_header = GetPhdr(i);
+    switch (program_header->p_type) {
+      case PT_LOAD:
+        if (link_base_ == ~0L) {
+          link_base_ = program_header->p_vaddr;
+        }
+        break;
+      case PT_DYNAMIC:
+        dynamic_program_header = program_header;
+        break;
+    }
+  }
+  if (link_base_ == ~0L || !dynamic_program_header) {
+    RAW_DCHECK(~0L != link_base_, "no PT_LOADs in VDSO");
+    RAW_DCHECK(dynamic_program_header, "no PT_DYNAMIC in VDSO");
+    // Mark this image as not present. Init(0) cannot recurse infinitely.
+    Init(0);
+    return;
+  }
+  ptrdiff_t relocation =
+      base_as_char - reinterpret_cast<const char *>(link_base_);
+  ElfW(Dyn) *dynamic_entry =
+      reinterpret_cast<ElfW(Dyn) *>(dynamic_program_header->p_vaddr +
+                                    relocation);
+  for (; dynamic_entry->d_tag != DT_NULL; ++dynamic_entry) {
+    ElfW(Xword) value = dynamic_entry->d_un.d_val;
+    if (fake_vdso) {
+      // A complication: in the real VDSO, dynamic entries are not relocated
+      // (it wasn't loaded by a dynamic loader). But when testing with a
+      // "fake" dlopen()ed vdso library, the loader relocates some (but
+      // not all!) of them before we get here.
+      if (dynamic_entry->d_tag == DT_VERDEF) {
+        // The only dynamic entry (of the ones we care about) that the
+        // libc-2.3.6 loader doesn't relocate.
+        value += relocation;
+      }
+    } else {
+      // Real VDSO. Everything needs to be relocated.
+      value += relocation;
+    }
+    switch (dynamic_entry->d_tag) {
+      case DT_HASH:
+        hash_ = reinterpret_cast<ElfW(Word) *>(value);
+        break;
+      case DT_SYMTAB:
+        dynsym_ = reinterpret_cast<ElfW(Sym) *>(value);
+        break;
+      case DT_STRTAB:
+        dynstr_ = reinterpret_cast<const char *>(value);
+        break;
+      case DT_VERSYM:
+        versym_ = reinterpret_cast<ElfW(Versym) *>(value);
+        break;
+      case DT_VERDEF:
+        verdef_ = reinterpret_cast<ElfW(Verdef) *>(value);
+        break;
+      case DT_VERDEFNUM:
+        verdefnum_ = dynamic_entry->d_un.d_val;
+        break;
+      case DT_STRSZ:
+        strsize_ = dynamic_entry->d_un.d_val;
+        break;
+      default:
+        // Unrecognized entries explicitly ignored.
+        break;
+    }
+  }
+  if (!hash_ || !dynsym_ || !dynstr_ || !versym_ ||
+      !verdef_ || !verdefnum_ || !strsize_) {
+    RAW_DCHECK(hash_, "invalid VDSO (no DT_HASH)");
+    RAW_DCHECK(dynsym_, "invalid VDSO (no DT_SYMTAB)");
+    RAW_DCHECK(dynstr_, "invalid VDSO (no DT_STRTAB)");
+    RAW_DCHECK(versym_, "invalid VDSO (no DT_VERSYM)");
+    RAW_DCHECK(verdef_, "invalid VDSO (no DT_VERDEF)");
+    RAW_DCHECK(verdefnum_, "invalid VDSO (no DT_VERDEFNUM)");
+    RAW_DCHECK(strsize_, "invalid VDSO (no DT_STRSZ)");
+    // Mark this image as not present. Init(0) cannot recurse infinitely.
+    Init(0);
+    return;
+  }
+}
+
+bool ElfMemImage::LookupSymbol(const char *name,
+                               const char *version,
+                               int type,
+                               SymbolInfo *info) const {
+  for (SymbolIterator it = begin(); it != end(); ++it) {
+    if (strcmp(it->name, name) == 0 && strcmp(it->version, version) == 0 &&
+        CurrentElfClass::ElfType(it->symbol) == type) {
+      if (info) {
+        *info = *it;
+      }
+      return true;
+    }
+  }
+  return false;
+}
+
+bool ElfMemImage::LookupSymbolByAddress(const void *address,
+                                        SymbolInfo *info_out) const {
+  for (SymbolIterator it = begin(); it != end(); ++it) {
+    const char *const symbol_start =
+        reinterpret_cast<const char *>(it->address);
+    const char *const symbol_end = symbol_start + it->symbol->st_size;
+    if (symbol_start <= address && address < symbol_end) {
+      if (info_out) {
+        // Client wants to know details for that symbol (the usual case).
+        if (CurrentElfClass::ElfBind(it->symbol) == STB_GLOBAL) {
+          // Strong symbol; just return it.
+          *info_out = *it;
+          return true;
+        } else {
+          // Weak or local. Record it, but keep looking for a strong one.
+          *info_out = *it;
+        }
+      } else {
+        // Client only cares if there is an overlapping symbol.
+        return true;
+      }
+    }
+  }
+  return false;
+}
+
+ElfMemImage::SymbolIterator::SymbolIterator(const void *const image, int index)
+    : index_(index), image_(image) {
+}
+
+const ElfMemImage::SymbolInfo *ElfMemImage::SymbolIterator::operator->() const {
+  return &info_;
+}
+
+const ElfMemImage::SymbolInfo& ElfMemImage::SymbolIterator::operator*() const {
+  return info_;
+}
+
+bool ElfMemImage::SymbolIterator::operator==(const SymbolIterator &rhs) const {
+  return this->image_ == rhs.image_ && this->index_ == rhs.index_;
+}
+
+bool ElfMemImage::SymbolIterator::operator!=(const SymbolIterator &rhs) const {
+  return !(*this == rhs);
+}
+
+ElfMemImage::SymbolIterator &ElfMemImage::SymbolIterator::operator++() {
+  this->Update(1);
+  return *this;
+}
+
+ElfMemImage::SymbolIterator ElfMemImage::begin() const {
+  SymbolIterator it(this, 0);
+  it.Update(0);
+  return it;
+}
+
+ElfMemImage::SymbolIterator ElfMemImage::end() const {
+  return SymbolIterator(this, GetNumSymbols());
+}
+
+void ElfMemImage::SymbolIterator::Update(int increment) {
+  const ElfMemImage *image = reinterpret_cast<const ElfMemImage *>(image_);
+  CHECK(image->IsPresent() || increment == 0);
+  if (!image->IsPresent()) {
+    return;
+  }
+  index_ += increment;
+  if (index_ >= image->GetNumSymbols()) {
+    index_ = image->GetNumSymbols();
+    return;
+  }
+  const ElfW(Sym)    *symbol = image->GetDynsym(index_);
+  const ElfW(Versym) *version_symbol = image->GetVersym(index_);
+  CHECK(symbol && version_symbol);
+  const char *const symbol_name = image->GetDynstr(symbol->st_name);
+  const ElfW(Versym) version_index = version_symbol[0] & VERSYM_VERSION;
+  const ElfW(Verdef) *version_definition = NULL;
+  const char *version_name = "";
+  if (symbol->st_shndx == SHN_UNDEF) {
+    // Undefined symbols reference DT_VERNEED, not DT_VERDEF, and
+    // version_index could well be greater than verdefnum_, so calling
+    // GetVerdef(version_index) may trigger an assertion.
+  } else {
+    version_definition = image->GetVerdef(version_index);
+  }
+  if (version_definition) {
+    // We expect 1 or 2 auxiliary entries: one for the version itself, and an
+    // optional second one if the version has a parent.
+    CHECK_LE(1, version_definition->vd_cnt);
+    CHECK_LE(version_definition->vd_cnt, 2);
+    const ElfW(Verdaux) *version_aux = image->GetVerdefAux(version_definition);
+    version_name = image->GetVerstr(version_aux->vda_name);
+  }
+  info_.name    = symbol_name;
+  info_.version = version_name;
+  info_.address = image->GetSymAddr(symbol);
+  info_.symbol  = symbol;
+}
+
+}  // namespace base
+
+#endif  // HAVE_ELF_MEM_IMAGE
diff --git a/src/base/elf_mem_image.h b/src/base/elf_mem_image.h
new file mode 100644
index 0000000..5fb00ff
--- /dev/null
+++ b/src/base/elf_mem_image.h
@@ -0,0 +1,135 @@
+// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
+// Copyright (c) 2008, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// ---
+// Author: Paul Pluzhnikov
+//
+// Allow dynamic symbol lookup for in-memory Elf images.
+
+#ifndef BASE_ELF_MEM_IMAGE_H_
+#define BASE_ELF_MEM_IMAGE_H_
+
+#include <config.h>
+#ifdef HAVE_FEATURES_H
+#include <features.h>   // for __GLIBC__
+#endif
+
+// Maybe one day we can rewrite this file not to require the elf
+// symbol extensions in glibc, but for right now we need them.
+#if defined(__ELF__) && defined(__GLIBC__) && !defined(__native_client__)
+
+#define HAVE_ELF_MEM_IMAGE 1
+
+#include <stdlib.h>
+#include <link.h>  // for ElfW
+
+namespace base {
+
+// An in-memory ELF image (may not exist on disk).
+class ElfMemImage {
+ public:
+  // Sentinel: there could never be an ELF image at this address.
+  static const void *const kInvalidBase;
+
+  // Information about a single vdso symbol.
+  // All pointers are into .dynsym, .dynstr, or .text of the VDSO.
+  // Do not free() them or modify through them.
+  struct SymbolInfo {
+    const char      *name;      // E.g. "__vdso_getcpu"
+    const char      *version;   // E.g. "LINUX_2.6", could be ""
+                                // for unversioned symbol.
+    const void      *address;   // Relocated symbol address.
+    const ElfW(Sym) *symbol;    // Symbol in the dynamic symbol table.
+  };
+
+  // Supports iteration over all dynamic symbols.
+  class SymbolIterator {
+   public:
+    friend class ElfMemImage;
+    const SymbolInfo *operator->() const;
+    const SymbolInfo &operator*() const;
+    SymbolIterator& operator++();
+    bool operator!=(const SymbolIterator &rhs) const;
+    bool operator==(const SymbolIterator &rhs) const;
+   private:
+    SymbolIterator(const void *const image, int index);
+    void Update(int incr);
+    SymbolInfo info_;
+    int index_;
+    const void *const image_;
+  };
+
+
+  explicit ElfMemImage(const void *base);
+  void                 Init(const void *base);
+  bool                 IsPresent() const { return ehdr_ != NULL; }
+  const ElfW(Phdr)*    GetPhdr(int index) const;
+  const ElfW(Sym)*     GetDynsym(int index) const;
+  const ElfW(Versym)*  GetVersym(int index) const;
+  const ElfW(Verdef)*  GetVerdef(int index) const;
+  const ElfW(Verdaux)* GetVerdefAux(const ElfW(Verdef) *verdef) const;
+  const char*          GetDynstr(ElfW(Word) offset) const;
+  const void*          GetSymAddr(const ElfW(Sym) *sym) const;
+  const char*          GetVerstr(ElfW(Word) offset) const;
+  int                  GetNumSymbols() const;
+
+  SymbolIterator begin() const;
+  SymbolIterator end() const;
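+
+  // Iteration sketch (assuming "image" is an initialized ElfMemImage):
+  //   for (ElfMemImage::SymbolIterator it = image.begin();
+  //        it != image.end(); ++it) {
+  //     printf("%s %s %p\n", it->name, it->version, it->address);
+  //   }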
+
+  // Look up a versioned dynamic symbol in the image.
+  // Returns false if the image is not present, or doesn't contain the
+  // given symbol/version/type combination.
+  // If info_out != NULL, additional details are filled in.
+  bool LookupSymbol(const char *name, const char *version,
+                    int symbol_type, SymbolInfo *info_out) const;
+
+  // Find info about the symbol (if any) which overlaps the given address.
+  // Returns true if a symbol was found; false if the image isn't present
+  // or doesn't have a symbol overlapping the given address.
+  // If info_out != NULL, additional details are filled in.
+  bool LookupSymbolByAddress(const void *address, SymbolInfo *info_out) const;
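+
+  // Lookup sketch ("__vdso_getcpu"/"LINUX_2.6" are merely illustrative;
+  // STT_FUNC comes from <elf.h>):
+  //   ElfMemImage::SymbolInfo info;
+  //   if (image.LookupSymbol("__vdso_getcpu", "LINUX_2.6", STT_FUNC, &info)) {
+  //     ... info.address is the relocated symbol address ...
+  //   }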
+
+ private:
+  const ElfW(Ehdr) *ehdr_;
+  const ElfW(Sym) *dynsym_;
+  const ElfW(Versym) *versym_;
+  const ElfW(Verdef) *verdef_;
+  const ElfW(Word) *hash_;
+  const char *dynstr_;
+  size_t strsize_;
+  size_t verdefnum_;
+  ElfW(Addr) link_base_;     // Link-time base (p_vaddr of first PT_LOAD).
+};
+
+}  // namespace base
+
+#endif  // __ELF__ and __GLIBC__ and !__native_client__
+
+#endif  // BASE_ELF_MEM_IMAGE_H_
diff --git a/src/base/elfcore.h b/src/base/elfcore.h
new file mode 100644
index 0000000..d9599ed
--- /dev/null
+++ b/src/base/elfcore.h
@@ -0,0 +1,401 @@
+// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
+/* Copyright (c) 2005-2008, Google Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with the
+ * distribution.
+ *     * Neither the name of Google Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * ---
+ * Author: Markus Gutschke, Carl Crous
+ */
+
+#ifndef _ELFCORE_H
+#define _ELFCORE_H
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* We currently only support x86-32, x86-64, ARM, MIPS, PPC on Linux.
+ * Porting to other related platforms should not be difficult.
+ */
+#if (defined(__i386__) || defined(__x86_64__) || defined(__ARM_ARCH_3__) || \
+     defined(__mips__) || defined(__PPC__)) && defined(__linux)
+
+#include <stdarg.h>
+#include <stdint.h>
+#include <sys/types.h>
+#include <config.h>
+
+
+/* Define the DUMPER symbol to make sure that there is exactly one
+ * core dumper built into the library.
+ */
+#define DUMPER "ELF"
+
+/* By the time we get a chance to read CPU registers in the calling
+ * thread, they are no longer in a particularly useful state. Besides,
+ * there will be multiple frames on the stack that just make the core
+ * file confusing. To fix this problem, we take a snapshot of the frame
+ * pointer, stack pointer, and instruction pointer at an earlier time,
+ * and then insert these values into the core file.
+ */
+
+#if defined(__i386__) || defined(__x86_64__)
+  typedef struct i386_regs {    /* Normal (non-FPU) CPU registers            */
+  #ifdef __x86_64__
+    #define BP rbp
+    #define SP rsp
+    #define IP rip
+    uint64_t  r15,r14,r13,r12,rbp,rbx,r11,r10;
+    uint64_t  r9,r8,rax,rcx,rdx,rsi,rdi,orig_rax;
+    uint64_t  rip,cs,eflags;
+    uint64_t  rsp,ss;
+    uint64_t  fs_base, gs_base;
+    uint64_t  ds,es,fs,gs;
+  #else
+    #define BP ebp
+    #define SP esp
+    #define IP eip
+    uint32_t  ebx, ecx, edx, esi, edi, ebp, eax;
+    uint16_t  ds, __ds, es, __es;
+    uint16_t  fs, __fs, gs, __gs;
+    uint32_t  orig_eax, eip;
+    uint16_t  cs, __cs;
+    uint32_t  eflags, esp;
+    uint16_t  ss, __ss;
+  #endif
+  } i386_regs;
+#elif defined(__ARM_ARCH_3__)
+  typedef struct arm_regs {     /* General purpose registers                 */
+    #define BP uregs[11]        /* Frame pointer                             */
+    #define SP uregs[13]        /* Stack pointer                             */
+    #define IP uregs[15]        /* Program counter                           */
+    #define LR uregs[14]        /* Link register                             */
+    long uregs[18];
+  } arm_regs;
+#elif defined(__mips__)
+  typedef struct mips_regs {
+    unsigned long pad[6];       /* Unused padding to match kernel structures */
+    unsigned long uregs[32];    /* General purpose registers.                */
+    unsigned long hi;           /* Used for multiplication and division.     */
+    unsigned long lo;
+    unsigned long cp0_epc;      /* Program counter.                          */
+    unsigned long cp0_badvaddr;
+    unsigned long cp0_status;
+    unsigned long cp0_cause;
+    unsigned long unused;
+  } mips_regs;
+#elif defined (__PPC__)
+  typedef struct ppc_regs {
+    #define SP uregs[1]         /* Stack pointer                             */
+    #define IP rip              /* Program counter                           */
+    #define LR lr               /* Link register                             */
+    unsigned long uregs[32];    /* General Purpose Registers - r0-r31.      */
+    double        fpr[32];      /* Floating-Point Registers - f0-f31.       */
+    unsigned long rip;          /* Program counter.                         */
+    unsigned long msr;
+    unsigned long ccr;
+    unsigned long lr;
+    unsigned long ctr;
+    unsigned long xeq;
+    unsigned long mq;
+  } ppc_regs;
+#endif
+
+#if defined(__i386__) && defined(__GNUC__)
+  /* On x86 we provide an optimized version of the FRAME() macro, if the
+   * compiler supports a GCC-style asm() directive. This results in somewhat
+   * more accurate values for CPU registers.
+   */
+  typedef struct Frame {
+    struct i386_regs uregs;
+    int              errno_;
+    pid_t            tid;
+  } Frame;
+  #define FRAME(f) Frame f;                                           \
+                   do {                                               \
+                     f.errno_ = errno;                                \
+                     f.tid    = sys_gettid();                         \
+                     __asm__ volatile (                               \
+                       "push %%ebp\n"                                 \
+                       "push %%ebx\n"                                 \
+                       "mov  %%ebx,0(%%eax)\n"                        \
+                       "mov  %%ecx,4(%%eax)\n"                        \
+                       "mov  %%edx,8(%%eax)\n"                        \
+                       "mov  %%esi,12(%%eax)\n"                       \
+                       "mov  %%edi,16(%%eax)\n"                       \
+                       "mov  %%ebp,20(%%eax)\n"                       \
+                       "mov  %%eax,24(%%eax)\n"                       \
+                       "mov  %%ds,%%ebx\n"                            \
+                       "mov  %%ebx,28(%%eax)\n"                       \
+                       "mov  %%es,%%ebx\n"                            \
+                       "mov  %%ebx,32(%%eax)\n"                       \
+                       "mov  %%fs,%%ebx\n"                            \
+                       "mov  %%ebx,36(%%eax)\n"                       \
+                       "mov  %%gs,%%ebx\n"                            \
+                       "mov  %%ebx, 40(%%eax)\n"                      \
+                       "call 0f\n"                                    \
+                     "0:pop %%ebx\n"                                  \
+                       "add  $1f-0b,%%ebx\n"                          \
+                       "mov  %%ebx,48(%%eax)\n"                       \
+                       "mov  %%cs,%%ebx\n"                            \
+                       "mov  %%ebx,52(%%eax)\n"                       \
+                       "pushf\n"                                      \
+                       "pop  %%ebx\n"                                 \
+                       "mov  %%ebx,56(%%eax)\n"                       \
+                       "mov  %%esp,%%ebx\n"                           \
+                       "add  $8,%%ebx\n"                              \
+                       "mov  %%ebx,60(%%eax)\n"                       \
+                       "mov  %%ss,%%ebx\n"                            \
+                       "mov  %%ebx,64(%%eax)\n"                       \
+                       "pop  %%ebx\n"                                 \
+                       "pop  %%ebp\n"                                 \
+                     "1:"                                             \
+                       : : "a" (&f) : "memory");                      \
+                     } while (0)
+  #define SET_FRAME(f,r)                                              \
+                     do {                                             \
+                       errno = (f).errno_;                            \
+                       (r)   = (f).uregs;                             \
+                     } while (0)
+#elif defined(__x86_64__) && defined(__GNUC__)
+  /* The FRAME and SET_FRAME macros for x86_64.  */
+  typedef struct Frame {
+    struct i386_regs uregs;
+    int              errno_;
+    pid_t            tid;
+  } Frame;
+  #define FRAME(f) Frame f;                                           \
+                   do {                                               \
+                     f.errno_ = errno;                                \
+                     f.tid    = sys_gettid();                         \
+                     __asm__ volatile (                               \
+                       "push %%rbp\n"                                 \
+                       "push %%rbx\n"                                 \
+                       "mov  %%r15,0(%%rax)\n"                        \
+                       "mov  %%r14,8(%%rax)\n"                        \
+                       "mov  %%r13,16(%%rax)\n"                       \
+                       "mov  %%r12,24(%%rax)\n"                       \
+                       "mov  %%rbp,32(%%rax)\n"                       \
+                       "mov  %%rbx,40(%%rax)\n"                       \
+                       "mov  %%r11,48(%%rax)\n"                       \
+                       "mov  %%r10,56(%%rax)\n"                       \
+                       "mov  %%r9,64(%%rax)\n"                        \
+                       "mov  %%r8,72(%%rax)\n"                        \
+                       "mov  %%rax,80(%%rax)\n"                       \
+                       "mov  %%rcx,88(%%rax)\n"                       \
+                       "mov  %%rdx,96(%%rax)\n"                       \
+                       "mov  %%rsi,104(%%rax)\n"                      \
+                       "mov  %%rdi,112(%%rax)\n"                      \
+                       "mov  %%ds,%%rbx\n"                            \
+                       "mov  %%rbx,184(%%rax)\n"                      \
+                       "mov  %%es,%%rbx\n"                            \
+                       "mov  %%rbx,192(%%rax)\n"                      \
+                       "mov  %%fs,%%rbx\n"                            \
+                       "mov  %%rbx,200(%%rax)\n"                      \
+                       "mov  %%gs,%%rbx\n"                            \
+                       "mov  %%rbx,208(%%rax)\n"                      \
+                       "call 0f\n"                                    \
+                     "0:pop %%rbx\n"                                  \
+                       "add  $1f-0b,%%rbx\n"                          \
+                       "mov  %%rbx,128(%%rax)\n"                      \
+                       "mov  %%cs,%%rbx\n"                            \
+                       "mov  %%rbx,136(%%rax)\n"                      \
+                       "pushf\n"                                      \
+                       "pop  %%rbx\n"                                 \
+                       "mov  %%rbx,144(%%rax)\n"                      \
+                       "mov  %%rsp,%%rbx\n"                           \
+                       "add  $16,%%ebx\n"                             \
+                       "mov  %%rbx,152(%%rax)\n"                      \
+                       "mov  %%ss,%%rbx\n"                            \
+                       "mov  %%rbx,160(%%rax)\n"                      \
+                       "pop  %%rbx\n"                                 \
+                       "pop  %%rbp\n"                                 \
+                     "1:"                                             \
+                       : : "a" (&f) : "memory");                      \
+                     } while (0)
+  #define SET_FRAME(f,r)                                              \
+                     do {                                             \
+                       errno = (f).errno_;                            \
+                       (f).uregs.fs_base = (r).fs_base;               \
+                       (f).uregs.gs_base = (r).gs_base;               \
+                       (r)   = (f).uregs;                             \
+                     } while (0)
+#elif defined(__ARM_ARCH_3__) && defined(__GNUC__)
+  /* ARM calling conventions are a little more tricky. A little assembly
+   * helps in obtaining an accurate snapshot of all registers.
+   */
+  typedef struct Frame {
+    struct arm_regs arm;
+    int             errno_;
+    pid_t           tid;
+  } Frame;
+  #define FRAME(f) Frame f;                                           \
+                   do {                                               \
+                     long cpsr;                                       \
+                     f.errno_ = errno;                                \
+                     f.tid    = sys_gettid();                         \
+                     __asm__ volatile(                                \
+                       "stmia %0, {r0-r15}\n" /* All integer regs   */\
+                       : : "r"(&f.arm) : "memory");                   \
+                     f.arm.uregs[16] = 0;                             \
+                     __asm__ volatile(                                \
+                       "mrs %0, cpsr\n"       /* Condition code reg */\
+                       : "=r"(cpsr));                                 \
+                     f.arm.uregs[17] = cpsr;                          \
+                   } while (0)
+  #define SET_FRAME(f,r)                                              \
+                     do {                                             \
+                       /* Don't override the FPU status register.   */\
+                       /* Use the value obtained from ptrace(). This*/\
+                       /* works, because our code does not perform  */\
+                       /* any FPU operations, itself.               */\
+                       long fps      = (f).arm.uregs[16];             \
+                       errno         = (f).errno_;                    \
+                       (r)           = (f).arm;                       \
+                       (r).uregs[16] = fps;                           \
+                     } while (0)
+#elif defined(__mips__) && defined(__GNUC__)
+  typedef struct Frame {
+    struct mips_regs mips_regs;
+    int              errno_;
+    pid_t            tid;
+  } Frame;
+  #define MIPSREG(n) ({ register unsigned long r __asm__("$"#n); r; })
+  #define FRAME(f) Frame f = { 0 };                                   \
+                   do {                                               \
+                     unsigned long hi, lo;                            \
+                     register unsigned long pc __asm__("$31");        \
+                     f.mips_regs.uregs[ 0] = MIPSREG( 0);             \
+                     f.mips_regs.uregs[ 1] = MIPSREG( 1);             \
+                     f.mips_regs.uregs[ 2] = MIPSREG( 2);             \
+                     f.mips_regs.uregs[ 3] = MIPSREG( 3);             \
+                     f.mips_regs.uregs[ 4] = MIPSREG( 4);             \
+                     f.mips_regs.uregs[ 5] = MIPSREG( 5);             \
+                     f.mips_regs.uregs[ 6] = MIPSREG( 6);             \
+                     f.mips_regs.uregs[ 7] = MIPSREG( 7);             \
+                     f.mips_regs.uregs[ 8] = MIPSREG( 8);             \
+                     f.mips_regs.uregs[ 9] = MIPSREG( 9);             \
+                     f.mips_regs.uregs[10] = MIPSREG(10);             \
+                     f.mips_regs.uregs[11] = MIPSREG(11);             \
+                     f.mips_regs.uregs[12] = MIPSREG(12);             \
+                     f.mips_regs.uregs[13] = MIPSREG(13);             \
+                     f.mips_regs.uregs[14] = MIPSREG(14);             \
+                     f.mips_regs.uregs[15] = MIPSREG(15);             \
+                     f.mips_regs.uregs[16] = MIPSREG(16);             \
+                     f.mips_regs.uregs[17] = MIPSREG(17);             \
+                     f.mips_regs.uregs[18] = MIPSREG(18);             \
+                     f.mips_regs.uregs[19] = MIPSREG(19);             \
+                     f.mips_regs.uregs[20] = MIPSREG(20);             \
+                     f.mips_regs.uregs[21] = MIPSREG(21);             \
+                     f.mips_regs.uregs[22] = MIPSREG(22);             \
+                     f.mips_regs.uregs[23] = MIPSREG(23);             \
+                     f.mips_regs.uregs[24] = MIPSREG(24);             \
+                     f.mips_regs.uregs[25] = MIPSREG(25);             \
+                     f.mips_regs.uregs[26] = MIPSREG(26);             \
+                     f.mips_regs.uregs[27] = MIPSREG(27);             \
+                     f.mips_regs.uregs[28] = MIPSREG(28);             \
+                     f.mips_regs.uregs[29] = MIPSREG(29);             \
+                     f.mips_regs.uregs[30] = MIPSREG(30);             \
+                     f.mips_regs.uregs[31] = MIPSREG(31);             \
+                     __asm__ volatile ("mfhi %0" : "=r"(hi));         \
+                     __asm__ volatile ("mflo %0" : "=r"(lo));         \
+                     __asm__ volatile ("jal 1f; 1:nop" : "=r"(pc));   \
+                     f.mips_regs.hi       = hi;                       \
+                     f.mips_regs.lo       = lo;                       \
+                     f.mips_regs.cp0_epc  = pc;                       \
+                     f.errno_             = errno;                    \
+                     f.tid                = sys_gettid();             \
+                   } while (0)
+  #define SET_FRAME(f,r)                                              \
+                   do {                                               \
+                     errno       = (f).errno_;                        \
+                     memcpy((r).uregs, (f).mips_regs.uregs,           \
+                            32*sizeof(unsigned long));                \
+                     (r).hi      = (f).mips_regs.hi;                  \
+                     (r).lo      = (f).mips_regs.lo;                  \
+                     (r).cp0_epc = (f).mips_regs.cp0_epc;             \
+                   } while (0)
+#else
+  /* If we do not have a hand-optimized assembly version of the FRAME()
+   * macro, we cannot reliably unwind the stack, so the coredumper will
+   * show a few additional stack frames.
+   */
+  typedef struct Frame {
+    pid_t tid;
+  } Frame;
+  #define FRAME(f) Frame f; do { f.tid = sys_gettid(); } while (0)
+  #define SET_FRAME(f,r) do { } while (0)
+#endif
+
+
+/* Internal function for generating a core file. This API can change without
+ * notice and is only supposed to be used internally by the core dumper.
+ *
+ * This function works for both single- and multi-threaded core
+ * dumps. If called as
+ *
+ *   FRAME(frame);
+ *   InternalGetCoreDump(&frame, 0, NULL, ap);
+ *
+ * it creates a core file that only contains information about the
+ * calling thread.
+ *
+ * Optionally, the caller can provide information about other threads
+ * by passing their process ids in "thread_pids". The process id of
+ * the caller should not be included in this array. All of the threads
+ * must have been attached to with ptrace(), prior to calling this
+ * function. They will be detached when "InternalGetCoreDump()" returns.
+ *
+ * This function either returns a file handle that can be read for obtaining
+ * a core dump, or "-1" in case of an error. In the latter case, "errno"
+ * will be set appropriately.
+ *
+ * While "InternalGetCoreDump()" is not technically async signal safe, you
+ * might be tempted to invoke it from a signal handler. The code goes to
+ * great lengths to make a best effort that this will actually work. But in
+ * any case, you must make sure that you preserve the value of "errno"
+ * yourself. It is guaranteed to be clobbered otherwise.
+ *
+ * Also, "InternalGetCoreDump" is not strictly speaking re-entrant. Again,
+ * it makes a best effort to behave reasonably when called in a multi-
+ * threaded environment, but it is ultimately the caller's responsibility
+ * to provide locking.
+ */
+int InternalGetCoreDump(void *frame, int num_threads, pid_t *thread_pids,
+                        va_list ap
+                     /* const struct CoreDumpParameters *params,
+                        const char *file_name,
+                        const char *PATH
+                      */);
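+
+/* Illustrative usage (a sketch, not part of the API): dumping only the
+ * calling thread, assuming "ap" is a va_list carrying the parameters
+ * described in the commented-out prototype above:
+ *
+ *   FRAME(frame);
+ *   int fd = InternalGetCoreDump(&frame, 0, NULL, ap);
+ *   if (fd >= 0) {
+ *     ... read the core image from fd, then close it ...
+ *   }
+ */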
+
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+#endif /* _ELFCORE_H */
diff --git a/src/base/googleinit.h b/src/base/googleinit.h
new file mode 100644
index 0000000..3ea411a
--- /dev/null
+++ b/src/base/googleinit.h
@@ -0,0 +1,74 @@
+// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
+// Copyright (c) 2005, Google Inc.
+// All rights reserved.
+// 
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+// 
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+// 
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// ---
+// Author: Jacob Hoffman-Andrews
+
+#ifndef _GOOGLEINIT_H
+#define _GOOGLEINIT_H
+
+#include "base/logging.h"
+
+class GoogleInitializer {
+ public:
+  typedef void (*VoidFunction)(void);
+  GoogleInitializer(const char* name, VoidFunction ctor, VoidFunction dtor)
+      : name_(name), destructor_(dtor) {
+    RAW_VLOG(10, "<GoogleModuleObject> constructing: %s\n", name_);
+    if (ctor)
+      ctor();
+  }
+  ~GoogleInitializer() {
+    RAW_VLOG(10, "<GoogleModuleObject> destroying: %s\n", name_);
+    if (destructor_)
+      destructor_();
+  }
+
+ private:
+  const char* const name_;
+  const VoidFunction destructor_;
+};
+
+#define REGISTER_MODULE_INITIALIZER(name, body)                 \
+  namespace {                                                   \
+    static void google_init_module_##name () { body; }          \
+    GoogleInitializer google_initializer_module_##name(#name,   \
+            google_init_module_##name, NULL);                   \
+  }
+
+#define REGISTER_MODULE_DESTRUCTOR(name, body)                  \
+  namespace {                                                   \
+    static void google_destruct_module_##name () { body; }      \
+    GoogleInitializer google_destructor_module_##name(#name,    \
+            NULL, google_destruct_module_##name);               \
+  }
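+
+// Illustrative usage (a sketch, not from this file): run code at static
+// initialization and destruction time for a hypothetical module "counters"
+// with hypothetical helpers InitCounters() and TearDownCounters():
+//
+//   REGISTER_MODULE_INITIALIZER(counters, InitCounters());
+//   REGISTER_MODULE_DESTRUCTOR(counters, TearDownCounters());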
+
+
+#endif /* _GOOGLEINIT_H */
diff --git a/src/base/linux_syscall_support.h b/src/base/linux_syscall_support.h
new file mode 100644
index 0000000..56b8fac
--- /dev/null
+++ b/src/base/linux_syscall_support.h
@@ -0,0 +1,2484 @@
+// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
+/* Copyright (c) 2005-2008, Google Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with the
+ * distribution.
+ *     * Neither the name of Google Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * ---
+ * Author: Markus Gutschke
+ */
+
+/* This file includes Linux-specific support functions common to the
+ * coredumper and the thread lister; primarily, this is a collection
+ * of direct system calls, and a couple of symbols missing from
+ * standard header files.
+ * There are a few options that the including file can set to control
+ * the behavior of this file:
+ *
+ * SYS_CPLUSPLUS:
+ *   The entire header file will normally be wrapped in 'extern "C" { }',
+ *   making it suitable for compilation as both C and C++ source. If you
+ *   do not want to do this, you can set the SYS_CPLUSPLUS macro to inhibit
+ *   the wrapping. N.B. doing so will suppress inclusion of all prerequisite
+ *   system header files, too. It is the caller's responsibility to provide
+ *   the necessary definitions.
+ *
+ * SYS_ERRNO:
+ *   All system calls will update "errno" unless overridden by setting the
+ *   SYS_ERRNO macro prior to including this file. SYS_ERRNO should be
+ *   an l-value.
+ *
+ * SYS_INLINE:
+ *   New symbols will be defined "static inline", unless overridden by
+ *   the SYS_INLINE macro.
+ *
+ * SYS_LINUX_SYSCALL_SUPPORT_H
+ *   This macro is used to avoid multiple inclusions of this header file.
+ *   If you need to include this file more than once, make sure to
+ *   unset SYS_LINUX_SYSCALL_SUPPORT_H before each inclusion.
+ *
+ * SYS_PREFIX:
+ *   New system calls will have a prefix of "sys_" unless overridden by
+ *   the SYS_PREFIX macro. Valid values for this macro are [0..9], which
+ *   result in prefixes "sys[0..9]_". It is also possible to set this
+ *   macro to -1, which avoids all prefixes.
+ *
+ * This file defines a few internal symbols that all start with "LSS_".
+ * Do not access these symbols from outside this file. They are not part
+ * of the supported API.
+ *
+ * NOTE: This is a stripped down version of the official opensource
+ * version of linux_syscall_support.h, which lives at
+ *    http://code.google.com/p/linux-syscall-support/
+ * It includes only the syscalls that are used in perftools, plus a
+ * few extra.  Here's the breakdown:
+ * 1) Perftools uses these: grep -rho 'sys_[a-z0-9_A-Z]* *(' src | sort -u
+ *      sys__exit(
+ *      sys_clone(
+ *      sys_close(
+ *      sys_fcntl(
+ *      sys_fstat(
+ *      sys_futex(
+ *      sys_getcpu(
+ *      sys_getdents64(
+ *      sys_getppid(
+ *      sys_gettid(
+ *      sys_lseek(
+ *      sys_mmap(
+ *      sys_mremap(
+ *      sys_munmap(
+ *      sys_open(
+ *      sys_pipe(
+ *      sys_prctl(
+ *      sys_ptrace(
+ *      sys_ptrace_detach(
+ *      sys_read(
+ *      sys_sched_yield(
+ *      sys_sigaction(
+ *      sys_sigaltstack(
+ *      sys_sigdelset(
+ *      sys_sigfillset(
+ *      sys_sigprocmask(
+ *      sys_socket(
+ *      sys_stat(
+ *      sys_waitpid(
+ * 2) These are used as subroutines of the above:
+ *      sys_getpid       -- gettid
+ *      sys_kill         -- ptrace_detach
+ *      sys_restore      -- sigaction
+ *      sys_restore_rt   -- sigaction
+ *      sys_socketcall   -- socket
+ *      sys_wait4        -- waitpid
+ * 3) I left these in even though they're not used.  They either
+ * complement the above (write vs read) or are variants (rt_sigaction):
+ *      sys_fstat64
+ *      sys_llseek
+ *      sys_mmap2
+ *      sys_openat
+ *      sys_getdents
+ *      sys_rt_sigaction
+ *      sys_rt_sigprocmask
+ *      sys_sigaddset
+ *      sys_sigemptyset
+ *      sys_stat64
+ *      sys_write
+ */
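+
+/* Illustrative example (a sketch, not a requirement): an including file
+ * that wants unprefixed syscall names and a private errno location could
+ * set the option macros described above before inclusion:
+ *
+ *   #define SYS_PREFIX -1          // sys_gettid() becomes gettid()
+ *   #define SYS_ERRNO  my_errno    // "my_errno" is a hypothetical l-value
+ *   #include "base/linux_syscall_support.h"
+ */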
+#ifndef SYS_LINUX_SYSCALL_SUPPORT_H
+#define SYS_LINUX_SYSCALL_SUPPORT_H
+
+/* We currently only support x86-32, x86-64, ARM, MIPS, PPC/PPC64 and AArch64 on Linux.
+ * Porting to other related platforms should not be difficult.
+ */
+#if (defined(__i386__) || defined(__x86_64__) || defined(__arm__) || \
+     defined(__mips__) || defined(__PPC__) || defined(__aarch64__)) && defined(__linux)
+
+#ifndef SYS_CPLUSPLUS
+#ifdef __cplusplus
+/* Some system header files in older versions of gcc neglect to properly
+ * handle being included from C++. As it appears to be harmless to have
+ * multiple nested 'extern "C"' blocks, just add another one here.
+ */
+extern "C" {
+#endif
+
+#include <errno.h>
+#include <signal.h>
+#include <stdarg.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <string.h>
+#include <sys/ptrace.h>
+#include <sys/resource.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <syscall.h>
+#include <unistd.h>
+#include <linux/unistd.h>
+#include <endian.h>
+
+#ifdef __mips__
+/* Include definitions of the ABI currently in use.                          */
+#include <sgidefs.h>
+#endif
+
+#endif
+
+/* As glibc often provides subtly incompatible data structures (and implicit
+ * wrapper functions that convert them), we provide our own kernel data
+ * structures for use by the system calls.
+ * These structures have been developed by using Linux 2.6.23 headers for
+ * reference. Note, though, that we do not care about exact API compatibility
+ * with the kernel, and in fact the kernel often does not have a single
+ * API that works across architectures. Instead, we try to mimic the glibc
+ * API where reasonable, and only guarantee ABI compatibility with the
+ * kernel headers.
+ * Most notably, here are a few changes that were made to the structures
+ * defined by kernel headers:
+ *
+ * - we only define structures, but not symbolic names for kernel data
+ *   types. For the latter, we directly use the native C datatype
+ *   (i.e. "unsigned" instead of "mode_t").
+ * - in a few cases, it is possible to define identical structures for
+ *   both 32bit (e.g. i386) and 64bit (e.g. x86-64) platforms by
+ *   standardizing on the 64bit version of the data types. In particular,
+ *   this means that we use "unsigned" where the 32bit headers say
+ *   "unsigned long".
+ * - overall, we try to minimize the number of cases where we need to
+ *   conditionally define different structures.
+ * - the "struct kernel_sigaction" class of structures have been
+ *   modified to more closely mimic glibc's API by introducing an
+ *   anonymous union for the function pointer.
+ * - a small number of field names had to have an underscore appended to
+ *   them, because glibc defines a global macro by the same name.
+ */
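+
+/* For example, the anonymous union mentioned above lets callers assign
+ * either handler flavor directly (a sketch; "kernel_sigaction" is defined
+ * below, and "my_handler" is a hypothetical sa_sigaction-style function):
+ *
+ *   struct kernel_sigaction sa;
+ *   memset(&sa, 0, sizeof(sa));
+ *   sa.sa_handler_ = SIG_IGN;      // or: sa.sa_sigaction_ = my_handler;
+ */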
+
+/* include/linux/dirent.h                                                    */
+struct kernel_dirent64 {
+  unsigned long long d_ino;
+  long long          d_off;
+  unsigned short     d_reclen;
+  unsigned char      d_type;
+  char               d_name[256];
+};
+
+/* include/linux/dirent.h                                                    */
+struct kernel_dirent {
+  long               d_ino;
+  long               d_off;
+  unsigned short     d_reclen;
+  char               d_name[256];
+};
+
+/* include/linux/time.h                                                      */
+struct kernel_timespec {
+  long               tv_sec;
+  long               tv_nsec;
+};
+
+/* include/linux/time.h                                                      */
+struct kernel_timeval {
+  long               tv_sec;
+  long               tv_usec;
+};
+
+/* include/linux/resource.h                                                  */
+struct kernel_rusage {
+  struct kernel_timeval ru_utime;
+  struct kernel_timeval ru_stime;
+  long               ru_maxrss;
+  long               ru_ixrss;
+  long               ru_idrss;
+  long               ru_isrss;
+  long               ru_minflt;
+  long               ru_majflt;
+  long               ru_nswap;
+  long               ru_inblock;
+  long               ru_oublock;
+  long               ru_msgsnd;
+  long               ru_msgrcv;
+  long               ru_nsignals;
+  long               ru_nvcsw;
+  long               ru_nivcsw;
+};
+
+#if defined(__i386__) || defined(__arm__) || defined(__PPC__)
+
+/* include/asm-{arm,i386,mips,ppc}/signal.h                                  */
+struct kernel_old_sigaction {
+  union {
+    void             (*sa_handler_)(int);
+    void             (*sa_sigaction_)(int, siginfo_t *, void *);
+  };
+  unsigned long      sa_mask;
+  unsigned long      sa_flags;
+  void               (*sa_restorer)(void);
+} __attribute__((packed,aligned(4)));
+#elif (defined(__mips__) && _MIPS_SIM == _MIPS_SIM_ABI32)
+  #define kernel_old_sigaction kernel_sigaction
+#endif
+
+/* Some kernel functions (e.g. sigaction() in 2.6.23) require that the
+ * caller-supplied signal mask exactly match the size of the kernel's
+ * signal set, even though the API was
+ * intended to be extensible. We define our own KERNEL_NSIG to deal with
+ * this.
+ * Please note that glibc provides signals [1.._NSIG-1], whereas the
+ * kernel (and this header) provides the range [1..KERNEL_NSIG]. The
+ * actual number of signals is obviously the same, but the constants
+ * differ by one.
+ */
+#ifdef __mips__
+#define KERNEL_NSIG 128
+#else
+#define KERNEL_NSIG  64
+#endif
+
+/* include/asm-{arm,i386,mips,x86_64}/signal.h                               */
+struct kernel_sigset_t {
+  unsigned long sig[(KERNEL_NSIG + 8*sizeof(unsigned long) - 1)/
+                    (8*sizeof(unsigned long))];
+};
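+/* Worked example of the sizing above: with KERNEL_NSIG == 64 and 32-bit
+ * "unsigned long", sig[] has (64+31)/32 == 2 elements (one element with
+ * 64-bit longs); with the MIPS value of 128, it has 4 and 2 elements,
+ * respectively.
+ */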
+
+/* include/asm-{arm,generic,i386,mips,x86_64,ppc}/signal.h                   */
+struct kernel_sigaction {
+#ifdef __mips__
+  unsigned long      sa_flags;
+  union {
+    void             (*sa_handler_)(int);
+    void             (*sa_sigaction_)(int, siginfo_t *, void *);
+  };
+  struct kernel_sigset_t sa_mask;
+#else
+  union {
+    void             (*sa_handler_)(int);
+    void             (*sa_sigaction_)(int, siginfo_t *, void *);
+  };
+  unsigned long      sa_flags;
+  void               (*sa_restorer)(void);
+  struct kernel_sigset_t sa_mask;
+#endif
+};
+
+/* include/asm-{arm,i386,mips,ppc}/stat.h                                    */
+#ifdef __mips__
+#if _MIPS_SIM == _MIPS_SIM_ABI64
+struct kernel_stat {
+#else
+struct kernel_stat64 {
+#endif
+  unsigned           st_dev;
+  unsigned           __pad0[3];
+  unsigned long long st_ino;
+  unsigned           st_mode;
+  unsigned           st_nlink;
+  unsigned           st_uid;
+  unsigned           st_gid;
+  unsigned           st_rdev;
+  unsigned           __pad1[3];
+  long long          st_size;
+  unsigned           st_atime_;
+  unsigned           st_atime_nsec_;
+  unsigned           st_mtime_;
+  unsigned           st_mtime_nsec_;
+  unsigned           st_ctime_;
+  unsigned           st_ctime_nsec_;
+  unsigned           st_blksize;
+  unsigned           __pad2;
+  unsigned long long st_blocks;
+};
+#elif defined __PPC__
+struct kernel_stat64 {
+  unsigned long long st_dev;
+  unsigned long long st_ino;
+  unsigned           st_nlink;
+  unsigned           st_mode;
+  unsigned           st_uid;
+  unsigned           st_gid;
+  int                __pad2;
+  unsigned long long st_rdev;
+  long long          st_size;
+  long long          st_blksize;
+  long long          st_blocks;
+  kernel_timespec    st_atim;
+  kernel_timespec    st_mtim;
+  kernel_timespec    st_ctim;
+  unsigned long      __unused4;
+  unsigned long      __unused5;
+  unsigned long      __unused6;
+};
+#else
+struct kernel_stat64 {
+  unsigned long long st_dev;
+  unsigned char      __pad0[4];
+  unsigned           __st_ino;
+  unsigned           st_mode;
+  unsigned           st_nlink;
+  unsigned           st_uid;
+  unsigned           st_gid;
+  unsigned long long st_rdev;
+  unsigned char      __pad3[4];
+  long long          st_size;
+  unsigned           st_blksize;
+  unsigned long long st_blocks;
+  unsigned           st_atime_;
+  unsigned           st_atime_nsec_;
+  unsigned           st_mtime_;
+  unsigned           st_mtime_nsec_;
+  unsigned           st_ctime_;
+  unsigned           st_ctime_nsec_;
+  unsigned long long st_ino;
+};
+#endif
+
+/* include/asm-{arm,generic,i386,mips,x86_64,ppc}/stat.h                     */
+#if defined(__i386__) || defined(__arm__)
+struct kernel_stat {
+  /* The kernel headers suggest that st_dev and st_rdev should be 32bit
+   * quantities encoding 12bit major and 20bit minor numbers in an interleaved
+   * format. In reality, we do not see useful data in the top bits. So,
+   * we'll leave the padding in here, until we find a better solution.
+   */
+  unsigned short     st_dev;
+  short              pad1;
+  unsigned           st_ino;
+  unsigned short     st_mode;
+  unsigned short     st_nlink;
+  unsigned short     st_uid;
+  unsigned short     st_gid;
+  unsigned short     st_rdev;
+  short              pad2;
+  unsigned           st_size;
+  unsigned           st_blksize;
+  unsigned           st_blocks;
+  unsigned           st_atime_;
+  unsigned           st_atime_nsec_;
+  unsigned           st_mtime_;
+  unsigned           st_mtime_nsec_;
+  unsigned           st_ctime_;
+  unsigned           st_ctime_nsec_;
+  unsigned           __unused4;
+  unsigned           __unused5;
+};
+#elif defined(__x86_64__)
+struct kernel_stat {
+  uint64_t           st_dev;
+  uint64_t           st_ino;
+  uint64_t           st_nlink;
+  unsigned           st_mode;
+  unsigned           st_uid;
+  unsigned           st_gid;
+  unsigned           __pad0;
+  uint64_t           st_rdev;
+  int64_t            st_size;
+  int64_t            st_blksize;
+  int64_t            st_blocks;
+  uint64_t           st_atime_;
+  uint64_t           st_atime_nsec_;
+  uint64_t           st_mtime_;
+  uint64_t           st_mtime_nsec_;
+  uint64_t           st_ctime_;
+  uint64_t           st_ctime_nsec_;
+  int64_t            __unused[3];
+};
+#elif defined(__PPC__)
+struct kernel_stat {
+  unsigned long long st_dev;
+  unsigned long      st_ino;
+  unsigned long      st_nlink;
+  unsigned long      st_mode;
+  unsigned           st_uid;
+  unsigned           st_gid;
+  int                __pad2;
+  unsigned long long st_rdev;
+  long               st_size;
+  unsigned long      st_blksize;
+  unsigned long      st_blocks;
+  kernel_timespec    st_atim;
+  kernel_timespec    st_mtim;
+  kernel_timespec    st_ctim;
+  unsigned long      __unused4;
+  unsigned long      __unused5;
+  unsigned long      __unused6;
+};
+#elif (defined(__mips__) && _MIPS_SIM != _MIPS_SIM_ABI64)
+struct kernel_stat {
+  unsigned           st_dev;
+  int                st_pad1[3];
+  unsigned           st_ino;
+  unsigned           st_mode;
+  unsigned           st_nlink;
+  unsigned           st_uid;
+  unsigned           st_gid;
+  unsigned           st_rdev;
+  int                st_pad2[2];
+  long               st_size;
+  int                st_pad3;
+  long               st_atime_;
+  long               st_atime_nsec_;
+  long               st_mtime_;
+  long               st_mtime_nsec_;
+  long               st_ctime_;
+  long               st_ctime_nsec_;
+  int                st_blksize;
+  int                st_blocks;
+  int                st_pad4[14];
+};
+#elif defined(__aarch64__)
+struct kernel_stat {
+  unsigned long      st_dev;
+  unsigned long      st_ino;
+  unsigned int       st_mode;
+  unsigned int       st_nlink;
+  unsigned int       st_uid;
+  unsigned int       st_gid;
+  unsigned long      st_rdev;
+  unsigned long      __pad1;
+  long               st_size;
+  int                st_blksize;
+  int                __pad2;
+  long               st_blocks;
+  long               st_atime_;
+  unsigned long      st_atime_nsec_;
+  long               st_mtime_;
+  unsigned long      st_mtime_nsec_;
+  long               st_ctime_;
+  unsigned long      st_ctime_nsec_;
+  unsigned int       __unused4;
+  unsigned int       __unused5;
+};
+#endif
+
+
+/* Definitions missing from the standard header files                        */
+#ifndef O_DIRECTORY
+#if defined(__arm__)
+#define O_DIRECTORY             0040000
+#else
+#define O_DIRECTORY             0200000
+#endif
+#endif
+#ifndef PR_GET_DUMPABLE
+#define PR_GET_DUMPABLE         3
+#endif
+#ifndef PR_SET_DUMPABLE
+#define PR_SET_DUMPABLE         4
+#endif
+#ifndef AT_FDCWD
+#define AT_FDCWD                (-100)
+#endif
+#ifndef AT_SYMLINK_NOFOLLOW
+#define AT_SYMLINK_NOFOLLOW     0x100
+#endif
+#ifndef AT_REMOVEDIR
+#define AT_REMOVEDIR            0x200
+#endif
+#ifndef MREMAP_FIXED
+#define MREMAP_FIXED            2
+#endif
+#ifndef SA_RESTORER
+#define SA_RESTORER             0x04000000
+#endif
+
+#if defined(__i386__)
+#ifndef __NR_rt_sigaction
+#define __NR_rt_sigaction       174
+#define __NR_rt_sigprocmask     175
+#endif
+#ifndef __NR_stat64
+#define __NR_stat64             195
+#endif
+#ifndef __NR_fstat64
+#define __NR_fstat64            197
+#endif
+#ifndef __NR_getdents64
+#define __NR_getdents64         220
+#endif
+#ifndef __NR_gettid
+#define __NR_gettid             224
+#endif
+#ifndef __NR_futex
+#define __NR_futex              240
+#endif
+#ifndef __NR_openat
+#define __NR_openat             295
+#endif
+#ifndef __NR_getcpu
+#define __NR_getcpu             318
+#endif
+/* End of i386 definitions                                                   */
+#elif defined(__arm__)
+#ifndef __syscall
+#if defined(__thumb__) || defined(__ARM_EABI__)
+#define __SYS_REG(name) register long __sysreg __asm__("r6") = __NR_##name;
+#define __SYS_REG_LIST(regs...) [sysreg] "r" (__sysreg) , ##regs
+#define __syscall(name) "swi\t0"
+#define __syscall_safe(name)                     \
+  "push  {r7}\n"                                 \
+  "mov   r7,%[sysreg]\n"                         \
+  __syscall(name)"\n"                            \
+  "pop   {r7}"
+#else
+#define __SYS_REG(name)
+#define __SYS_REG_LIST(regs...) regs
+#define __syscall(name) "swi\t" __sys1(__NR_##name) ""
+#define __syscall_safe(name) __syscall(name)
+#endif
+#endif
+#ifndef __NR_rt_sigaction
+#define __NR_rt_sigaction       (__NR_SYSCALL_BASE + 174)
+#define __NR_rt_sigprocmask     (__NR_SYSCALL_BASE + 175)
+#endif
+#ifndef __NR_stat64
+#define __NR_stat64             (__NR_SYSCALL_BASE + 195)
+#endif
+#ifndef __NR_fstat64
+#define __NR_fstat64            (__NR_SYSCALL_BASE + 197)
+#endif
+#ifndef __NR_getdents64
+#define __NR_getdents64         (__NR_SYSCALL_BASE + 217)
+#endif
+#ifndef __NR_gettid
+#define __NR_gettid             (__NR_SYSCALL_BASE + 224)
+#endif
+#ifndef __NR_futex
+#define __NR_futex              (__NR_SYSCALL_BASE + 240)
+#endif
+/* End of ARM definitions                                                  */
+#elif defined(__x86_64__)
+#ifndef __NR_gettid
+#define __NR_gettid             186
+#endif
+#ifndef __NR_futex
+#define __NR_futex              202
+#endif
+#ifndef __NR_getdents64
+#define __NR_getdents64         217
+#endif
+#ifndef __NR_openat
+#define __NR_openat             257
+#endif
+/* End of x86-64 definitions                                                 */
+#elif defined(__mips__)
+#if _MIPS_SIM == _MIPS_SIM_ABI32
+#ifndef __NR_rt_sigaction
+#define __NR_rt_sigaction       (__NR_Linux + 194)
+#define __NR_rt_sigprocmask     (__NR_Linux + 195)
+#endif
+#ifndef __NR_stat64
+#define __NR_stat64             (__NR_Linux + 213)
+#endif
+#ifndef __NR_fstat64
+#define __NR_fstat64            (__NR_Linux + 215)
+#endif
+#ifndef __NR_getdents64
+#define __NR_getdents64         (__NR_Linux + 219)
+#endif
+#ifndef __NR_gettid
+#define __NR_gettid             (__NR_Linux + 222)
+#endif
+#ifndef __NR_futex
+#define __NR_futex              (__NR_Linux + 238)
+#endif
+#ifndef __NR_openat
+#define __NR_openat             (__NR_Linux + 288)
+#endif
+#ifndef __NR_fstatat
+#define __NR_fstatat            (__NR_Linux + 293)
+#endif
+#ifndef __NR_getcpu
+#define __NR_getcpu             (__NR_Linux + 312)
+#endif
+/* End of MIPS (old 32bit API) definitions */
+#elif  _MIPS_SIM == _MIPS_SIM_ABI64
+#ifndef __NR_gettid
+#define __NR_gettid             (__NR_Linux + 178)
+#endif
+#ifndef __NR_futex
+#define __NR_futex              (__NR_Linux + 194)
+#endif
+#ifndef __NR_openat
+#define __NR_openat             (__NR_Linux + 247)
+#endif
+#ifndef __NR_fstatat
+#define __NR_fstatat            (__NR_Linux + 252)
+#endif
+#ifndef __NR_getcpu
+#define __NR_getcpu             (__NR_Linux + 271)
+#endif
+/* End of MIPS (64bit API) definitions */
+#else
+#ifndef __NR_gettid
+#define __NR_gettid             (__NR_Linux + 178)
+#endif
+#ifndef __NR_futex
+#define __NR_futex              (__NR_Linux + 194)
+#endif
+#ifndef __NR_openat
+#define __NR_openat             (__NR_Linux + 251)
+#endif
+#ifndef __NR_fstatat
+#define __NR_fstatat            (__NR_Linux + 256)
+#endif
+#ifndef __NR_getcpu
+#define __NR_getcpu             (__NR_Linux + 275)
+#endif
+/* End of MIPS (new 32bit API) definitions                                   */
+#endif
+/* End of MIPS definitions                                                   */
+#elif defined(__PPC__)
+#ifndef __NR_rt_sigaction
+#define __NR_rt_sigaction       173
+#define __NR_rt_sigprocmask     174
+#endif
+#ifndef __NR_stat64
+#define __NR_stat64             195
+#endif
+#ifndef __NR_fstat64
+#define __NR_fstat64            197
+#endif
+#ifndef __NR_socket
+#define __NR_socket             198
+#endif
+#ifndef __NR_getdents64
+#define __NR_getdents64         202
+#endif
+#ifndef __NR_gettid
+#define __NR_gettid             207
+#endif
+#ifndef __NR_futex
+#define __NR_futex              221
+#endif
+#ifndef __NR_openat
+#define __NR_openat             286
+#endif
+#ifndef __NR_getcpu
+#define __NR_getcpu             302
+#endif
+/* End of powerpc definitions                                                */
+#elif defined(__aarch64__)
+#ifndef __NR_fstatat
+#define __NR_fstatat             79
+#endif
+/* End of aarch64 definitions                                                */
+#endif
+
+
+/* After forking, we must make sure to only call system calls.               */
+#if __BOUNDED_POINTERS__
+  #error "Need to port invocations of syscalls for bounded ptrs"
+#else
+  /* The core dumper and the thread lister get executed after threads
+   * have been suspended. As a consequence, we cannot call any functions
+   * that acquire locks. Unfortunately, libc wraps most system calls
+   * (e.g. in order to implement pthread_atfork, and to make calls
+   * cancellable), which means we cannot call these functions. Instead,
+   * we have to call syscall() directly.
+   */
+  #undef LSS_ERRNO
+  #ifdef SYS_ERRNO
+    /* Allow the including file to override the location of errno. This can
+     * be useful when using clone() with the CLONE_VM option.
+     */
+    #define LSS_ERRNO SYS_ERRNO
+  #else
+    #define LSS_ERRNO errno
+  #endif
+
+  #undef LSS_INLINE
+  #ifdef SYS_INLINE
+    #define LSS_INLINE SYS_INLINE
+  #else
+    #define LSS_INLINE static inline
+  #endif
+
+  /* Allow the including file to override the prefix used for all new
+   * system calls. By default, it will be set to "sys_".
+   */
+  #undef LSS_NAME
+  #ifndef SYS_PREFIX
+    #define LSS_NAME(name) sys_##name
+  #elif SYS_PREFIX < 0
+    #define LSS_NAME(name) name
+  #elif SYS_PREFIX == 0
+    #define LSS_NAME(name) sys0_##name
+  #elif SYS_PREFIX == 1
+    #define LSS_NAME(name) sys1_##name
+  #elif SYS_PREFIX == 2
+    #define LSS_NAME(name) sys2_##name
+  #elif SYS_PREFIX == 3
+    #define LSS_NAME(name) sys3_##name
+  #elif SYS_PREFIX == 4
+    #define LSS_NAME(name) sys4_##name
+  #elif SYS_PREFIX == 5
+    #define LSS_NAME(name) sys5_##name
+  #elif SYS_PREFIX == 6
+    #define LSS_NAME(name) sys6_##name
+  #elif SYS_PREFIX == 7
+    #define LSS_NAME(name) sys7_##name
+  #elif SYS_PREFIX == 8
+    #define LSS_NAME(name) sys8_##name
+  #elif SYS_PREFIX == 9
+    #define LSS_NAME(name) sys9_##name
+  #endif
+
+  #undef  LSS_RETURN
+  #if (defined(__i386__) || defined(__x86_64__) || defined(__arm__) ||        \
+       defined(__aarch64__))
+  /* Failing system calls return a negative result in the range of
+   * -1..-4095. These are "errno" values with the sign inverted.
+   */
+  #define LSS_RETURN(type, res)                                               \
+    do {                                                                      \
+      if ((unsigned long)(res) >= (unsigned long)(-4095)) {                   \
+        LSS_ERRNO = -(res);                                                   \
+        res = -1;                                                             \
+      }                                                                       \
+      return (type) (res);                                                    \
+    } while (0)
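+  /* For example: a raw syscall that fails with ENOENT comes back from the
+   * kernel as -ENOENT (-2). The check above recognizes any value in
+   * [-4095..-1], stores 2 in LSS_ERRNO, and returns -1 to the caller.
+   */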
+  #elif defined(__mips__)
+  /* On MIPS, a failing system call signals the error in a separate CPU
+   * register ("a3") and returns the errno value in the result register.
+   */
+  #define LSS_RETURN(type, res, err)                                          \
+    do {                                                                      \
+      if (err) {                                                              \
+        LSS_ERRNO = (res);                                                    \
+        res = -1;                                                             \
+      }                                                                       \
+      return (type) (res);                                                    \
+    } while (0)
+  #elif defined(__PPC__)
+  /* On PPC, a failing system call sets the summary-overflow bit (SO) in
+   * cr0 and returns the errno value. See linux/unistd.h.
+   */
+  #define LSS_RETURN(type, res, err)                                          \
+   do {                                                                       \
+     if (err & 0x10000000 ) {                                                 \
+       LSS_ERRNO = (res);                                                     \
+       res = -1;                                                              \
+     }                                                                        \
+     return (type) (res);                                                     \
+   } while (0)
+  #endif
+  #if defined(__i386__)
+    #if defined(NO_FRAME_POINTER) && (100 * __GNUC__ + __GNUC_MINOR__ >= 404)
+      /* This only works for GCC-4.4 and above -- the first version to use
+         .cfi directives for dwarf unwind info.  */
+      #define CFI_ADJUST_CFA_OFFSET(adjust)                                   \
+                  ".cfi_adjust_cfa_offset " #adjust "\n"
+    #else
+      #define CFI_ADJUST_CFA_OFFSET(adjust) /**/
+    #endif
+
+    /* In PIC mode (e.g. when building shared libraries), gcc for i386
+     * reserves ebx. Unfortunately, most distributions ship with
+     * implementations of _syscallX() which clobber ebx.
+     * Also, most definitions of _syscallX() neglect to mark "memory" as
+     * being clobbered. This causes problems with compilers that do a
+     * better job of optimizing across __asm__ calls.
+     * So, we just have to redefine all of the _syscallX() macros.
+     */
+    #undef  LSS_BODY
+    #define LSS_BODY(type,args...)                                            \
+      long __res;                                                             \
+      __asm__ __volatile__("push %%ebx\n"                                     \
+                           CFI_ADJUST_CFA_OFFSET(4)                           \
+                           "movl %2,%%ebx\n"                                  \
+                           "int $0x80\n"                                      \
+                           "pop %%ebx\n"                                      \
+                           CFI_ADJUST_CFA_OFFSET(-4)                          \
+                           args                                               \
+                           : "esp", "memory");                                \
+      LSS_RETURN(type,__res)
+    #undef  _syscall0
+    #define _syscall0(type,name)                                              \
+      type LSS_NAME(name)(void) {                                             \
+        long __res;                                                           \
+        __asm__ volatile("int $0x80"                                          \
+                         : "=a" (__res)                                       \
+                         : "0" (__NR_##name)                                  \
+                         : "memory");                                         \
+        LSS_RETURN(type,__res);                                               \
+      }
+    #undef  _syscall1
+    #define _syscall1(type,name,type1,arg1)                                   \
+      type LSS_NAME(name)(type1 arg1) {                                       \
+        LSS_BODY(type,                                                        \
+             : "=a" (__res)                                                   \
+             : "0" (__NR_##name), "ri" ((long)(arg1)));                       \
+      }
+    #undef  _syscall2
+    #define _syscall2(type,name,type1,arg1,type2,arg2)                        \
+      type LSS_NAME(name)(type1 arg1,type2 arg2) {                            \
+        LSS_BODY(type,                                                        \
+             : "=a" (__res)                                                   \
+             : "0" (__NR_##name),"ri" ((long)(arg1)), "c" ((long)(arg2)));    \
+      }
+    #undef  _syscall3
+    #define _syscall3(type,name,type1,arg1,type2,arg2,type3,arg3)             \
+      type LSS_NAME(name)(type1 arg1,type2 arg2,type3 arg3) {                 \
+        LSS_BODY(type,                                                        \
+             : "=a" (__res)                                                   \
+             : "0" (__NR_##name), "ri" ((long)(arg1)), "c" ((long)(arg2)),    \
+               "d" ((long)(arg3)));                                           \
+      }
+    #undef  _syscall4
+    #define _syscall4(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4)  \
+      type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4) {   \
+        LSS_BODY(type,                                                        \
+             : "=a" (__res)                                                   \
+             : "0" (__NR_##name), "ri" ((long)(arg1)), "c" ((long)(arg2)),    \
+               "d" ((long)(arg3)),"S" ((long)(arg4)));                        \
+      }
+    #undef  _syscall5
+    #define _syscall5(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4,  \
+                      type5,arg5)                                             \
+      type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4,     \
+                          type5 arg5) {                                       \
+        long __res;                                                           \
+        __asm__ __volatile__("push %%ebx\n"                                   \
+                             "movl %2,%%ebx\n"                                \
+                             "movl %1,%%eax\n"                                \
+                             "int  $0x80\n"                                   \
+                             "pop  %%ebx"                                     \
+                             : "=a" (__res)                                   \
+                             : "i" (__NR_##name), "ri" ((long)(arg1)),        \
+                               "c" ((long)(arg2)), "d" ((long)(arg3)),        \
+                               "S" ((long)(arg4)), "D" ((long)(arg5))         \
+                             : "esp", "memory");                              \
+        LSS_RETURN(type,__res);                                               \
+      }
+    #undef  _syscall6
+    #define _syscall6(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4,  \
+                      type5,arg5,type6,arg6)                                  \
+      type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4,     \
+                          type5 arg5, type6 arg6) {                           \
+        long __res;                                                           \
+        struct { long __a1; long __a6; } __s = { (long)arg1, (long) arg6 };   \
+        __asm__ __volatile__("push %%ebp\n"                                   \
+                             "push %%ebx\n"                                   \
+                             "movl 4(%2),%%ebp\n"                             \
+                             "movl 0(%2), %%ebx\n"                            \
+                             "movl %1,%%eax\n"                                \
+                             "int  $0x80\n"                                   \
+                             "pop  %%ebx\n"                                   \
+                             "pop  %%ebp"                                     \
+                             : "=a" (__res)                                   \
+                             : "i" (__NR_##name),  "0" ((long)(&__s)),        \
+                               "c" ((long)(arg2)), "d" ((long)(arg3)),        \
+                               "S" ((long)(arg4)), "D" ((long)(arg5))         \
+                             : "esp", "memory");                              \
+        LSS_RETURN(type,__res);                                               \
+      }
+    LSS_INLINE int LSS_NAME(clone)(int (*fn)(void *), void *child_stack,
+                                   int flags, void *arg, int *parent_tidptr,
+                                   void *newtls, int *child_tidptr) {
+      long __res;
+      __asm__ __volatile__(/* if (fn == NULL)
+                            *   return -EINVAL;
+                            */
+                           "movl   %3,%%ecx\n"
+                           "jecxz  1f\n"
+
+                           /* if (child_stack == NULL)
+                            *   return -EINVAL;
+                            */
+                           "movl   %4,%%ecx\n"
+                           "jecxz  1f\n"
+
+                           /* Set up alignment of the child stack:
+                            * child_stack = (child_stack & ~0xF) - 20;
+                            */
+                           "andl   $-16,%%ecx\n"
+                           "subl   $20,%%ecx\n"
+
+                           /* Push "arg" and "fn" onto the stack that will be
+                            * used by the child.
+                            */
+                           "movl   %6,%%eax\n"
+                           "movl   %%eax,4(%%ecx)\n"
+                           "movl   %3,%%eax\n"
+                           "movl   %%eax,(%%ecx)\n"
+
+                           /* %eax = syscall(%eax = __NR_clone,
+                            *                %ebx = flags,
+                            *                %ecx = child_stack,
+                            *                %edx = parent_tidptr,
+                            *                %esi = newtls,
+                            *                %edi = child_tidptr)
+                            * Also, make sure that %ebx gets preserved as it is
+                            * used in PIC mode.
+                            */
+                           "movl   %8,%%esi\n"
+                           "movl   %7,%%edx\n"
+                           "movl   %5,%%eax\n"
+                           "movl   %9,%%edi\n"
+                           "pushl  %%ebx\n"
+                           "movl   %%eax,%%ebx\n"
+                           "movl   %2,%%eax\n"
+                           "int    $0x80\n"
+
+                           /* In the parent: restore %ebx
+                            * In the child:  move "fn" into %ebx
+                            */
+                           "popl   %%ebx\n"
+
+                           /* if (%eax != 0)
+                            *   return %eax;
+                            */
+                           "test   %%eax,%%eax\n"
+                           "jnz    1f\n"
+
+                           /* In the child, now. Terminate frame pointer chain.
+                            */
+                           "movl   $0,%%ebp\n"
+
+                           /* Call "fn". "arg" is already on the stack.
+                            */
+                           "call   *%%ebx\n"
+
+                           /* Call _exit(%ebx). Unfortunately older versions
+                            * of gcc restrict the number of arguments that can
+                            * be passed to asm(). So, we need to hard-code the
+                            * system call number.
+                            */
+                           "movl   %%eax,%%ebx\n"
+                           "movl   $1,%%eax\n"
+                           "int    $0x80\n"
+
+                           /* Return to parent.
+                            */
+                         "1:\n"
+                           : "=a" (__res)
+                           : "0"(-EINVAL), "i"(__NR_clone),
+                             "m"(fn), "m"(child_stack), "m"(flags), "m"(arg),
+                             "m"(parent_tidptr), "m"(newtls), "m"(child_tidptr)
+                           : "esp", "memory", "ecx", "edx", "esi", "edi");
+      LSS_RETURN(int, __res);
+    }
+
+    LSS_INLINE void (*LSS_NAME(restore_rt)(void))(void) {
+      /* On i386, the kernel does not know how to return from a signal
+       * handler. Instead, it relies on user space to provide a
+       * restorer function that calls the {rt_,}sigreturn() system call.
+       * Unfortunately, we cannot just reference the glibc version of this
+       * function, as glibc goes out of its way to make it inaccessible.
+       */
+      void (*res)(void);
+      __asm__ __volatile__("call   2f\n"
+                         "0:.align 16\n"
+                         "1:movl   %1,%%eax\n"
+                           "int    $0x80\n"
+                         "2:popl   %0\n"
+                           "addl   $(1b-0b),%0\n"
+                           : "=a" (res)
+                           : "i"  (__NR_rt_sigreturn));
+      return res;
+    }
+    LSS_INLINE void (*LSS_NAME(restore)(void))(void) {
+      /* On i386, the kernel does not know how to return from a signal
+       * handler. Instead, it relies on user space to provide a
+       * restorer function that calls the {rt_,}sigreturn() system call.
+       * Unfortunately, we cannot just reference the glibc version of this
+       * function, as glibc goes out of its way to make it inaccessible.
+       */
+      void (*res)(void);
+      __asm__ __volatile__("call   2f\n"
+                         "0:.align 16\n"
+                         "1:pop    %%eax\n"
+                           "movl   %1,%%eax\n"
+                           "int    $0x80\n"
+                         "2:popl   %0\n"
+                           "addl   $(1b-0b),%0\n"
+                           : "=a" (res)
+                           : "i"  (__NR_sigreturn));
+      return res;
+    }
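+    /* These trampolines are meant to be installed into "sa_restorer"
+     * (together with the SA_RESTORER flag) when a signal handler is set
+     * up via the sigaction()/rt_sigaction() system calls. */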
+  #elif defined(__x86_64__)
+    /* There are no known problems with any of the _syscallX() macros
+     * currently shipping for x86_64, but we still need to be able to define
+     * our own version so that we can override the location of errno
+     * (e.g. when using the clone() system call with the CLONE_VM
+     * option).
+     */
+    #undef  LSS_ENTRYPOINT
+    #define LSS_ENTRYPOINT "syscall\n"
+
+    /* The x32 ABI has 32 bit longs, but the syscall interface is 64 bit.
+     * We need to explicitly cast to an unsigned 64 bit type to avoid implicit
+     * sign extension.  We can't cast pointers directly because those are
+     * 32 bits, and gcc will dump ugly warnings about casting from a pointer
+     * to an integer of a different size.
+     */
+    #undef  LSS_SYSCALL_ARG
+    #define LSS_SYSCALL_ARG(a) ((uint64_t)(uintptr_t)(a))
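+    /* For example (x32): for a "char *p" argument, LSS_SYSCALL_ARG(p)
+     * first converts the 32-bit pointer through uintptr_t and then
+     * zero-extends it to uint64_t, so no stray sign bits reach the 64-bit
+     * syscall register.
+     */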
+    #undef  _LSS_RETURN
+    #define _LSS_RETURN(type, res, cast)                                      \
+      do {                                                                    \
+        if ((uint64_t)(res) >= (uint64_t)(-4095)) {                           \
+          LSS_ERRNO = -(res);                                                 \
+          res = -1;                                                           \
+        }                                                                     \
+        return (type)(cast)(res);                                             \
+      } while (0)
+    #undef  LSS_RETURN
+    #define LSS_RETURN(type, res) _LSS_RETURN(type, res, uintptr_t)
+
+    #undef  _LSS_BODY
+    #define _LSS_BODY(nr, type, name, cast, ...)                              \
+          long long __res;                                                    \
+          __asm__ __volatile__(LSS_BODY_ASM##nr LSS_ENTRYPOINT                \
+            : "=a" (__res)                                                    \
+            : "0" (__NR_##name) LSS_BODY_ARG##nr(__VA_ARGS__)                 \
+            : LSS_BODY_CLOBBER##nr "r11", "rcx", "memory");                   \
+          _LSS_RETURN(type, __res, cast)
+    #undef  LSS_BODY
+    #define LSS_BODY(nr, type, name, args...) \
+      _LSS_BODY(nr, type, name, uintptr_t, ## args)
+
+    #undef  LSS_BODY_ASM0
+    #undef  LSS_BODY_ASM1
+    #undef  LSS_BODY_ASM2
+    #undef  LSS_BODY_ASM3
+    #undef  LSS_BODY_ASM4
+    #undef  LSS_BODY_ASM5
+    #undef  LSS_BODY_ASM6
+    #define LSS_BODY_ASM0
+    #define LSS_BODY_ASM1 LSS_BODY_ASM0
+    #define LSS_BODY_ASM2 LSS_BODY_ASM1
+    #define LSS_BODY_ASM3 LSS_BODY_ASM2
+    #define LSS_BODY_ASM4 LSS_BODY_ASM3 "movq %5,%%r10;"
+    #define LSS_BODY_ASM5 LSS_BODY_ASM4 "movq %6,%%r8;"
+    #define LSS_BODY_ASM6 LSS_BODY_ASM5 "movq %7,%%r9;"
+
+    #undef  LSS_BODY_CLOBBER0
+    #undef  LSS_BODY_CLOBBER1
+    #undef  LSS_BODY_CLOBBER2
+    #undef  LSS_BODY_CLOBBER3
+    #undef  LSS_BODY_CLOBBER4
+    #undef  LSS_BODY_CLOBBER5
+    #undef  LSS_BODY_CLOBBER6
+    #define LSS_BODY_CLOBBER0
+    #define LSS_BODY_CLOBBER1 LSS_BODY_CLOBBER0
+    #define LSS_BODY_CLOBBER2 LSS_BODY_CLOBBER1
+    #define LSS_BODY_CLOBBER3 LSS_BODY_CLOBBER2
+    #define LSS_BODY_CLOBBER4 LSS_BODY_CLOBBER3 "r10",
+    #define LSS_BODY_CLOBBER5 LSS_BODY_CLOBBER4 "r8",
+    #define LSS_BODY_CLOBBER6 LSS_BODY_CLOBBER5 "r9",
+
+    #undef  LSS_BODY_ARG0
+    #undef  LSS_BODY_ARG1
+    #undef  LSS_BODY_ARG2
+    #undef  LSS_BODY_ARG3
+    #undef  LSS_BODY_ARG4
+    #undef  LSS_BODY_ARG5
+    #undef  LSS_BODY_ARG6
+    #define LSS_BODY_ARG0()
+    #define LSS_BODY_ARG1(arg1) \
+      LSS_BODY_ARG0(), "D" (arg1)
+    #define LSS_BODY_ARG2(arg1, arg2) \
+      LSS_BODY_ARG1(arg1), "S" (arg2)
+    #define LSS_BODY_ARG3(arg1, arg2, arg3) \
+      LSS_BODY_ARG2(arg1, arg2), "d" (arg3)
+    #define LSS_BODY_ARG4(arg1, arg2, arg3, arg4) \
+      LSS_BODY_ARG3(arg1, arg2, arg3), "r" (arg4)
+    #define LSS_BODY_ARG5(arg1, arg2, arg3, arg4, arg5) \
+      LSS_BODY_ARG4(arg1, arg2, arg3, arg4), "r" (arg5)
+    #define LSS_BODY_ARG6(arg1, arg2, arg3, arg4, arg5, arg6) \
+      LSS_BODY_ARG5(arg1, arg2, arg3, arg4, arg5), "r" (arg6)
+
+    #undef _syscall0
+    #define _syscall0(type,name)                                              \
+      type LSS_NAME(name)() {                                                 \
+        LSS_BODY(0, type, name);                                              \
+      }
+    #undef _syscall1
+    #define _syscall1(type,name,type1,arg1)                                   \
+      type LSS_NAME(name)(type1 arg1) {                                       \
+        LSS_BODY(1, type, name, LSS_SYSCALL_ARG(arg1));                       \
+      }
+    #undef _syscall2
+    #define _syscall2(type,name,type1,arg1,type2,arg2)                        \
+      type LSS_NAME(name)(type1 arg1, type2 arg2) {                           \
+        LSS_BODY(2, type, name, LSS_SYSCALL_ARG(arg1), LSS_SYSCALL_ARG(arg2));\
+      }
+    #undef _syscall3
+    #define _syscall3(type,name,type1,arg1,type2,arg2,type3,arg3)             \
+      type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3) {               \
+        LSS_BODY(3, type, name, LSS_SYSCALL_ARG(arg1), LSS_SYSCALL_ARG(arg2), \
+                                LSS_SYSCALL_ARG(arg3));                       \
+      }
+    #undef _syscall4
+    #define _syscall4(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4)  \
+      type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4) {   \
+        LSS_BODY(4, type, name, LSS_SYSCALL_ARG(arg1), LSS_SYSCALL_ARG(arg2), \
+                                LSS_SYSCALL_ARG(arg3), LSS_SYSCALL_ARG(arg4));\
+      }
+    #undef _syscall5
+    #define _syscall5(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4,  \
+                      type5,arg5)                                             \
+      type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4,     \
+                          type5 arg5) {                                       \
+        LSS_BODY(5, type, name, LSS_SYSCALL_ARG(arg1), LSS_SYSCALL_ARG(arg2), \
+                                LSS_SYSCALL_ARG(arg3), LSS_SYSCALL_ARG(arg4), \
+                                LSS_SYSCALL_ARG(arg5));                       \
+      }
+    #undef _syscall6
+    #define _syscall6(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4,  \
+                      type5,arg5,type6,arg6)                                  \
+      type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4,     \
+                          type5 arg5, type6 arg6) {                           \
+        LSS_BODY(6, type, name, LSS_SYSCALL_ARG(arg1), LSS_SYSCALL_ARG(arg2), \
+                                LSS_SYSCALL_ARG(arg3), LSS_SYSCALL_ARG(arg4), \
+                                LSS_SYSCALL_ARG(arg5), LSS_SYSCALL_ARG(arg6));\
+      }
+    LSS_INLINE int LSS_NAME(clone)(int (*fn)(void *), void *child_stack,
+                                   int flags, void *arg, int *parent_tidptr,
+                                   void *newtls, int *child_tidptr) {
+      long long __res;
+      {
+        __asm__ __volatile__(/* if (fn == NULL)
+                              *   return -EINVAL;
+                              */
+                             "testq  %4,%4\n"
+                             "jz     1f\n"
+
+                             /* if (child_stack == NULL)
+                              *   return -EINVAL;
+                              */
+                             "testq  %5,%5\n"
+                             "jz     1f\n"
+
+                             /* Set up alignment of the child stack:
+                              * child_stack = (child_stack & ~0xF) - 16;
+                              */
+                             "andq   $-16,%5\n"
+                             "subq   $16,%5\n"
+
+                             /* Push "arg" and "fn" onto the stack that will be
+                              * used by the child.
+                              */
+                             "movq   %7,8(%5)\n"
+                             "movq   %4,0(%5)\n"
+
+                             /* %rax = syscall(%rax = __NR_clone,
+                              *                %rdi = flags,
+                              *                %rsi = child_stack,
+                              *                %rdx = parent_tidptr,
+                              *                %r8  = new_tls,
+                              *                %r10 = child_tidptr)
+                              */
+                             "movq   %2,%%rax\n"
+                             "movq   %9,%%r8\n"
+                             "movq   %10,%%r10\n"
+                             "syscall\n"
+
+                             /* if (%rax != 0)
+                              *   return;
+                              */
+                             "testq  %%rax,%%rax\n"
+                             "jnz    1f\n"
+
+                             /* In the child. Terminate frame pointer chain.
+                              */
+                             "xorq   %%rbp,%%rbp\n"
+
+                             /* Call "fn(arg)".
+                              */
+                             "popq   %%rax\n"
+                             "popq   %%rdi\n"
+                             "call   *%%rax\n"
+
+                             /* Call _exit(%rax).
+                              */
+                             "movq   %%rax,%%rdi\n"
+                             "movq   %3,%%rax\n"
+                             "syscall\n"
+
+                             /* Return to parent.
+                              */
+                           "1:\n"
+                             : "=a" (__res)
+                             : "0"(-EINVAL), "i"(__NR_clone), "i"(__NR_exit),
+                               "r"(LSS_SYSCALL_ARG(fn)),
+                               "S"(LSS_SYSCALL_ARG(child_stack)),
+                               "D"(LSS_SYSCALL_ARG(flags)),
+                               "r"(LSS_SYSCALL_ARG(arg)),
+                               "d"(LSS_SYSCALL_ARG(parent_tidptr)),
+                               "r"(LSS_SYSCALL_ARG(newtls)),
+                               "r"(LSS_SYSCALL_ARG(child_tidptr))
+                             : "rsp", "memory", "r8", "r10", "r11", "rcx");
+      }
+      LSS_RETURN(int, __res);
+    }
+
+    LSS_INLINE void (*LSS_NAME(restore_rt)(void))(void) {
+      /* On x86-64, the kernel does not know how to return from
+       * a signal handler. Instead, it relies on user space to provide a
+       * restorer function that calls the rt_sigreturn() system call.
+       * Unfortunately, we cannot just reference the glibc version of this
+       * function, as glibc goes out of its way to make it inaccessible.
+       */
+      long long res;
+      __asm__ __volatile__("call   2f\n"
+                         "0:.align 16\n"
+                         "1:movq   %1,%%rax\n"
+                           "syscall\n"
+                         "2:popq   %0\n"
+                           "addq   $(1b-0b),%0\n"
+                           : "=a" (res)
+                           : "i"  (__NR_rt_sigreturn));
+      return (void (*)(void))(uintptr_t)res;
+    }
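+
+    /* Usage sketch: the trampoline address returned above is intended to
+     * be installed as the SA_RESTORER of a kernel_sigaction, e.g.
+     *
+     *   struct kernel_sigaction a = *act;
+     *   a.sa_flags   |= SA_RESTORER;
+     *   a.sa_restorer = LSS_NAME(restore_rt)();
+     *
+     * which is exactly what LSS_NAME(sigaction) does further below.
+     */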
+  #elif defined(__arm__)
+    /* Most definitions of _syscallX() neglect to mark "memory" as being
+     * clobbered. This causes problems with compilers that do a better job
+     * of optimizing across __asm__ calls.
+     * So, we just have to redefine all of the _syscallX() macros.
+     */
+    #undef LSS_REG
+    #define LSS_REG(r,a) register long __r##r __asm__("r"#r) = (long)a
+
+    /* r0..r3 are scratch registers and not preserved across function
+     * calls.  We need to first evaluate the first 4 syscall arguments
+     * and store them on stack.  They must be loaded into r0..r3 after
+     * all function calls to avoid r0..r3 being clobbered.
+     */
+    #undef LSS_SAVE_ARG
+    #define LSS_SAVE_ARG(r,a) long __tmp##r = (long)a
+    #undef LSS_LOAD_ARG
+    #define LSS_LOAD_ARG(r) register long __r##r __asm__("r"#r) = __tmp##r
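+    /* Rough sketch (illustrative, assuming a dup2-style two-argument
+     * syscall) of what the save/load pattern above expands to:
+     *
+     *   int LSS_NAME(dup2)(int oldfd, int newfd) {
+     *     long __tmp0 = (long)oldfd;                  // LSS_SAVE_ARG(0,...)
+     *     long __tmp1 = (long)newfd;                  // LSS_SAVE_ARG(1,...)
+     *     register long __r0 __asm__("r0") = __tmp0;  // LSS_LOAD_ARG(0)
+     *     register long __r1 __asm__("r1") = __tmp1;  // LSS_LOAD_ARG(1)
+     *     LSS_BODY(int, dup2, "r"(__r0), "r"(__r1));
+     *   }
+     *
+     * Any function calls hidden in the argument expressions thus run
+     * before r0..r3 are pinned.
+     */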
+
+    #undef  LSS_BODY
+    #define LSS_BODY(type, name, args...)                                     \
+          register long __res_r0 __asm__("r0");                               \
+          long __res;                                                         \
+          __SYS_REG(name)                                                     \
+          __asm__ __volatile__ (__syscall_safe(name)                          \
+                                : "=r"(__res_r0)                              \
+                                : __SYS_REG_LIST(args)                        \
+                                : "lr", "memory");                            \
+          __res = __res_r0;                                                   \
+          LSS_RETURN(type, __res)
+    #undef _syscall0
+    #define _syscall0(type, name)                                             \
+      type LSS_NAME(name)() {                                                 \
+        LSS_BODY(type, name);                                                 \
+      }
+    #undef _syscall1
+    #define _syscall1(type, name, type1, arg1)                                \
+      type LSS_NAME(name)(type1 arg1) {                                       \
+        /* There is no need to use a volatile temp.  */                       \
+        LSS_REG(0, arg1);                                                     \
+        LSS_BODY(type, name, "r"(__r0));                                      \
+      }
+    #undef _syscall2
+    #define _syscall2(type, name, type1, arg1, type2, arg2)                   \
+      type LSS_NAME(name)(type1 arg1, type2 arg2) {                           \
+        LSS_SAVE_ARG(0, arg1);                                                \
+        LSS_SAVE_ARG(1, arg2);                                                \
+        LSS_LOAD_ARG(0);                                                      \
+        LSS_LOAD_ARG(1);                                                      \
+        LSS_BODY(type, name, "r"(__r0), "r"(__r1));                           \
+      }
+    #undef _syscall3
+    #define _syscall3(type, name, type1, arg1, type2, arg2, type3, arg3)      \
+      type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3) {               \
+        LSS_SAVE_ARG(0, arg1);                                                \
+        LSS_SAVE_ARG(1, arg2);                                                \
+        LSS_SAVE_ARG(2, arg3);                                                \
+        LSS_LOAD_ARG(0);                                                      \
+        LSS_LOAD_ARG(1);                                                      \
+        LSS_LOAD_ARG(2);                                                      \
+        LSS_BODY(type, name, "r"(__r0), "r"(__r1), "r"(__r2));                \
+      }
+    #undef _syscall4
+    #define _syscall4(type, name, type1, arg1, type2, arg2, type3, arg3,      \
+                      type4, arg4)                                            \
+      type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4) {   \
+        LSS_SAVE_ARG(0, arg1);                                                \
+        LSS_SAVE_ARG(1, arg2);                                                \
+        LSS_SAVE_ARG(2, arg3);                                                \
+        LSS_SAVE_ARG(3, arg4);                                                \
+        LSS_LOAD_ARG(0);                                                      \
+        LSS_LOAD_ARG(1);                                                      \
+        LSS_LOAD_ARG(2);                                                      \
+        LSS_LOAD_ARG(3);                                                      \
+        LSS_BODY(type, name, "r"(__r0), "r"(__r1), "r"(__r2), "r"(__r3));     \
+      }
+    #undef _syscall5
+    #define _syscall5(type, name, type1, arg1, type2, arg2, type3, arg3,      \
+                      type4, arg4, type5, arg5)                               \
+      type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4,     \
+                          type5 arg5) {                                       \
+        LSS_SAVE_ARG(0, arg1);                                                \
+        LSS_SAVE_ARG(1, arg2);                                                \
+        LSS_SAVE_ARG(2, arg3);                                                \
+        LSS_SAVE_ARG(3, arg4);                                                \
+        LSS_REG(4, arg5);                                                     \
+        LSS_LOAD_ARG(0);                                                      \
+        LSS_LOAD_ARG(1);                                                      \
+        LSS_LOAD_ARG(2);                                                      \
+        LSS_LOAD_ARG(3);                                                      \
+        LSS_BODY(type, name, "r"(__r0), "r"(__r1), "r"(__r2), "r"(__r3),      \
+                             "r"(__r4));                                      \
+      }
+    #undef _syscall6
+    #define _syscall6(type, name, type1, arg1, type2, arg2, type3, arg3,      \
+                      type4, arg4, type5, arg5, type6, arg6)                  \
+      type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4,     \
+                          type5 arg5, type6 arg6) {                           \
+        LSS_SAVE_ARG(0, arg1);                                                \
+        LSS_SAVE_ARG(1, arg2);                                                \
+        LSS_SAVE_ARG(2, arg3);                                                \
+        LSS_SAVE_ARG(3, arg4);                                                \
+        LSS_REG(4, arg5);                                                     \
+        LSS_REG(5, arg6);                                                     \
+        LSS_LOAD_ARG(0);                                                      \
+        LSS_LOAD_ARG(1);                                                      \
+        LSS_LOAD_ARG(2);                                                      \
+        LSS_LOAD_ARG(3);                                                      \
+        LSS_BODY(type, name, "r"(__r0), "r"(__r1), "r"(__r2), "r"(__r3),      \
+                             "r"(__r4), "r"(__r5));                           \
+      }
+    LSS_INLINE int LSS_NAME(clone)(int (*fn)(void *), void *child_stack,
+                                   int flags, void *arg, int *parent_tidptr,
+                                   void *newtls, int *child_tidptr) {
+      register long __res __asm__("r5");
+      {
+        if (fn == NULL || child_stack == NULL) {
+            __res = -EINVAL;
+            goto clone_exit;
+        }
+
+        /* Stash the first 4 arguments on the stack, because we can only
+         * load them into r0..r3 after all function calls.
+         */
+        int    tmp_flags = flags;
+        int  * tmp_stack = (int*) child_stack;
+        void * tmp_ptid  = parent_tidptr;
+        void * tmp_tls   = newtls;
+
+        register int  *__ctid  __asm__("r4") = child_tidptr;
+
+        /* Push "arg" and "fn" onto the stack that will be
+         * used by the child.
+         */
+        *(--tmp_stack) = (int) arg;
+        *(--tmp_stack) = (int) fn;
+
+        /* We must load r0..r3 last after all possible function calls.  */
+        register int   __flags __asm__("r0") = tmp_flags;
+        register void *__stack __asm__("r1") = tmp_stack;
+        register void *__ptid  __asm__("r2") = tmp_ptid;
+        register void *__tls   __asm__("r3") = tmp_tls;
+
+        __SYS_REG(clone)
+        __asm__ __volatile__(/* %r0 = syscall(%r0 = flags,
+                              *               %r1 = child_stack,
+                              *               %r2 = parent_tidptr,
+                              *               %r3 = newtls,
+                              *               %r4 = child_tidptr)
+                              */
+                             "push  {r7}\n"
+                             "mov   r7,%1\n"
+                             __syscall(clone)"\n"
+
+                             /* if (%r0 != 0)
+                              *   return %r0;
+                              */
+                             "movs  %0,r0\n"
+                             "bne   1f\n"
+
+                             /* In the child, now. Call "fn(arg)".
+                              */
+                             "ldr   r0,[sp, #4]\n"
+                             "mov   lr,pc\n"
+                             "ldr   pc,[sp]\n"
+
+                             /* Call _exit(%r0), which never returns.  We only
+                              * need to set r7 for the EABI syscall ABI, but we
+                              * always do so to simplify code sharing between
+                              * the old and new syscall ABIs.
+                              */
+                             "mov   r7,%2\n"
+                             __syscall(exit)"\n"
+
+                             /* Pop r7 from the stack only in the parent.
+                              */
+                           "1: pop {r7}\n"
+                             : "=r" (__res)
+                             : "r"(__sysreg),
+                               "i"(__NR_exit), "r"(__stack), "r"(__flags),
+                               "r"(__ptid), "r"(__tls), "r"(__ctid)
+                             : "cc", "lr", "memory");
+      }
+      clone_exit:
+      LSS_RETURN(int, __res);
+    }
+  #elif defined(__mips__)
+    #undef LSS_REG
+    #define LSS_REG(r,a) register unsigned long __r##r __asm__("$"#r) =       \
+                                 (unsigned long)(a)
+
+    #if _MIPS_SIM == _MIPS_SIM_ABI32
+    // See http://sources.redhat.com/ml/libc-alpha/2004-10/msg00050.html
+    // or http://www.linux-mips.org/archives/linux-mips/2004-10/msg00142.html
+    #define MIPS_SYSCALL_CLOBBERS "$1", "$3", "$8", "$9", "$10", "$11", "$12",\
+                                "$13", "$14", "$15", "$24", "$25", "memory"
+    #else
+    #define MIPS_SYSCALL_CLOBBERS "$1", "$3", "$10", "$11", "$12", "$13",     \
+                                "$14", "$15", "$24", "$25", "memory"
+    #endif
+
+    #undef  LSS_BODY
+    #define LSS_BODY(type,name,r7,...)                                        \
+          register unsigned long __v0 __asm__("$2") = __NR_##name;            \
+          __asm__ __volatile__ ("syscall\n"                                   \
+                                : "=&r"(__v0), r7 (__r7)                      \
+                                : "0"(__v0), ##__VA_ARGS__                    \
+                                : MIPS_SYSCALL_CLOBBERS);                     \
+          LSS_RETURN(type, __v0, __r7)
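+    /* Note: MIPS signals syscall failure out of band: $2 ($v0) carries
+     * the result while $7 ($a3) is set non-zero on error, which is why
+     * LSS_BODY and LSS_RETURN take both registers here (sys_pipe further
+     * below relies on the same convention).
+     */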
+    #undef _syscall0
+    #define _syscall0(type, name)                                             \
+      type LSS_NAME(name)() {                                                 \
+        register unsigned long __r7 __asm__("$7");                            \
+        LSS_BODY(type, name, "=r");                                           \
+      }
+    #undef _syscall1
+    #define _syscall1(type, name, type1, arg1)                                \
+      type LSS_NAME(name)(type1 arg1) {                                       \
+        register unsigned long __r7 __asm__("$7");                            \
+        LSS_REG(4, arg1); LSS_BODY(type, name, "=r", "r"(__r4));              \
+      }
+    #undef _syscall2
+    #define _syscall2(type, name, type1, arg1, type2, arg2)                   \
+      type LSS_NAME(name)(type1 arg1, type2 arg2) {                           \
+        register unsigned long __r7 __asm__("$7");                            \
+        LSS_REG(4, arg1); LSS_REG(5, arg2);                                   \
+        LSS_BODY(type, name, "=r", "r"(__r4), "r"(__r5));                     \
+      }
+    #undef _syscall3
+    #define _syscall3(type, name, type1, arg1, type2, arg2, type3, arg3)      \
+      type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3) {               \
+        register unsigned long __r7 __asm__("$7");                            \
+        LSS_REG(4, arg1); LSS_REG(5, arg2); LSS_REG(6, arg3);                 \
+        LSS_BODY(type, name, "=r", "r"(__r4), "r"(__r5), "r"(__r6));          \
+      }
+    #undef _syscall4
+    #define _syscall4(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4)  \
+      type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4) {   \
+        LSS_REG(4, arg1); LSS_REG(5, arg2); LSS_REG(6, arg3);                 \
+        LSS_REG(7, arg4);                                                     \
+        LSS_BODY(type, name, "+r", "r"(__r4), "r"(__r5), "r"(__r6));          \
+      }
+    #undef _syscall5
+    #if _MIPS_SIM == _MIPS_SIM_ABI32
+    /* The old 32-bit MIPS system call ABI passes the fifth and sixth
+     * arguments on the stack, whereas the newer ABIs pass them in
+     * registers "$8" and "$9".
+     */
+    #define _syscall5(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4,  \
+                      type5,arg5)                                             \
+      type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4,     \
+                          type5 arg5) {                                       \
+        LSS_REG(4, arg1); LSS_REG(5, arg2); LSS_REG(6, arg3);                 \
+        LSS_REG(7, arg4);                                                     \
+        register unsigned long __v0 __asm__("$2");                            \
+        __asm__ __volatile__ (".set noreorder\n"                              \
+                              "lw    $2, %6\n"                                \
+                              "subu  $29, 32\n"                               \
+                              "sw    $2, 16($29)\n"                           \
+                              "li    $2, %2\n"                                \
+                              "syscall\n"                                     \
+                              "addiu $29, 32\n"                               \
+                              ".set reorder\n"                                \
+                              : "=&r"(__v0), "+r" (__r7)                      \
+                              : "i" (__NR_##name), "r"(__r4), "r"(__r5),      \
+                                "r"(__r6), "m" ((unsigned long)arg5)          \
+                              : MIPS_SYSCALL_CLOBBERS);                       \
+        LSS_RETURN(type, __v0, __r7);                                         \
+      }
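+    /* Sketch of the o32 stack layout assumed above: "subu $29, 32"
+     * reserves 32 bytes of argument space, the fifth argument is stored
+     * at 16($29) (and, in _syscall6 below, the sixth at 20($29)), while
+     * arguments 1-4 stay in registers $4..$7.
+     */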
+    #else
+    #define _syscall5(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4,  \
+                      type5,arg5)                                             \
+      type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4,     \
+                          type5 arg5) {                                       \
+        LSS_REG(4, arg1); LSS_REG(5, arg2); LSS_REG(6, arg3);                 \
+        LSS_REG(7, arg4); LSS_REG(8, arg5);                                   \
+        LSS_BODY(type, name, "+r", "r"(__r4), "r"(__r5), "r"(__r6),           \
+                 "r"(__r8));                                                  \
+      }
+    #endif
+    #undef _syscall6
+    #if _MIPS_SIM == _MIPS_SIM_ABI32
+    /* The old 32-bit MIPS system call ABI passes the fifth and sixth
+     * arguments on the stack, whereas the newer ABIs pass them in
+     * registers "$8" and "$9".
+     */
+    #define _syscall6(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4,  \
+                      type5,arg5,type6,arg6)                                  \
+      type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4,     \
+                          type5 arg5, type6 arg6) {                           \
+        LSS_REG(4, arg1); LSS_REG(5, arg2); LSS_REG(6, arg3);                 \
+        LSS_REG(7, arg4);                                                     \
+        register unsigned long __v0 __asm__("$2");                            \
+        __asm__ __volatile__ (".set noreorder\n"                              \
+                              "lw    $2, %6\n"                                \
+                              "lw    $8, %7\n"                                \
+                              "subu  $29, 32\n"                               \
+                              "sw    $2, 16($29)\n"                           \
+                              "sw    $8, 20($29)\n"                           \
+                              "li    $2, %2\n"                                \
+                              "syscall\n"                                     \
+                              "addiu $29, 32\n"                               \
+                              ".set reorder\n"                                \
+                              : "=&r"(__v0), "+r" (__r7)                      \
+                              : "i" (__NR_##name), "r"(__r4), "r"(__r5),      \
+                                "r"(__r6), "r" ((unsigned long)arg5),         \
+                                "r" ((unsigned long)arg6)                     \
+                              : MIPS_SYSCALL_CLOBBERS);                       \
+        LSS_RETURN(type, __v0, __r7);                                         \
+      }
+    #else
+    #define _syscall6(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4,  \
+                      type5,arg5,type6,arg6)                                  \
+      type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4,     \
+                          type5 arg5,type6 arg6) {                            \
+        LSS_REG(4, arg1); LSS_REG(5, arg2); LSS_REG(6, arg3);                 \
+        LSS_REG(7, arg4); LSS_REG(8, arg5); LSS_REG(9, arg6);                 \
+        LSS_BODY(type, name, "+r", "r"(__r4), "r"(__r5), "r"(__r6),           \
+                 "r"(__r8), "r"(__r9));                                       \
+      }
+    #endif
+    LSS_INLINE int LSS_NAME(clone)(int (*fn)(void *), void *child_stack,
+                                   int flags, void *arg, int *parent_tidptr,
+                                   void *newtls, int *child_tidptr) {
+      register unsigned long __v0 __asm__("$2");
+      register unsigned long __r7 __asm__("$7") = (unsigned long)newtls;
+      {
+        register int   __flags __asm__("$4") = flags;
+        register void *__stack __asm__("$5") = child_stack;
+        register void *__ptid  __asm__("$6") = parent_tidptr;
+        register int  *__ctid  __asm__("$8") = child_tidptr;
+        __asm__ __volatile__(
+          #if _MIPS_SIM == _MIPS_SIM_ABI32 && _MIPS_SZPTR == 32
+                             "subu  $29,24\n"
+          #elif _MIPS_SIM == _MIPS_SIM_NABI32
+                             "sub   $29,16\n"
+          #else
+                             "dsubu $29,16\n"
+          #endif
+
+                             /* if (fn == NULL || child_stack == NULL)
+                              *   return -EINVAL;
+                              */
+                             "li    %0,%2\n"
+                             "beqz  %5,1f\n"
+                             "beqz  %6,1f\n"
+
+                             /* Push "arg" and "fn" onto the stack that will be
+                              * used by the child.
+                              */
+          #if _MIPS_SIM == _MIPS_SIM_ABI32 && _MIPS_SZPTR == 32
+                             "subu  %6,32\n"
+                             "sw    %5,0(%6)\n"
+                             "sw    %8,4(%6)\n"
+          #elif _MIPS_SIM == _MIPS_SIM_NABI32
+                             "sub   %6,32\n"
+                             "sw    %5,0(%6)\n"
+                             "sw    %8,8(%6)\n"
+          #else
+                             "dsubu %6,32\n"
+                             "sd    %5,0(%6)\n"
+                             "sd    %8,8(%6)\n"
+          #endif
+
+                             /* $7 = syscall($4 = flags,
+                              *              $5 = child_stack,
+                              *              $6 = parent_tidptr,
+                              *              $7 = newtls,
+                              *              $8 = child_tidptr)
+                              */
+                             "li    $2,%3\n"
+                             "syscall\n"
+
+                             /* if ($7 != 0)    (the syscall failed)
+                              *   return $2;
+                              * if ($2 != 0)    (we are in the parent)
+                              *   return $2;
+                              */
+                             "bnez  $7,1f\n"
+                             "bnez  $2,1f\n"
+
+                             /* In the child, now. Call "fn(arg)".
+                              */
+          #if _MIPS_SIM == _MIPS_SIM_ABI32 && _MIPS_SZPTR == 32
+                            "lw    $25,0($29)\n"
+                            "lw    $4,4($29)\n"
+          #elif _MIPS_SIM == _MIPS_SIM_NABI32
+                            "lw    $25,0($29)\n"
+                            "lw    $4,8($29)\n"
+          #else
+                            "ld    $25,0($29)\n"
+                            "ld    $4,8($29)\n"
+          #endif
+                            "jalr  $25\n"
+
+                             /* Call _exit($2)
+                              */
+                            "move  $4,$2\n"
+                            "li    $2,%4\n"
+                            "syscall\n"
+
+                           "1:\n"
+          #if _MIPS_SIM == _MIPS_SIM_ABI32 && _MIPS_SZPTR == 32
+                             "addu  $29, 24\n"
+          #elif _MIPS_SIM == _MIPS_SIM_NABI32
+                             "add   $29, 16\n"
+          #else
+                             "daddu $29,16\n"
+          #endif
+                             : "=&r" (__v0), "=r" (__r7)
+                             : "i"(-EINVAL), "i"(__NR_clone), "i"(__NR_exit),
+                               "r"(fn), "r"(__stack), "r"(__flags), "r"(arg),
+                               "r"(__ptid), "r"(__r7), "r"(__ctid)
+                             : "$9", "$10", "$11", "$12", "$13", "$14", "$15",
+                               "$24", "memory");
+      }
+      LSS_RETURN(int, __v0, __r7);
+    }
+  #elif defined (__PPC__)
+    #undef  LSS_LOADARGS_0
+    #define LSS_LOADARGS_0(name, dummy...)                                    \
+        __sc_0 = __NR_##name
+    #undef  LSS_LOADARGS_1
+    #define LSS_LOADARGS_1(name, arg1)                                        \
+            LSS_LOADARGS_0(name);                                             \
+            __sc_3 = (unsigned long) (arg1)
+    #undef  LSS_LOADARGS_2
+    #define LSS_LOADARGS_2(name, arg1, arg2)                                  \
+            LSS_LOADARGS_1(name, arg1);                                       \
+            __sc_4 = (unsigned long) (arg2)
+    #undef  LSS_LOADARGS_3
+    #define LSS_LOADARGS_3(name, arg1, arg2, arg3)                            \
+            LSS_LOADARGS_2(name, arg1, arg2);                                 \
+            __sc_5 = (unsigned long) (arg3)
+    #undef  LSS_LOADARGS_4
+    #define LSS_LOADARGS_4(name, arg1, arg2, arg3, arg4)                      \
+            LSS_LOADARGS_3(name, arg1, arg2, arg3);                           \
+            __sc_6 = (unsigned long) (arg4)
+    #undef  LSS_LOADARGS_5
+    #define LSS_LOADARGS_5(name, arg1, arg2, arg3, arg4, arg5)                \
+            LSS_LOADARGS_4(name, arg1, arg2, arg3, arg4);                     \
+            __sc_7 = (unsigned long) (arg5)
+    #undef  LSS_LOADARGS_6
+    #define LSS_LOADARGS_6(name, arg1, arg2, arg3, arg4, arg5, arg6)          \
+            LSS_LOADARGS_5(name, arg1, arg2, arg3, arg4, arg5);               \
+            __sc_8 = (unsigned long) (arg6)
+    #undef  LSS_ASMINPUT_0
+    #define LSS_ASMINPUT_0 "0" (__sc_0)
+    #undef  LSS_ASMINPUT_1
+    #define LSS_ASMINPUT_1 LSS_ASMINPUT_0, "1" (__sc_3)
+    #undef  LSS_ASMINPUT_2
+    #define LSS_ASMINPUT_2 LSS_ASMINPUT_1, "2" (__sc_4)
+    #undef  LSS_ASMINPUT_3
+    #define LSS_ASMINPUT_3 LSS_ASMINPUT_2, "3" (__sc_5)
+    #undef  LSS_ASMINPUT_4
+    #define LSS_ASMINPUT_4 LSS_ASMINPUT_3, "4" (__sc_6)
+    #undef  LSS_ASMINPUT_5
+    #define LSS_ASMINPUT_5 LSS_ASMINPUT_4, "5" (__sc_7)
+    #undef  LSS_ASMINPUT_6
+    #define LSS_ASMINPUT_6 LSS_ASMINPUT_5, "6" (__sc_8)
+    #undef  LSS_BODY
+    #define LSS_BODY(nr, type, name, args...)                                 \
+        long __sc_ret, __sc_err;                                              \
+        {                                                                     \
+            register unsigned long __sc_0 __asm__ ("r0");                     \
+            register unsigned long __sc_3 __asm__ ("r3");                     \
+            register unsigned long __sc_4 __asm__ ("r4");                     \
+            register unsigned long __sc_5 __asm__ ("r5");                     \
+            register unsigned long __sc_6 __asm__ ("r6");                     \
+            register unsigned long __sc_7 __asm__ ("r7");                     \
+            register unsigned long __sc_8 __asm__ ("r8");                     \
+                                                                              \
+            LSS_LOADARGS_##nr(name, args);                                    \
+            __asm__ __volatile__                                              \
+                ("sc\n\t"                                                     \
+                 "mfcr %0"                                                    \
+                 : "=&r" (__sc_0),                                            \
+                   "=&r" (__sc_3), "=&r" (__sc_4),                            \
+                   "=&r" (__sc_5), "=&r" (__sc_6),                            \
+                   "=&r" (__sc_7), "=&r" (__sc_8)                             \
+                 : LSS_ASMINPUT_##nr                                          \
+                 : "cr0", "ctr", "memory",                                    \
+                   "r9", "r10", "r11", "r12");                                \
+            __sc_ret = __sc_3;                                                \
+            __sc_err = __sc_0;                                                \
+        }                                                                     \
+        LSS_RETURN(type, __sc_ret, __sc_err)
+    #undef _syscall0
+    #define _syscall0(type, name)                                             \
+       type LSS_NAME(name)(void) {                                            \
+          LSS_BODY(0, type, name);                                            \
+       }
+    #undef _syscall1
+    #define _syscall1(type, name, type1, arg1)                                \
+       type LSS_NAME(name)(type1 arg1) {                                      \
+          LSS_BODY(1, type, name, arg1);                                      \
+       }
+    #undef _syscall2
+    #define _syscall2(type, name, type1, arg1, type2, arg2)                   \
+       type LSS_NAME(name)(type1 arg1, type2 arg2) {                          \
+          LSS_BODY(2, type, name, arg1, arg2);                                \
+       }
+    #undef _syscall3
+    #define _syscall3(type, name, type1, arg1, type2, arg2, type3, arg3)      \
+       type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3) {              \
+          LSS_BODY(3, type, name, arg1, arg2, arg3);                          \
+       }
+    #undef _syscall4
+    #define _syscall4(type, name, type1, arg1, type2, arg2, type3, arg3,      \
+                                  type4, arg4)                                \
+       type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4) {  \
+          LSS_BODY(4, type, name, arg1, arg2, arg3, arg4);                    \
+       }
+    #undef _syscall5
+    #define _syscall5(type, name, type1, arg1, type2, arg2, type3, arg3,      \
+                                  type4, arg4, type5, arg5)                   \
+       type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4,    \
+                                               type5 arg5) {                  \
+          LSS_BODY(5, type, name, arg1, arg2, arg3, arg4, arg5);              \
+       }
+    #undef _syscall6
+    #define _syscall6(type, name, type1, arg1, type2, arg2, type3, arg3,      \
+                                  type4, arg4, type5, arg5, type6, arg6)      \
+       type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4,    \
+                                               type5 arg5, type6 arg6) {      \
+          LSS_BODY(6, type, name, arg1, arg2, arg3, arg4, arg5, arg6);        \
+       }
+    /* clone function adapted from glibc 2.18 clone.S                       */
+    LSS_INLINE int LSS_NAME(clone)(int (*fn)(void *), void *child_stack,
+                                   int flags, void *arg, int *parent_tidptr,
+                                   void *newtls, int *child_tidptr) {
+      long __ret, __err;
+      {
+#if defined(__PPC64__)
+
+/* Stack frame offsets.  */
+#if _CALL_ELF != 2
+#define FRAME_MIN_SIZE         112
+#define FRAME_TOC_SAVE         40
+#else
+#define FRAME_MIN_SIZE         32
+#define FRAME_TOC_SAVE         24
+#endif
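+
+/* _CALL_ELF != 2 selects the ELFv1 ABI (big-endian ppc64), whose minimum
+ * stack frame is 112 bytes with the TOC save slot at offset 40; ELFv2
+ * (ppc64le) shrinks these to 32 and 24.  The code below uses
+ * FRAME_MIN_SIZE to build the child's initial frame and FRAME_TOC_SAVE
+ * to preserve r2 across the function call.
+ */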
+
+
+        register int (*__fn)(void *) __asm__ ("r3") = fn;
+        register void *__cstack      __asm__ ("r4") = child_stack;
+        register int __flags         __asm__ ("r5") = flags;
+        register void * __arg        __asm__ ("r6") = arg;
+        register int * __ptidptr     __asm__ ("r7") = parent_tidptr;
+        register void * __newtls     __asm__ ("r8") = newtls;
+        register int * __ctidptr     __asm__ ("r9") = child_tidptr;
+        __asm__ __volatile__(
+            /* check for fn == NULL
+             * and child_stack == NULL
+             */
+            "cmpdi cr0, %6, 0\n\t"
+            "cmpdi cr1, %7, 0\n\t"
+            "cror  cr0*4+eq, cr1*4+eq, cr0*4+eq\n\t"
+            "beq-  cr0, 1f\n\t"
+
+            /* set up stack frame for child                                  */
+            "clrrdi %7, %7, 4\n\t"
+            "li     0, 0\n\t"
+            "stdu   0, -%13(%7)\n\t"
+
+            /* fn, arg, child_stack are saved across the syscall              */
+            "mr 28, %6\n\t"
+            "mr 29, %7\n\t"
+            "mr 27, %9\n\t"
+
+            /* syscall
+               r3 == flags
+               r4 == child_stack
+               r5 == parent_tidptr
+               r6 == newtls
+               r7 == child_tidptr                                            */
+            "mr 3, %8\n\t"
+            "mr 5, %10\n\t"
+            "mr 6, %11\n\t"
+            "mr 7, %12\n\t"
+            "li 0, %4\n\t"
+            "sc\n\t"
+
+            /* Test if syscall was successful                                */
+            "cmpdi  cr1, 3, 0\n\t"
+            "crandc cr1*4+eq, cr1*4+eq, cr0*4+so\n\t"
+            "bne-   cr1, 1f\n\t"
+
+            /* Do the function call                                          */
+            "std   2, %14(1)\n\t"
+#if _CALL_ELF != 2
+            "ld    0, 0(28)\n\t"
+            "ld    2, 8(28)\n\t"
+            "mtctr 0\n\t"
+#else
+            "mr    12, 28\n\t"
+            "mtctr 12\n\t"
+#endif
+            "mr    3, 27\n\t"
+            "bctrl\n\t"
+            "ld    2, %14(1)\n\t"
+
+            /* Call _exit(r3)                                                */
+            "li 0, %5\n\t"
+            "sc\n\t"
+
+            /* Return to parent                                              */
+            "1:\n\t"
+            "mr %0, 3\n\t"
+              : "=r" (__ret), "=r" (__err)
+              : "0" (-1), "i" (EINVAL),
+                "i" (__NR_clone), "i" (__NR_exit),
+                "r" (__fn), "r" (__cstack), "r" (__flags),
+                "r" (__arg), "r" (__ptidptr), "r" (__newtls),
+                "r" (__ctidptr), "i" (FRAME_MIN_SIZE), "i" (FRAME_TOC_SAVE)
+              : "cr0", "cr1", "memory", "ctr",
+                "r0", "r29", "r27", "r28");
+#else
+        register int (*__fn)(void *)    __asm__ ("r8")  = fn;
+        register void *__cstack                 __asm__ ("r4")  = child_stack;
+        register int __flags                    __asm__ ("r3")  = flags;
+        register void * __arg                   __asm__ ("r9")  = arg;
+        register int * __ptidptr                __asm__ ("r5")  = parent_tidptr;
+        register void * __newtls                __asm__ ("r6")  = newtls;
+        register int * __ctidptr                __asm__ ("r7")  = child_tidptr;
+        __asm__ __volatile__(
+            /* check for fn == NULL
+             * and child_stack == NULL
+             */
+            "cmpwi cr0, %6, 0\n\t"
+            "cmpwi cr1, %7, 0\n\t"
+            "cror cr0*4+eq, cr1*4+eq, cr0*4+eq\n\t"
+            "beq- cr0, 1f\n\t"
+
+            /* set up stack frame for child                                  */
+            "clrrwi %7, %7, 4\n\t"
+            "li 0, 0\n\t"
+            "stwu 0, -16(%7)\n\t"
+
+            /* fn, arg, child_stack are saved across the syscall: r27-r29   */
+            "mr 28, %6\n\t"
+            "mr 29, %7\n\t"
+            "mr 27, %9\n\t"
+
+            /* syscall                                                       */
+            "li 0, %4\n\t"
+            /* flags already in r3
+             * child_stack already in r4
+             * ptidptr already in r5
+             * newtls already in r6
+             * ctidptr already in r7
+             */
+            "sc\n\t"
+
+            /* Test if syscall was successful                                */
+            "cmpwi cr1, 3, 0\n\t"
+            "crandc cr1*4+eq, cr1*4+eq, cr0*4+so\n\t"
+            "bne- cr1, 1f\n\t"
+
+            /* Do the function call                                          */
+            "mtctr 28\n\t"
+            "mr 3, 27\n\t"
+            "bctrl\n\t"
+
+            /* Call _exit(r3)                                                */
+            "li 0, %5\n\t"
+            "sc\n\t"
+
+            /* Return to parent                                              */
+            "1:\n"
+            "mfcr %1\n\t"
+            "mr %0, 3\n\t"
+              : "=r" (__ret), "=r" (__err)
+              : "0" (-1), "1" (EINVAL),
+                "i" (__NR_clone), "i" (__NR_exit),
+                "r" (__fn), "r" (__cstack), "r" (__flags),
+                "r" (__arg), "r" (__ptidptr), "r" (__newtls),
+                "r" (__ctidptr)
+              : "cr0", "cr1", "memory", "ctr",
+                "r0", "r29", "r27", "r28");
+
+#endif
+      }
+      LSS_RETURN(int, __ret, __err);
+    }
+  #elif defined(__aarch64__)
+    #undef LSS_REG
+    #define LSS_REG(r,a) register long __x##r __asm__("x"#r) = (long)a
+    #undef  LSS_BODY
+    #define LSS_BODY(type,name,args...)                                       \
+          register long __res_x0 __asm__("x0");                               \
+          long __res;                                                         \
+          __asm__ __volatile__ ("mov x8, %1\n"                                \
+                                "svc 0x0\n"                                   \
+                                : "=r"(__res_x0)                              \
+                                : "i"(__NR_##name) , ## args                  \
+                                : "memory");                                  \
+          __res = __res_x0;                                                   \
+          LSS_RETURN(type, __res)
+    #undef _syscall0
+    #define _syscall0(type, name)                                             \
+      type LSS_NAME(name)(void) {                                             \
+        LSS_BODY(type, name);                                                 \
+      }
+    #undef _syscall1
+    #define _syscall1(type, name, type1, arg1)                                \
+      type LSS_NAME(name)(type1 arg1) {                                       \
+        LSS_REG(0, arg1); LSS_BODY(type, name, "r"(__x0));                    \
+      }
+    #undef _syscall2
+    #define _syscall2(type, name, type1, arg1, type2, arg2)                   \
+      type LSS_NAME(name)(type1 arg1, type2 arg2) {                           \
+        LSS_REG(0, arg1); LSS_REG(1, arg2);                                   \
+        LSS_BODY(type, name, "r"(__x0), "r"(__x1));                           \
+      }
+    #undef _syscall3
+    #define _syscall3(type, name, type1, arg1, type2, arg2, type3, arg3)      \
+      type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3) {               \
+        LSS_REG(0, arg1); LSS_REG(1, arg2); LSS_REG(2, arg3);                 \
+        LSS_BODY(type, name, "r"(__x0), "r"(__x1), "r"(__x2));                \
+      }
+    #undef _syscall4
+    #define _syscall4(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4)  \
+      type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4) {   \
+        LSS_REG(0, arg1); LSS_REG(1, arg2); LSS_REG(2, arg3);                 \
+        LSS_REG(3, arg4);                                                     \
+        LSS_BODY(type, name, "r"(__x0), "r"(__x1), "r"(__x2), "r"(__x3));     \
+      }
+    #undef _syscall5
+    #define _syscall5(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4,  \
+                      type5,arg5)                                             \
+      type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4,     \
+                          type5 arg5) {                                       \
+        LSS_REG(0, arg1); LSS_REG(1, arg2); LSS_REG(2, arg3);                 \
+        LSS_REG(3, arg4); LSS_REG(4, arg5);                                   \
+        LSS_BODY(type, name, "r"(__x0), "r"(__x1), "r"(__x2), "r"(__x3),      \
+                             "r"(__x4));                                      \
+      }
+    #undef _syscall6
+    #define _syscall6(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4,  \
+                      type5,arg5,type6,arg6)                                  \
+      type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4,     \
+                          type5 arg5, type6 arg6) {                           \
+        LSS_REG(0, arg1); LSS_REG(1, arg2); LSS_REG(2, arg3);                 \
+        LSS_REG(3, arg4); LSS_REG(4, arg5); LSS_REG(5, arg6);                 \
+        LSS_BODY(type, name, "r"(__x0), "r"(__x1), "x"(__x2), "r"(__x3),      \
+                             "r"(__x4), "r"(__x5));                           \
+      }
+    /* clone function adapted from glibc 2.18 clone.S                       */
+    LSS_INLINE int LSS_NAME(clone)(int (*fn)(void *), void *child_stack,
+                                   int flags, void *arg, int *parent_tidptr,
+                                   void *newtls, int *child_tidptr) {
+      long __res;
+      {
+        register int (*__fn)(void *)  __asm__("x0") = fn;
+        register void *__stack __asm__("x1") = child_stack;
+        register int   __flags __asm__("x2") = flags;
+        register void *__arg   __asm__("x3") = arg;
+        register int  *__ptid  __asm__("x4") = parent_tidptr;
+        register void *__tls   __asm__("x5") = newtls;
+        register int  *__ctid  __asm__("x6") = child_tidptr;
+        __asm__ __volatile__(/* if (fn == NULL || child_stack == NULL)
+                              *   return -EINVAL;
+                              */
+                             "cbz     x0,1f\n"
+                             "cbz     x1,1f\n"
+
+                             /* Push "arg" and "fn" onto the stack that will be
+                              * used by the child.
+                              */
+                             "stp x0,x3, [x1, #-16]!\n"
+
+                             "mov x0,x2\n" /* flags  */
+                             "mov x2,x4\n" /* ptid  */
+                             "mov x3,x5\n" /* tls */
+                             "mov x4,x6\n" /* ctid */
+                             "mov x8,%9\n" /* clone */
+
+                             "svc 0x0\n"
+
+                             /* if (x0 != 0)
+                              *   return x0;
+                              */
+                             "cmp x0, #0\n"
+                             "bne 2f\n"
+
+                             /* In the child, now. Call "fn(arg)".
+                              */
+                             "ldp x1, x0, [sp], #16\n"
+                             "blr x1\n"
+
+                             /* Call _exit(x0).
+                              */
+                             "mov x8, %10\n"
+                             "svc 0x0\n"
+                           "1:\n"
+                             "mov x0, %1\n"
+                           "2:\n"
+                             "mov %0, x0\n"
+                             : "=r" (__res)
+                             : "i"(-EINVAL),
+                               "r"(__fn), "r"(__stack), "r"(__flags), "r"(__arg),
+                               "r"(__ptid), "r"(__tls), "r"(__ctid),
+                               "i"(__NR_clone), "i"(__NR_exit)
+                             : "x30", "memory");
+      }
+      LSS_RETURN(int, __res);
+    }
+  #endif
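+  /* Usage sketch for the per-architecture clone wrappers above (stack
+   * size and worker function are illustrative, not part of this file):
+   *
+   *   static int worker(void *arg) { return 0; }
+   *   ...
+   *   char *stack = ...;  // e.g. 64KB; the child stack grows down
+   *   pid_t tid = LSS_NAME(clone)(worker, stack + stack_size,
+   *                               CLONE_VM|CLONE_FS|CLONE_FILES|SIGCHLD,
+   *                               NULL,                // arg
+   *                               NULL, NULL, NULL);   // ptid, tls, ctid
+   */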
+  #define __NR__exit   __NR_exit
+  #define __NR__gettid __NR_gettid
+  #define __NR__mremap __NR_mremap
+  LSS_INLINE _syscall1(int,     close,           int,         f)
+  LSS_INLINE _syscall1(int,     _exit,           int,         e)
+  LSS_INLINE _syscall3(int,     fcntl,           int,         f,
+                       int,            c, long,   a)
+  LSS_INLINE _syscall2(int,     fstat,           int,         f,
+                       struct kernel_stat*,   b)
+  LSS_INLINE _syscall6(int,     futex,           int*,        a,
+                       int,            o, int,    v,
+                       struct kernel_timespec*, t,
+                       int*, a2,
+                       int, v3)
+#ifdef __NR_getdents64
+    LSS_INLINE _syscall3(int,     getdents64,      int,         f,
+                         struct kernel_dirent64*, d, int,    c)
+#define KERNEL_DIRENT kernel_dirent64
+#define GETDENTS sys_getdents64
+#else
+    LSS_INLINE _syscall3(int,     getdents,        int,         f,
+                         struct kernel_dirent*, d, int,    c)
+#define KERNEL_DIRENT kernel_dirent
+#define GETDENTS sys_getdents
+#endif
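+  /* Sketch of how KERNEL_DIRENT/GETDENTS are meant to be consumed
+   * (buffer size and error handling are illustrative):
+   *
+   *   char buf[4096];
+   *   int n;
+   *   while ((n = GETDENTS(fd, (struct KERNEL_DIRENT *)buf,
+   *                        sizeof(buf))) > 0) {
+   *     int off = 0;
+   *     while (off < n) {
+   *       struct KERNEL_DIRENT *d = (struct KERNEL_DIRENT *)(buf + off);
+   *       // d->d_name holds the entry name
+   *       off += d->d_reclen;
+   *     }
+   *   }
+   */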
+  LSS_INLINE _syscall0(pid_t,   getpid)
+  LSS_INLINE _syscall0(pid_t,   getppid)
+  LSS_INLINE _syscall0(pid_t,   _gettid)
+  LSS_INLINE _syscall2(int,     kill,            pid_t,       p,
+                       int,            s)
+  #if defined(__x86_64__)
+    /* Need to make sure off_t isn't truncated to 32 bits under x32.  */
+    LSS_INLINE off_t LSS_NAME(lseek)(int f, off_t o, int w) {
+      _LSS_BODY(3, off_t, lseek, off_t, LSS_SYSCALL_ARG(f), (uint64_t)(o),
+                                        LSS_SYSCALL_ARG(w));
+    }
+  #else
+    LSS_INLINE _syscall3(off_t,   lseek,           int,         f,
+                         off_t,          o, int,    w)
+  #endif
+  LSS_INLINE _syscall2(int,     munmap,          void*,       s,
+                       size_t,         l)
+  LSS_INLINE _syscall5(void*,   _mremap,         void*,       o,
+                       size_t,         os,       size_t,      ns,
+                       unsigned long,  f, void *, a)
+  LSS_INLINE _syscall2(int,     prctl,           int,         o,
+                       long,           a)
+  LSS_INLINE _syscall4(long,    ptrace,          int,         r,
+                       pid_t,          p, void *, a, void *, d)
+  LSS_INLINE _syscall3(ssize_t, read,            int,         f,
+                       void *,         b, size_t, c)
+  LSS_INLINE _syscall4(int,     rt_sigaction,    int,         s,
+                       const struct kernel_sigaction*, a,
+                       struct kernel_sigaction*, o, size_t,   c)
+  LSS_INLINE _syscall4(int, rt_sigprocmask,      int,         h,
+                       const struct kernel_sigset_t*,  s,
+                       struct kernel_sigset_t*,        o, size_t, c);
+  LSS_INLINE _syscall0(int,     sched_yield)
+  LSS_INLINE _syscall2(int,     sigaltstack,     const stack_t*, s,
+                       const stack_t*, o)
+  #if defined(__NR_fstatat)
+    LSS_INLINE _syscall4(int, fstatat, int, d, const char *, p,
+                         struct kernel_stat*,   b, int, flags)
+    LSS_INLINE int LSS_NAME(stat)(const char* p, struct kernel_stat* b) {
+      return LSS_NAME(fstatat)(AT_FDCWD, p, b, 0);
+    }
+  #else
+    LSS_INLINE _syscall2(int,     stat,            const char*, f,
+                         struct kernel_stat*,   b)
+  #endif
+  LSS_INLINE _syscall3(ssize_t, write,            int,        f,
+                       const void *,   b, size_t, c)
+  #if defined(__NR_getcpu)
+    LSS_INLINE _syscall3(long, getcpu, unsigned *, cpu,
+                         unsigned *, node, void *, unused);
+  #endif
+  #if defined(__x86_64__) || defined(__aarch64__) || \
+     (defined(__mips__) && _MIPS_SIM != _MIPS_SIM_ABI32)
+    LSS_INLINE _syscall3(int, socket,             int,   d,
+                         int,                     t, int,       p)
+  #endif
+  #if defined(__x86_64__)
+    /* Need to make sure __off64_t isn't truncated to 32 bits under x32.  */
+    LSS_INLINE void* LSS_NAME(mmap)(void *s, size_t l, int p, int f, int d,
+                                    __off64_t o) {
+      LSS_BODY(6, void*, mmap, LSS_SYSCALL_ARG(s), LSS_SYSCALL_ARG(l),
+                               LSS_SYSCALL_ARG(p), LSS_SYSCALL_ARG(f),
+                               LSS_SYSCALL_ARG(d), (uint64_t)(o));
+    }
+
+    LSS_INLINE int LSS_NAME(sigaction)(int signum,
+                                       const struct kernel_sigaction *act,
+                                       struct kernel_sigaction *oldact) {
+      /* On x86_64, the kernel requires us to always set our own
+       * SA_RESTORER in order to be able to return from a signal handler.
+       * This function must have a "magic" signature that gdb (and maybe
+       * the kernel?) can recognize.
+       */
+      if (act != NULL && !(act->sa_flags & SA_RESTORER)) {
+        struct kernel_sigaction a = *act;
+        a.sa_flags   |= SA_RESTORER;
+        a.sa_restorer = LSS_NAME(restore_rt)();
+        return LSS_NAME(rt_sigaction)(signum, &a, oldact,
+                                      (KERNEL_NSIG+7)/8);
+      } else {
+        return LSS_NAME(rt_sigaction)(signum, act, oldact,
+                                      (KERNEL_NSIG+7)/8);
+      }
+    }
+
+    LSS_INLINE int LSS_NAME(sigprocmask)(int how,
+                                         const struct kernel_sigset_t *set,
+                                         struct kernel_sigset_t *oldset) {
+      return LSS_NAME(rt_sigprocmask)(how, set, oldset, (KERNEL_NSIG+7)/8);
+    }
+  #endif
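+  /* Usage sketch (handler name is illustrative): installing a handler
+   * through the wrapper above, which fills in SA_RESTORER automatically:
+   *
+   *   struct kernel_sigaction sa;
+   *   memset(&sa, 0, sizeof(sa));
+   *   sa.sa_handler_ = my_handler;
+   *   LSS_NAME(sigfillset)(&sa.sa_mask);  // block everything in handler
+   *   LSS_NAME(sigaction)(SIGPROF, &sa, NULL);
+   */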
+  #if (defined(__aarch64__)) || \
+      (defined(__mips__) && (_MIPS_ISA == _MIPS_ISA_MIPS64))
+    LSS_INLINE _syscall6(void*, mmap,              void*, s,
+                         size_t,                   l, int,               p,
+                         int,                      f, int,               d,
+                         __off64_t,                o)
+    LSS_INLINE int LSS_NAME(sigaction)(int signum,
+                                       const struct kernel_sigaction *act,
+                                       struct kernel_sigaction *oldact) {
+        return LSS_NAME(rt_sigaction)(signum, act, oldact, (KERNEL_NSIG+7)/8);
+
+    }
+    LSS_INLINE int LSS_NAME(sigprocmask)(int how,
+                                         const struct kernel_sigset_t *set,
+                                         struct kernel_sigset_t *oldset) {
+      return LSS_NAME(rt_sigprocmask)(how, set, oldset, (KERNEL_NSIG+7)/8);
+    }
+  #endif
+  #ifdef __NR_wait4
+    LSS_INLINE _syscall4(pid_t, wait4,            pid_t, p,
+                         int*,                    s, int,       o,
+                         struct kernel_rusage*,   r)
+    LSS_INLINE pid_t LSS_NAME(waitpid)(pid_t pid, int *status, int options) {
+      return LSS_NAME(wait4)(pid, status, options, 0);
+    }
+  #else
+    LSS_INLINE _syscall3(pid_t, waitpid,          pid_t, p,
+                         int*,              s,    int,   o)
+  #endif
+  #ifdef __NR_openat
+    LSS_INLINE _syscall4(int, openat, int, d, const char *, p, int, f, int, m)
+    LSS_INLINE int LSS_NAME(open)(const char* p, int f, int m) {
+      return LSS_NAME(openat)(AT_FDCWD, p, f, m);
+    }
+  #else
+  LSS_INLINE _syscall3(int,     open,            const char*, p,
+                       int,            f, int,    m)
+  #endif
+  LSS_INLINE int LSS_NAME(sigemptyset)(struct kernel_sigset_t *set) {
+    memset(&set->sig, 0, sizeof(set->sig));
+    return 0;
+  }
+
+  LSS_INLINE int LSS_NAME(sigfillset)(struct kernel_sigset_t *set) {
+    memset(&set->sig, -1, sizeof(set->sig));
+    return 0;
+  }
+
+  LSS_INLINE int LSS_NAME(sigaddset)(struct kernel_sigset_t *set,
+                                     int signum) {
+    if (signum < 1 || signum > (int)(8*sizeof(set->sig))) {
+      LSS_ERRNO = EINVAL;
+      return -1;
+    } else {
+      set->sig[(signum - 1)/(8*sizeof(set->sig[0]))]
+          |= 1UL << ((signum - 1) % (8*sizeof(set->sig[0])));
+      return 0;
+    }
+  }
+
+  LSS_INLINE int LSS_NAME(sigdelset)(struct kernel_sigset_t *set,
+                                        int signum) {
+    if (signum < 1 || signum > (int)(8*sizeof(set->sig))) {
+      LSS_ERRNO = EINVAL;
+      return -1;
+    } else {
+      set->sig[(signum - 1)/(8*sizeof(set->sig[0]))]
+          &= ~(1UL << ((signum - 1) % (8*sizeof(set->sig[0]))));
+      return 0;
+    }
+  }
+
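+  /* Sketch: blocking a signal around a critical region with the helpers
+   * above (error handling omitted):
+   *
+   *   struct kernel_sigset_t new_set, old_set;
+   *   LSS_NAME(sigemptyset)(&new_set);
+   *   LSS_NAME(sigaddset)(&new_set, SIGPROF);
+   *   LSS_NAME(sigprocmask)(SIG_BLOCK, &new_set, &old_set);
+   *   ...critical region...
+   *   LSS_NAME(sigprocmask)(SIG_SETMASK, &old_set, NULL);
+   */
+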
+  #if defined(__i386__) || \
+      defined(__arm__) || \
+     (defined(__mips__) && _MIPS_SIM == _MIPS_SIM_ABI32) || defined(__PPC__)
+    #define __NR__sigaction   __NR_sigaction
+    #define __NR__sigprocmask __NR_sigprocmask
+    LSS_INLINE _syscall2(int, fstat64,             int, f,
+                         struct kernel_stat64 *, b)
+    LSS_INLINE _syscall5(int, _llseek,     uint, fd, ulong, hi, ulong, lo,
+                         loff_t *, res, uint, wh)
+#ifdef __PPC64__
+    LSS_INLINE _syscall6(void*, mmap,              void*, s,
+                         size_t,                   l, int,               p,
+                         int,                      f, int,               d,
+                         off_t,                    o)
+#else
+    #ifndef __ARM_EABI__
+    /* Not available on ARM EABI Linux.  */
+    LSS_INLINE _syscall1(void*, mmap,              void*, a)
+    #endif
+    LSS_INLINE _syscall6(void*, mmap2,             void*, s,
+                         size_t,                   l, int,               p,
+                         int,                      f, int,               d,
+                         off_t,                    o)
+#endif
+    LSS_INLINE _syscall3(int,   _sigaction,        int,   s,
+                         const struct kernel_old_sigaction*,  a,
+                         struct kernel_old_sigaction*,        o)
+    LSS_INLINE _syscall3(int,   _sigprocmask,      int,   h,
+                         const unsigned long*,     s,
+                         unsigned long*,           o)
+    LSS_INLINE _syscall2(int, stat64,              const char *, p,
+                         struct kernel_stat64 *, b)
+
+    LSS_INLINE int LSS_NAME(sigaction)(int signum,
+                                       const struct kernel_sigaction *act,
+                                       struct kernel_sigaction *oldact) {
+      int old_errno = LSS_ERRNO;
+      int rc;
+      struct kernel_sigaction a;
+      if (act != NULL) {
+        a             = *act;
+        #ifdef __i386__
+        /* On i386, the kernel requires us to always set our own
+         * SA_RESTORER when using realtime signals. Otherwise, it does not
+         * know how to return from a signal handler. This function must have
+         * a "magic" signature that the "gdb" (and maybe the kernel?) can
+         * recognize.
+         * Apparently, a SA_RESTORER is implicitly set by the kernel, when
+         * using non-realtime signals.
+         *
+         * TODO: Test whether ARM needs a restorer
+         */
+        if (!(a.sa_flags & SA_RESTORER)) {
+          a.sa_flags   |= SA_RESTORER;
+          a.sa_restorer = (a.sa_flags & SA_SIGINFO)
+                          ? LSS_NAME(restore_rt)() : LSS_NAME(restore)();
+        }
+        #endif
+      }
+      rc = LSS_NAME(rt_sigaction)(signum, act ? &a : act, oldact,
+                                  (KERNEL_NSIG+7)/8);
+      if (rc < 0 && LSS_ERRNO == ENOSYS) {
+        struct kernel_old_sigaction oa, ooa, *ptr_a = &oa, *ptr_oa = &ooa;
+        if (!act) {
+          ptr_a            = NULL;
+        } else {
+          oa.sa_handler_   = act->sa_handler_;
+          memcpy(&oa.sa_mask, &act->sa_mask, sizeof(oa.sa_mask));
+          #ifndef __mips__
+          oa.sa_restorer   = act->sa_restorer;
+          #endif
+          oa.sa_flags      = act->sa_flags;
+        }
+        if (!oldact) {
+          ptr_oa           = NULL;
+        }
+        LSS_ERRNO = old_errno;
+        rc = LSS_NAME(_sigaction)(signum, ptr_a, ptr_oa);
+        if (rc == 0 && oldact) {
+          if (act) {
+            memcpy(oldact, act, sizeof(*act));
+          } else {
+            memset(oldact, 0, sizeof(*oldact));
+          }
+          oldact->sa_handler_    = ptr_oa->sa_handler_;
+          oldact->sa_flags       = ptr_oa->sa_flags;
+          memcpy(&oldact->sa_mask, &ptr_oa->sa_mask, sizeof(ptr_oa->sa_mask));
+          #ifndef __mips__
+          oldact->sa_restorer    = ptr_oa->sa_restorer;
+          #endif
+        }
+      }
+      return rc;
+    }
+
+    LSS_INLINE int LSS_NAME(sigprocmask)(int how,
+                                         const struct kernel_sigset_t *set,
+                                         struct kernel_sigset_t *oldset) {
+      int olderrno = LSS_ERRNO;
+      int rc = LSS_NAME(rt_sigprocmask)(how, set, oldset, (KERNEL_NSIG+7)/8);
+      if (rc < 0 && LSS_ERRNO == ENOSYS) {
+        LSS_ERRNO = olderrno;
+        if (oldset) {
+          LSS_NAME(sigemptyset)(oldset);
+        }
+        rc = LSS_NAME(_sigprocmask)(how,
+                                    set ? &set->sig[0] : NULL,
+                                    oldset ? &oldset->sig[0] : NULL);
+      }
+      return rc;
+    }
+  #endif
+  #if defined(__i386__) || \
+      defined(__PPC__) || \
+      (defined(__arm__) && !defined(__ARM_EABI__)) || \
+      (defined(__mips__) && _MIPS_SIM == _MIPS_SIM_ABI32)
+
+    /* See sys_socketcall in net/socket.c in kernel source.
+     * It de-multiplexes on its first arg and unpacks the arglist
+     * array in its second arg.
+     */
+    LSS_INLINE _syscall2(long, socketcall, int, c, unsigned long*, a)
+
+    LSS_INLINE int LSS_NAME(socket)(int domain, int type, int protocol) {
+      unsigned long args[3] = {
+        (unsigned long) domain,
+        (unsigned long) type,
+        (unsigned long) protocol
+      };
+      return LSS_NAME(socketcall)(1, args);
+    }
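+
+    /* A hedged, illustrative sketch (not part of the original interface,
+     * kept under "#if 0"): other socket operations are demultiplexed
+     * through sys_socketcall the same way; e.g. connect() is call
+     * number 3 (SYS_CONNECT) in net/socket.c.
+     */
+#if 0
+    LSS_INLINE int LSS_NAME(connect)(int s, const void *addr,
+                                     unsigned int addrlen) {
+      unsigned long args[3] = {
+        (unsigned long) s,
+        (unsigned long) addr,
+        (unsigned long) addrlen
+      };
+      return LSS_NAME(socketcall)(3, args);
+    }
+#endif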
+  #elif defined(__ARM_EABI__)
+    LSS_INLINE _syscall3(int, socket,             int,   d,
+                         int,                     t, int,       p)
+  #endif
+  #if defined(__mips__)
+    /* sys_pipe() on MIPS has non-standard calling conventions, as it returns
+     * both file handles through CPU registers.
+     */
+    LSS_INLINE int LSS_NAME(pipe)(int *p) {
+      register unsigned long __v0 __asm__("$2") = __NR_pipe;
+      register unsigned long __v1 __asm__("$3");
+      register unsigned long __r7 __asm__("$7");
+      __asm__ __volatile__ ("syscall\n"
+                            : "=&r"(__v0), "=&r"(__v1), "+r" (__r7)
+                            : "0"(__v0)
+                            : "$8", "$9", "$10", "$11", "$12",
+                              "$13", "$14", "$15", "$24", "memory");
+      if (__r7) {
+        LSS_ERRNO = __v0;
+        return -1;
+      } else {
+        p[0] = __v0;
+        p[1] = __v1;
+        return 0;
+      }
+    }
+  #elif defined(__NR_pipe2)
+    LSS_INLINE _syscall2(int,     pipe2,          int *, p,
+                         int,     f                        )
+    LSS_INLINE int LSS_NAME(pipe)(int *p) {
+      return LSS_NAME(pipe2)(p, 0);
+    }
+  #else
+    LSS_INLINE _syscall1(int,     pipe,           int *, p)
+  #endif
+
+  LSS_INLINE pid_t LSS_NAME(gettid)() {
+    pid_t tid = LSS_NAME(_gettid)();
+    if (tid != -1) {
+      return tid;
+    }
+    return LSS_NAME(getpid)();
+  }
+
+  LSS_INLINE void *LSS_NAME(mremap)(void *old_address, size_t old_size,
+                                    size_t new_size, int flags, ...) {
+    va_list ap;
+    void *new_address, *rc;
+    va_start(ap, flags);
+    new_address = va_arg(ap, void *);
+    rc = LSS_NAME(_mremap)(old_address, old_size, new_size,
+                           flags, new_address);
+    va_end(ap);
+    return rc;
+  }
+
+  LSS_INLINE int LSS_NAME(ptrace_detach)(pid_t pid) {
+    /* PTRACE_DETACH can sometimes forget to wake up the tracee and it
+     * then sends job control signals to the real parent, rather than to
+     * the tracer. We reduce the risk of this happening by starting a
+     * whole new time slice, and then quickly sending a SIGCONT signal
+     * right after detaching from the tracee.
+     */
+    int rc, err;
+    LSS_NAME(sched_yield)();
+    rc = LSS_NAME(ptrace)(PTRACE_DETACH, pid, (void *)0, (void *)0);
+    err = LSS_ERRNO;
+    LSS_NAME(kill)(pid, SIGCONT);
+    LSS_ERRNO = err;
+    return rc;
+  }
+#endif
+
+#if defined(__cplusplus) && !defined(SYS_CPLUSPLUS)
+}
+#endif
+
+#endif
+#endif
diff --git a/src/base/linuxthreads.cc b/src/base/linuxthreads.cc
new file mode 100644
index 0000000..891e70c
--- /dev/null
+++ b/src/base/linuxthreads.cc
@@ -0,0 +1,707 @@
+// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
+/* Copyright (c) 2005-2007, Google Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with the
+ * distribution.
+ *     * Neither the name of Google Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * ---
+ * Author: Markus Gutschke
+ */
+
+#include "base/linuxthreads.h"
+
+#ifdef THREADS
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <sched.h>
+#include <signal.h>
+#include <stdlib.h>
+#include <string.h>
+#include <fcntl.h>
+#include <sys/socket.h>
+#include <sys/wait.h>
+#include <sys/prctl.h>
+#include <semaphore.h>
+
+#include "base/linux_syscall_support.h"
+#include "base/thread_lister.h"
+
+#ifndef CLONE_UNTRACED
+#define CLONE_UNTRACED 0x00800000
+#endif
+
+
+/* Synchronous signals that should not be blocked while in the lister thread.
+ */
+static const int sync_signals[]  = { SIGABRT, SIGILL, SIGFPE, SIGSEGV, SIGBUS,
+                                     SIGXCPU, SIGXFSZ };
+
+/* itoa() is not a standard function, and we cannot safely call printf()
+ * after suspending threads. So, we just implement our own copy. A
+ * recursive approach is the easiest here.
+ */
+static char *local_itoa(char *buf, int i) {
+  if (i < 0) {
+    *buf++ = '-';
+    return local_itoa(buf, -i);
+  } else {
+    if (i >= 10)
+      buf = local_itoa(buf, i/10);
+    *buf++ = (i%10) + '0';
+    *buf   = '\000';
+    return buf;
+  }
+}
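+
+/* Example: local_itoa(buf, -42) writes "-42\0" and returns a pointer to
+ * the terminating NUL; the code below relies on this to assemble /proc
+ * path strings in place.
+ */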
+
+
+/* Wrapper around clone() that runs "fn" on the same stack as the
+ * caller! Unlike fork(), the cloned thread shares the same address space.
+ * The caller must be careful to use only minimal amounts of stack until
+ * the cloned thread has returned.
+ * There is a good chance that the cloned thread and the caller will share
+ * the same copy of errno!
+ */
+#ifdef __GNUC__
+#if __GNUC__ == 3 && __GNUC_MINOR__ >= 1 || __GNUC__ > 3
+/* Try to force this function into a separate stack frame, and make sure
+ * that arguments are passed on the stack.
+ */
+static int local_clone (int (*fn)(void *), void *arg, ...)
+  __attribute__ ((noinline));
+#endif
+#endif
+
+/* To keep the gap from crossing page boundaries, grow it to the large
+ * (64kB) page size that PowerPC systems commonly use.  */
+#ifdef __PPC64__
+#define CLONE_STACK_SIZE 65536
+#else
+#define CLONE_STACK_SIZE 4096
+#endif
+
+static int local_clone (int (*fn)(void *), void *arg, ...) {
+  /* Leave a gap of CLONE_STACK_SIZE bytes between the caller's stack and
+   * the new clone. This should be more than sufficient for the caller to
+   * call waitpid() until the cloned thread terminates.
+   *
+   * It is important that we set the CLONE_UNTRACED flag, because newer
+   * versions of "gdb" otherwise attempt to attach to our thread, and will
+   * attempt to reap its status codes. This subsequently results in the
+   * caller hanging indefinitely in waitpid(), waiting for a change in
+   * status that will never happen. By setting the CLONE_UNTRACED flag, we
+   * prevent "gdb" from stealing events, but we still expect the thread
+   * lister to fail, because it cannot PTRACE_ATTACH to the process that
+   * is being debugged. This is OK and the error code will be reported
+   * correctly.
+   */
+  return sys_clone(fn, (char *)&arg - CLONE_STACK_SIZE,
+                   CLONE_VM|CLONE_FS|CLONE_FILES|CLONE_UNTRACED, arg, 0, 0, 0);
+}
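+
+/* A hedged usage sketch of local_clone() (hypothetical helper, kept under
+ * "#if 0" as illustration only): run a function in a clone that shares our
+ * address space, then reap it with waitpid() as described above.
+ */
+#if 0
+static int Worker(void *arg) {
+  *(int *)arg = 42;   /* shared address space: the caller sees this store */
+  return 0;
+}
+
+static int RunCloned(void) {
+  int value = 0;
+  pid_t pid = local_clone(Worker, &value);
+  if (pid < 0)
+    return -1;
+  while (sys_waitpid(pid, (int *)0, __WALL) < 0 && errno == EINTR) {
+    /* Keep waiting */
+  }
+  return value;
+}
+#endif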
+
+
+/* Local substitute for the atoi() function, which is not necessarily safe
+ * to call once threads are suspended (depending on whether libc looks up
+ * locale information when executing atoi()).
+ */
+static int local_atoi(const char *s) {
+  int n   = 0;
+  int neg = *s == '-';
+  if (neg)
+    s++;
+  while (*s >= '0' && *s <= '9')
+    n = 10*n + (*s++ - '0');
+  return neg ? -n : n;
+}
+
+
+/* Re-runs fn until it doesn't cause EINTR
+ */
+#define NO_INTR(fn)   do {} while ((fn) < 0 && errno == EINTR)
+
+
+/* Wrap a class around system calls, in order to give us access to
+ * a private copy of errno. This only works in C++, but it has the
+ * advantage of not needing nested functions, which are a non-standard
+ * language extension.
+ */
+#ifdef __cplusplus
+namespace {
+  class SysCalls {
+   public:
+    #define SYS_CPLUSPLUS
+    #define SYS_ERRNO     my_errno
+    #define SYS_INLINE    inline
+    #define SYS_PREFIX    -1
+    #undef  SYS_LINUX_SYSCALL_SUPPORT_H
+    #include "linux_syscall_support.h"
+    SysCalls() : my_errno(0) { }
+    int my_errno;
+  };
+}
+#define ERRNO sys.my_errno
+#else
+#define ERRNO my_errno
+#endif
+
+
+/* Wrapper for open() which is guaranteed to never return EINTR.
+ */
+static int c_open(const char *fname, int flags, int mode) {
+  ssize_t rc;
+  NO_INTR(rc = sys_open(fname, flags, mode));
+  return rc;
+}
+
+
+/* abort() is not safely reentrant, and changes its behavior each time
+ * it is called. This means that if the main application ever called
+ * abort(), we cannot safely call it again. This would happen if we were
+ * called from a SIGABRT signal handler in the main application. So,
+ * document that raising SIGABRT from the thread lister makes it not
+ * signal safe (and vice-versa).
+ * Also, since we share address space with the main application, we
+ * cannot call abort() from the callback and expect the main application
+ * to behave correctly afterwards. In fact, the only thing we can do is
+ * to terminate the main application with extreme prejudice (aka
+ * PTRACE_KILL).
+ * We set up our own SIGABRT handler to do this.
+ * In order to find the main application from the signal handler, we
+ * need to store information about it in global variables. This is
+ * safe, because the main application should be suspended at this
+ * time. If the callback ever called TCMalloc_ResumeAllProcessThreads(), then
+ * we are running a higher risk, though. So, try to avoid calling
+ * abort() after calling TCMalloc_ResumeAllProcessThreads.
+ */
+static volatile int *sig_pids, sig_num_threads, sig_proc, sig_marker;
+
+
+/* Signal handler to help us recover from dying while we are attached to
+ * other threads.
+ */
+static void SignalHandler(int signum, siginfo_t *si, void *data) {
+  if (sig_pids != NULL) {
+    if (signum == SIGABRT) {
+      while (sig_num_threads-- > 0) {
+        /* Not sure if sched_yield is really necessary here, but it does not */
+        /* hurt, and it might be necessary for the same reasons that we have */
+        /* to do so in sys_ptrace_detach().                                  */
+        sys_sched_yield();
+        sys_ptrace(PTRACE_KILL, sig_pids[sig_num_threads], 0, 0);
+      }
+    } else if (sig_num_threads > 0) {
+      TCMalloc_ResumeAllProcessThreads(sig_num_threads, (int *)sig_pids);
+    }
+  }
+  sig_pids = NULL;
+  if (sig_marker >= 0)
+    NO_INTR(sys_close(sig_marker));
+  sig_marker = -1;
+  if (sig_proc >= 0)
+    NO_INTR(sys_close(sig_proc));
+  sig_proc = -1;
+
+  sys__exit(signum == SIGABRT ? 1 : 2);
+}
+
+
+/* Try to dirty the stack, and hope that the compiler is not smart enough
+ * to optimize this function away. Or worse, the compiler could inline the
+ * function and permanently allocate the data on the stack.
+ */
+static void DirtyStack(size_t amount) {
+  char buf[amount];
+  memset(buf, 0, amount);
+  sys_read(-1, buf, amount);
+}
+
+
+/* Data structure for passing arguments to the lister thread.
+ */
+#define ALT_STACKSIZE (MINSIGSTKSZ + 4096)
+
+struct ListerParams {
+  int         result, err;
+  char        *altstack_mem;
+  ListAllProcessThreadsCallBack callback;
+  void        *parameter;
+  va_list     ap;
+  sem_t       *lock;
+};
+
+
+static void ListerThread(struct ListerParams *args) {
+  int                found_parent = 0;
+  pid_t              clone_pid  = sys_gettid(), ppid = sys_getppid();
+  char               proc_self_task[80], marker_name[48], *marker_path;
+  const char         *proc_paths[3];
+  const char *const  *proc_path = proc_paths;
+  int                proc = -1, marker = -1, num_threads = 0;
+  int                max_threads = 0, sig;
+  struct kernel_stat marker_sb, proc_sb;
+  stack_t            altstack;
+
+  /* Wait for parent thread to set appropriate permissions
+   * to allow ptrace activity
+   */
+  if (sem_wait(args->lock) < 0) {
+    goto failure;
+  }
+
+  /* Create "marker" that we can use to detect threads sharing the same
+   * address space and the same file handles. By setting the FD_CLOEXEC flag
+   * we minimize the risk of misidentifying child processes as threads;
+   * and since there is still a race condition, we will filter those out
+   * later, anyway.
+   */
+  if ((marker = sys_socket(PF_LOCAL, SOCK_DGRAM, 0)) < 0 ||
+      sys_fcntl(marker, F_SETFD, FD_CLOEXEC) < 0) {
+  failure:
+    args->result = -1;
+    args->err    = errno;
+    if (marker >= 0)
+      NO_INTR(sys_close(marker));
+    sig_marker = marker = -1;
+    if (proc >= 0)
+      NO_INTR(sys_close(proc));
+    sig_proc = proc = -1;
+    sys__exit(1);
+  }
+
+  /* Compute search paths for finding thread directories in /proc            */
+  local_itoa(strrchr(strcpy(proc_self_task, "/proc/"), '\000'), ppid);
+  strcpy(marker_name, proc_self_task);
+  marker_path = marker_name + strlen(marker_name);
+  strcat(proc_self_task, "/task/");
+  proc_paths[0] = proc_self_task; /* /proc/$$/task/                          */
+  proc_paths[1] = "/proc/";       /* /proc/                                  */
+  proc_paths[2] = NULL;
+
+  /* Compute path for marker socket in /proc                                 */
+  local_itoa(strcpy(marker_path, "/fd/") + 4, marker);
+  if (sys_stat(marker_name, &marker_sb) < 0) {
+    goto failure;
+  }
+
+  /* Catch signals on an alternate pre-allocated stack. This way, we can
+   * safely execute the signal handler even if we ran out of memory.
+   */
+  memset(&altstack, 0, sizeof(altstack));
+  altstack.ss_sp    = args->altstack_mem;
+  altstack.ss_flags = 0;
+  altstack.ss_size  = ALT_STACKSIZE;
+  sys_sigaltstack(&altstack, (const stack_t *)NULL);
+
+  /* Some kernels forget to wake up traced processes when the
+   * tracer dies.  So, intercept synchronous signals and make sure
+   * that we wake up our tracees before dying. It is the caller's
+   * responsibility to ensure that asynchronous signals do not
+   * interfere with this function.
+   */
+  sig_marker = marker;
+  sig_proc   = -1;
+  for (sig = 0; sig < sizeof(sync_signals)/sizeof(*sync_signals); sig++) {
+    struct kernel_sigaction sa;
+    memset(&sa, 0, sizeof(sa));
+    sa.sa_sigaction_ = SignalHandler;
+    sys_sigfillset(&sa.sa_mask);
+    sa.sa_flags      = SA_ONSTACK|SA_SIGINFO|SA_RESETHAND;
+    sys_sigaction(sync_signals[sig], &sa, (struct kernel_sigaction *)NULL);
+  }
+  
+  /* Read process directories in /proc/...                                   */
+  for (;;) {
+    /* Some kernels know about threads, and hide them in "/proc"
+     * (although they are still there, if you know the process
+     * id). Threads are moved into a separate "task" directory. We
+     * check there first, and then fall back on the older naming
+     * convention if necessary.
+     */
+    if ((sig_proc = proc = c_open(*proc_path, O_RDONLY|O_DIRECTORY, 0)) < 0) {
+      if (*++proc_path != NULL)
+        continue;
+      goto failure;
+    }
+    if (sys_fstat(proc, &proc_sb) < 0)
+      goto failure;
+    
+    /* Since we are suspending threads, we cannot call any libc
+     * functions that might acquire locks. Most notably, we cannot
+     * call malloc(). So, we have to allocate memory on the stack,
+     * instead. Since we do not know how much memory we need, we
+     * make a best guess. And if we guessed incorrectly we retry on
+     * a second iteration (by jumping to "detach_threads").
+     *
+     * Unless the number of threads is increasing very rapidly, we
+     * should never need to do so, though, as our guesstimate is very
+     * conservative.
+     */
+    if (max_threads < proc_sb.st_nlink + 100)
+      max_threads = proc_sb.st_nlink + 100;
+    
+    /* scope */ {
+      pid_t pids[max_threads];
+      int   added_entries = 0;
+      sig_num_threads     = num_threads;
+      sig_pids            = pids;
+      for (;;) {
+        struct KERNEL_DIRENT *entry;
+        char buf[4096];
+        ssize_t nbytes = GETDENTS(proc, (struct KERNEL_DIRENT *)buf,
+                                         sizeof(buf));
+        if (nbytes < 0)
+          goto failure;
+        else if (nbytes == 0) {
+          if (added_entries) {
+            /* Need to keep iterating over "/proc" in multiple
+             * passes until we no longer find any more threads. This
+             * algorithm eventually completes, when all threads have
+             * been suspended.
+             */
+            added_entries = 0;
+            sys_lseek(proc, 0, SEEK_SET);
+            continue;
+          }
+          break;
+        }
+        for (entry = (struct KERNEL_DIRENT *)buf;
+             entry < (struct KERNEL_DIRENT *)&buf[nbytes];
+             entry = (struct KERNEL_DIRENT *)((char *)entry+entry->d_reclen)) {
+          if (entry->d_ino != 0) {
+            const char *ptr = entry->d_name;
+            pid_t pid;
+            
+            /* Some kernels hide threads by preceding the pid with a '.'     */
+            if (*ptr == '.')
+              ptr++;
+            
+            /* If the directory is not numeric, it cannot be a
+             * process/thread
+             */
+            if (*ptr < '0' || *ptr > '9')
+              continue;
+            pid = local_atoi(ptr);
+
+            /* Attach (and suspend) all threads                              */
+            if (pid && pid != clone_pid) {
+              struct kernel_stat tmp_sb;
+              char fname[entry->d_reclen + 48];
+              strcat(strcat(strcpy(fname, "/proc/"),
+                            entry->d_name), marker_path);
+              
+              /* Check if the marker is identical to the one we created      */
+              if (sys_stat(fname, &tmp_sb) >= 0 &&
+                  marker_sb.st_ino == tmp_sb.st_ino) {
+                long i, j;
+
+                /* Found one of our threads, make sure it is no duplicate    */
+                for (i = 0; i < num_threads; i++) {
+                  /* Linear search is slow, but should not matter much for
+                   * the typically small number of threads.
+                   */
+                  if (pids[i] == pid) {
+                    /* Found a duplicate; most likely on second pass         */
+                    goto next_entry;
+                  }
+                }
+                
+                /* Check whether data structure needs growing                */
+                if (num_threads >= max_threads) {
+                  /* Back to square one, this time with more memory          */
+                  NO_INTR(sys_close(proc));
+                  goto detach_threads;
+                }
+
+                /* Attaching to thread suspends it                           */
+                pids[num_threads++] = pid;
+                sig_num_threads     = num_threads;
+                if (sys_ptrace(PTRACE_ATTACH, pid, (void *)0,
+                               (void *)0) < 0) {
+                  /* If operation failed, ignore thread. Maybe it
+                   * just died?  There might also be a race
+                   * condition with a concurrent core dumper or
+                   * with a debugger. In that case, we will just
+                   * make a best effort, rather than failing
+                   * entirely.
+                   */
+                  num_threads--;
+                  sig_num_threads = num_threads;
+                  goto next_entry;
+                }
+                while (sys_waitpid(pid, (int *)0, __WALL) < 0) {
+                  if (errno != EINTR) {
+                    sys_ptrace_detach(pid);
+                    num_threads--;
+                    sig_num_threads = num_threads;
+                    goto next_entry;
+                  }
+                }
+
+                if (sys_ptrace(PTRACE_PEEKDATA, pid, &i, &j) || i++ != j ||
+                    sys_ptrace(PTRACE_PEEKDATA, pid, &i, &j) || i   != j) {
+                  /* Address spaces are distinct, even though both
+                   * processes show the "marker". This is probably
+                   * a forked child process rather than a thread.
+                   */
+                  sys_ptrace_detach(pid);
+                  num_threads--;
+                  sig_num_threads = num_threads;
+                } else {
+                  found_parent |= pid == ppid;
+                  added_entries++;
+                }
+              }
+            }
+          }
+        next_entry:;
+        }
+      }
+      NO_INTR(sys_close(proc));
+      sig_proc = proc = -1;
+
+      /* If we failed to find any threads, try looking somewhere else in
+       * /proc. Maybe threads are reported differently on this system.
+       */
+      if (num_threads > 1 || !*++proc_path) {
+        NO_INTR(sys_close(marker));
+        sig_marker = marker = -1;
+
+        /* If we never found the parent process, something is very wrong.
+         * Most likely, we are running under a debugger. Any attempt to operate
+         * on the threads would be very incomplete. Let's just report an
+         * error to the caller.
+         */
+        if (!found_parent) {
+          TCMalloc_ResumeAllProcessThreads(num_threads, pids);
+          sys__exit(3);
+        }
+
+        /* Now we are ready to call the callback,
+         * which takes care of resuming the threads for us.
+         */
+        args->result = args->callback(args->parameter, num_threads,
+                                      pids, args->ap);
+        args->err = errno;
+
+        /* Callback should have resumed threads, but better safe than sorry  */
+        if (TCMalloc_ResumeAllProcessThreads(num_threads, pids)) {
+          /* Callback forgot to resume at least one thread, report error     */
+          args->err    = EINVAL;
+          args->result = -1;
+        }
+
+        sys__exit(0);
+      }
+    detach_threads:
+      /* Resume all threads prior to retrying the operation                  */
+      TCMalloc_ResumeAllProcessThreads(num_threads, pids);
+      sig_pids = NULL;
+      num_threads = 0;
+      sig_num_threads = num_threads;
+      max_threads += 100;
+    }
+  }
+}
+
+
+/* This function gets the list of all linux threads of the current process
+ * and passes them to the 'callback' along with the 'parameter' pointer; at
+ * the time the callback is invoked, all of the threads are paused via
+ * PTRACE_ATTACH.
+ * The callback is executed from a separate thread which shares only the
+ * address space, the filesystem, and the filehandles with the caller. Most
+ * notably, it does not share the same pid and ppid; and if it terminates,
+ * the rest of the application is still there. 'callback' is supposed to
+ * call (or arrange for a call to) TCMalloc_ResumeAllProcessThreads. This
+ * happens automatically if the thread raises a synchronous signal
+ * (e.g. SIGSEGV); asynchronous
+ * signals are blocked. If the 'callback' decides to unblock them, it must
+ * ensure that they cannot terminate the application, or that
+ * TCMalloc_ResumeAllProcessThreads will get called.
+ * It is an error for the 'callback' to make any library calls that could
+ * acquire locks. Most notably, this means that most system calls have to
+ * avoid going through libc. Also, this means that it is not legal to call
+ * exit() or abort().
+ * We return -1 on error and the return value of 'callback' on success.
+ */
+int TCMalloc_ListAllProcessThreads(void *parameter,
+                                   ListAllProcessThreadsCallBack callback, ...) {
+  char                   altstack_mem[ALT_STACKSIZE];
+  struct ListerParams    args;
+  pid_t                  clone_pid;
+  int                    dumpable = 1, sig;
+  struct kernel_sigset_t sig_blocked, sig_old;
+  sem_t                  lock;
+
+  va_start(args.ap, callback);
+
+  /* If we are short on virtual memory, initializing the alternate stack
+   * might trigger a SIGSEGV. Let's do this early, before it could get us
+   * into more trouble (i.e. before signal handlers try to use the alternate
+   * stack, and before we attach to other threads).
+   */
+  memset(altstack_mem, 0, sizeof(altstack_mem));
+
+  /* Some of our cleanup functions could conceivably use more stack space.
+   * Try to touch the stack right now. This could be defeated by the compiler
+   * being too smart for its own good, so try really hard.
+   */
+  DirtyStack(32768);
+
+  /* Make this process "dumpable". This is necessary in order to ptrace()
+   * after having called setuid().
+   */
+  dumpable = sys_prctl(PR_GET_DUMPABLE, 0);
+  if (!dumpable)
+    sys_prctl(PR_SET_DUMPABLE, 1);
+
+  /* Fill in argument block for dumper thread                                */
+  args.result       = -1;
+  args.err          = 0;
+  args.altstack_mem = altstack_mem;
+  args.parameter    = parameter;
+  args.callback     = callback;
+  args.lock         = &lock;
+
+  /* Before cloning the thread lister, block all asynchronous signals, as we */
+  /* are not prepared to handle them.                                        */
+  sys_sigfillset(&sig_blocked);
+  for (sig = 0; sig < sizeof(sync_signals)/sizeof(*sync_signals); sig++) {
+    sys_sigdelset(&sig_blocked, sync_signals[sig]);
+  }
+  if (sys_sigprocmask(SIG_BLOCK, &sig_blocked, &sig_old)) {
+    args.err = errno;
+    args.result = -1;
+    goto failed;
+  }
+
+  /* scope */ {
+    /* After cloning, both the parent and the child share the same instance
+     * of errno. We must make sure that at least one of these processes
+     * (in our case, the parent) uses modified syscall macros that update
+     * a local copy of errno, instead.
+     */
+    #ifdef __cplusplus
+      #define sys0_sigprocmask sys.sigprocmask
+      #define sys0_waitpid     sys.waitpid
+      SysCalls sys;
+    #else
+      int my_errno;
+      #define SYS_ERRNO        my_errno
+      #define SYS_INLINE       inline
+      #define SYS_PREFIX       0
+      #undef  SYS_LINUX_SYSCALL_SUPPORT_H
+      #include "linux_syscall_support.h"
+    #endif
+
+    /* Lock before clone so that the parent can set
+     * ptrace permissions (if necessary) prior
+     * to ListerThread actually executing.
+     */
+    if (sem_init(&lock, 0, 0) == 0) {
+
+      int clone_errno;
+      clone_pid = local_clone((int (*)(void *))ListerThread, &args);
+      clone_errno = errno;
+
+      sys_sigprocmask(SIG_SETMASK, &sig_old, &sig_old);
+
+      if (clone_pid >= 0) {
+#ifdef PR_SET_PTRACER
+        /* In newer versions of glibc permission must explicitly
+         * be given to allow for ptrace.
+         */
+        prctl(PR_SET_PTRACER, clone_pid, 0, 0, 0);
+#endif
+        /* Releasing the lock here allows the
+         * ListerThread to execute and ptrace us.
+         */
+        sem_post(&lock);
+        int status, rc;
+        while ((rc = sys0_waitpid(clone_pid, &status, __WALL)) < 0 &&
+               ERRNO == EINTR) {
+          /* Keep waiting                                                    */
+        }
+        if (rc < 0) {
+          args.err = ERRNO;
+          args.result = -1;
+        } else if (WIFEXITED(status)) {
+          switch (WEXITSTATUS(status)) {
+            case 0: break;             /* Normal process termination           */
+            case 2: args.err = EFAULT; /* Some fault (e.g. SIGSEGV) detected   */
+                    args.result = -1;
+                    break;
+            case 3: args.err = EPERM;  /* Process is already being traced      */
+                    args.result = -1;
+                    break;
+            default:args.err = ECHILD; /* Child died unexpectedly              */
+                    args.result = -1;
+                    break;
+          }
+        } else {
+          args.err    = EFAULT;        /* Terminated due to an unhandled signal*/
+          args.result = -1;
+        }
+        sem_destroy(&lock);
+      } else {
+        args.result = -1;
+        args.err    = clone_errno;
+      }
+    } else {
+      args.result = -1;
+      args.err    = errno;
+    }
+  }
+
+  /* Restore the "dumpable" state of the process                             */
+failed:
+  if (!dumpable)
+    sys_prctl(PR_SET_DUMPABLE, dumpable);
+
+  va_end(args.ap);
+
+  errno = args.err;
+  return args.result;
+}
+
+/* This function resumes the list of all linux threads that
+ * TCMalloc_ListAllProcessThreads pauses before giving to its callback.
+ * The function returns non-zero if at least one thread was
+ * suspended and has now been resumed.
+ */
+int TCMalloc_ResumeAllProcessThreads(int num_threads, pid_t *thread_pids) {
+  int detached_at_least_one = 0;
+  while (num_threads-- > 0) {
+    detached_at_least_one |= sys_ptrace_detach(thread_pids[num_threads]) >= 0;
+  }
+  return detached_at_least_one;
+}
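+
+/* A hedged usage sketch (kept under "#if 0"; assumes the callback
+ * signature declared as ListAllProcessThreadsCallBack in thread_lister.h):
+ * a callback that counts the suspended threads and resumes them itself.
+ */
+#if 0
+static int CountThreadsCallback(void *parameter, int num_threads,
+                                pid_t *thread_pids, va_list ap) {
+  *(int *)parameter = num_threads;
+  /* The callback must resume the threads before returning. */
+  TCMalloc_ResumeAllProcessThreads(num_threads, thread_pids);
+  return num_threads;
+}
+
+static int CountProcessThreads(void) {
+  int count = 0;
+  return TCMalloc_ListAllProcessThreads(&count, CountThreadsCallback);
+}
+#endif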
+
+#ifdef __cplusplus
+}
+#endif
+#endif
diff --git a/src/base/linuxthreads.h b/src/base/linuxthreads.h
new file mode 100644
index 0000000..16bc8c6
--- /dev/null
+++ b/src/base/linuxthreads.h
@@ -0,0 +1,53 @@
+/* Copyright (c) 2005-2007, Google Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with the
+ * distribution.
+ *     * Neither the name of Google Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * ---
+ * Author: Markus Gutschke
+ */
+
+#ifndef _LINUXTHREADS_H
+#define _LINUXTHREADS_H
+
+/* Include thread_lister.h to get the interface that we implement for linux.
+ */
+
+/* We currently support x86-32, x86-64, ARM, MIPS, PPC, and aarch64 on
+ * Linux. Porting to other related platforms should not be difficult.
+ */
+#if (defined(__i386__) || defined(__x86_64__) || defined(__ARM_ARCH_3__) || \
+     defined(__mips__) || defined(__PPC__) || defined(__aarch64__)) && defined(__linux)
+
+/* Define the THREADS symbol to make sure that there is exactly one core dumper
+ * built into the library.
+ */
+#define THREADS "Linux /proc"
+
+#endif
+
+#endif  /* _LINUXTHREADS_H */
diff --git a/src/base/logging.cc b/src/base/logging.cc
new file mode 100644
index 0000000..761c2fd
--- /dev/null
+++ b/src/base/logging.cc
@@ -0,0 +1,108 @@
+// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
+// Copyright (c) 2007, Google Inc.
+// All rights reserved.
+// 
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+// 
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+// 
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// ---
+// This file just provides storage for FLAGS_verbose.
+
+#include <config.h>
+#include "base/logging.h"
+#include "base/commandlineflags.h"
+
+DEFINE_int32(verbose, EnvToInt("PERFTOOLS_VERBOSE", 0),
+             "Set to numbers >0 for more verbose output, or <0 for less.  "
+             "--verbose == -4 means we log fatal errors only.");
+
+
+#if defined(_WIN32) || defined(__CYGWIN__) || defined(__CYGWIN32__)
+
+// While windows does have a POSIX-compatible API
+// (_open/_write/_close), it allocates memory.  Using this lower-level
+// windows API is the closest we can get to being "raw".
+RawFD RawOpenForWriting(const char* filename) {
+  // CreateFile allocates memory if the filename isn't absolute, so if
+  // that ever becomes a problem then we ought to compute the absolute
+  // path on its behalf (perhaps the ntdll/kernel function isn't aware
+  // of the working directory?)
+  RawFD fd = CreateFileA(filename, GENERIC_WRITE, 0, NULL,
+                         CREATE_ALWAYS, 0, NULL);
+  if (fd != kIllegalRawFD && GetLastError() == ERROR_ALREADY_EXISTS)
+    SetEndOfFile(fd);    // truncate the existing file
+  return fd;
+}
+
+void RawWrite(RawFD handle, const char* buf, size_t len) {
+  while (len > 0) {
+    DWORD wrote;
+    BOOL ok = WriteFile(handle, buf, len, &wrote, NULL);
+    // We do not use an asynchronous file handle, so ok==false means an error
+    if (!ok) break;
+    buf += wrote;
+    len -= wrote;
+  }
+}
+
+void RawClose(RawFD handle) {
+  CloseHandle(handle);
+}
+
+#else  // _WIN32 || __CYGWIN__ || __CYGWIN32__
+
+#ifdef HAVE_SYS_TYPES_H
+#include <sys/types.h>
+#endif
+#ifdef HAVE_UNISTD_H
+#include <unistd.h>
+#endif
+#ifdef HAVE_FCNTL_H
+#include <fcntl.h>
+#endif
+
+// Re-run fn until it doesn't cause EINTR.
+#define NO_INTR(fn)  do {} while ((fn) < 0 && errno == EINTR)
+
+RawFD RawOpenForWriting(const char* filename) {
+  return open(filename, O_WRONLY|O_CREAT|O_TRUNC, 0664);
+}
+
+void RawWrite(RawFD fd, const char* buf, size_t len) {
+  while (len > 0) {
+    ssize_t r;
+    NO_INTR(r = write(fd, buf, len));
+    if (r <= 0) break;
+    buf += r;
+    len -= r;
+  }
+}
+
+void RawClose(RawFD fd) {
+  NO_INTR(close(fd));
+}
+
+#endif  // _WIN32 || __CYGWIN__ || __CYGWIN32__
diff --git a/src/base/logging.h b/src/base/logging.h
new file mode 100644
index 0000000..a1afe4d
--- /dev/null
+++ b/src/base/logging.h
@@ -0,0 +1,259 @@
+// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
+// Copyright (c) 2005, Google Inc.
+// All rights reserved.
+// 
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+// 
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+// 
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// ---
+// This file contains #include information about logging-related stuff.
+// Pretty much everybody needs to #include this file so that they can
+// log various happenings.
+//
+#ifndef _LOGGING_H_
+#define _LOGGING_H_
+
+#include <config.h>
+#include <stdarg.h>
+#include <stdlib.h>
+#include <stdio.h>
+#ifdef HAVE_UNISTD_H
+#include <unistd.h>    // for write()
+#endif
+#include <string.h>    // for strlen(), strcmp()
+#include <assert.h>
+#include <errno.h>     // for errno
+#include "base/commandlineflags.h"
+
+// On some systems (like freebsd), we can't call write() at all in a
+// global constructor, perhaps because errno hasn't been set up.
+// (In windows, we can't call it because it might call malloc.)
+// Calling the write syscall is safer (it doesn't set errno), so we
+// prefer that.  Note we don't care about errno for logging: we just
+// do logging on a best-effort basis.
+#if defined(_MSC_VER)
+#define WRITE_TO_STDERR(buf, len) WriteToStderr(buf, len);  // in port.cc
+#elif defined(HAVE_SYS_SYSCALL_H)
+#include <sys/syscall.h>
+#define WRITE_TO_STDERR(buf, len) syscall(SYS_write, STDERR_FILENO, buf, len)
+#else
+#define WRITE_TO_STDERR(buf, len) write(STDERR_FILENO, buf, len)
+#endif
+
+// MSVC and mingw define their own, safe version of vsnprintf (the
+// windows one is broken) in port.cc.  Everyone else can use the
+// version here.  We had to give it a unique name for windows.
+#ifndef _WIN32
+# define perftools_vsnprintf vsnprintf
+#endif
+
+
+// We log all messages at this log-level and below.
+// INFO == -1, WARNING == -2, ERROR == -3, FATAL == -4
+DECLARE_int32(verbose);
+
+// CHECK dies with a fatal error if condition is not true.  It is *not*
+// controlled by NDEBUG, so the check will be executed regardless of
+// compilation mode.  Therefore, it is safe to do things like:
+//    CHECK(fp->Write(x) == 4)
+// Note we use write instead of printf/puts to avoid the risk we'll
+// call malloc().
+#define CHECK(condition)                                                \
+  do {                                                                  \
+    if (!(condition)) {                                                 \
+      WRITE_TO_STDERR("Check failed: " #condition "\n",                 \
+                      sizeof("Check failed: " #condition "\n")-1);      \
+      abort();                                                          \
+    }                                                                   \
+  } while (0)
+
+// This takes a message to print.  The name is historical.
+#define RAW_CHECK(condition, message)                                          \
+  do {                                                                         \
+    if (!(condition)) {                                                        \
+      WRITE_TO_STDERR("Check failed: " #condition ": " message "\n",           \
+                      sizeof("Check failed: " #condition ": " message "\n")-1);\
+      abort();                                                                 \
+    }                                                                          \
+  } while (0)
+
+// This is like RAW_CHECK, but only in debug-mode
+#ifdef NDEBUG
+enum { DEBUG_MODE = 0 };
+#define RAW_DCHECK(condition, message)
+#else
+enum { DEBUG_MODE = 1 };
+#define RAW_DCHECK(condition, message)  RAW_CHECK(condition, message)
+#endif
+
+// This prints errno as well.  Note we use write instead of printf/puts to
+// avoid the risk we'll call malloc().
+#define PCHECK(condition)                                               \
+  do {                                                                  \
+    if (!(condition)) {                                                 \
+      const int err_no = errno;                                         \
+      WRITE_TO_STDERR("Check failed: " #condition ": ",                 \
+                      sizeof("Check failed: " #condition ": ")-1);      \
+      WRITE_TO_STDERR(strerror(err_no), strlen(strerror(err_no)));      \
+      WRITE_TO_STDERR("\n", sizeof("\n")-1);                            \
+      abort();                                                          \
+    }                                                                   \
+  } while (0)
+
+// Helper macro for binary operators; prints the two values on error
+// Don't use this macro directly in your code, use CHECK_EQ et al below
+
+// WARNING: These don't compile correctly if one of the arguments is a pointer
+// and the other is NULL. To work around this, simply static_cast NULL to the
+// type of the desired pointer.
+
+// TODO(jandrews): Also print the values in case of failure.  Requires some
+// sort of type-sensitive ToString() function.
+#define CHECK_OP(op, val1, val2)                                        \
+  do {                                                                  \
+    if (!((val1) op (val2))) {                                          \
+      fprintf(stderr, "Check failed: %s %s %s\n", #val1, #op, #val2);   \
+      abort();                                                          \
+    }                                                                   \
+  } while (0)
+
+#define CHECK_EQ(val1, val2) CHECK_OP(==, val1, val2)
+#define CHECK_NE(val1, val2) CHECK_OP(!=, val1, val2)
+#define CHECK_LE(val1, val2) CHECK_OP(<=, val1, val2)
+#define CHECK_LT(val1, val2) CHECK_OP(< , val1, val2)
+#define CHECK_GE(val1, val2) CHECK_OP(>=, val1, val2)
+#define CHECK_GT(val1, val2) CHECK_OP(> , val1, val2)
+
+// Synonyms for CHECK_* that are used in some unittests.
+#define EXPECT_EQ(val1, val2) CHECK_EQ(val1, val2)
+#define EXPECT_NE(val1, val2) CHECK_NE(val1, val2)
+#define EXPECT_LE(val1, val2) CHECK_LE(val1, val2)
+#define EXPECT_LT(val1, val2) CHECK_LT(val1, val2)
+#define EXPECT_GE(val1, val2) CHECK_GE(val1, val2)
+#define EXPECT_GT(val1, val2) CHECK_GT(val1, val2)
+#define ASSERT_EQ(val1, val2) EXPECT_EQ(val1, val2)
+#define ASSERT_NE(val1, val2) EXPECT_NE(val1, val2)
+#define ASSERT_LE(val1, val2) EXPECT_LE(val1, val2)
+#define ASSERT_LT(val1, val2) EXPECT_LT(val1, val2)
+#define ASSERT_GE(val1, val2) EXPECT_GE(val1, val2)
+#define ASSERT_GT(val1, val2) EXPECT_GT(val1, val2)
+// As are these variants.
+#define EXPECT_TRUE(cond)     CHECK(cond)
+#define EXPECT_FALSE(cond)    CHECK(!(cond))
+#define EXPECT_STREQ(a, b)    CHECK(strcmp(a, b) == 0)
+#define ASSERT_TRUE(cond)     EXPECT_TRUE(cond)
+#define ASSERT_FALSE(cond)    EXPECT_FALSE(cond)
+#define ASSERT_STREQ(a, b)    EXPECT_STREQ(a, b)
+
+// Used for (libc) functions that return -1 and set errno
+#define CHECK_ERR(invocation)  PCHECK((invocation) != -1)
+
+// A few more checks that only happen in debug mode
+#ifdef NDEBUG
+#define DCHECK_EQ(val1, val2)
+#define DCHECK_NE(val1, val2)
+#define DCHECK_LE(val1, val2)
+#define DCHECK_LT(val1, val2)
+#define DCHECK_GE(val1, val2)
+#define DCHECK_GT(val1, val2)
+#else
+#define DCHECK_EQ(val1, val2)  CHECK_EQ(val1, val2)
+#define DCHECK_NE(val1, val2)  CHECK_NE(val1, val2)
+#define DCHECK_LE(val1, val2)  CHECK_LE(val1, val2)
+#define DCHECK_LT(val1, val2)  CHECK_LT(val1, val2)
+#define DCHECK_GE(val1, val2)  CHECK_GE(val1, val2)
+#define DCHECK_GT(val1, val2)  CHECK_GT(val1, val2)
+#endif
+
+
+#ifdef ERROR
+#undef ERROR      // may conflict with ERROR macro on windows
+#endif
+enum LogSeverity {INFO = -1, WARNING = -2, ERROR = -3, FATAL = -4};
+
+// NOTE: we add a newline to the end of the output if it's not there already
+inline void LogPrintf(int severity, const char* pat, va_list ap) {
+  // We write directly to the stderr file descriptor and avoid FILE
+  // buffering because that may invoke malloc()
+  char buf[600];
+  perftools_vsnprintf(buf, sizeof(buf)-1, pat, ap);
+  if (buf[0] != '\0' && buf[strlen(buf)-1] != '\n') {
+    assert(strlen(buf)+1 < sizeof(buf));
+    strcat(buf, "\n");
+  }
+  WRITE_TO_STDERR(buf, strlen(buf));
+  if ((severity) == FATAL)
+    abort(); // LOG(FATAL) indicates a big problem, so don't run atexit() calls
+}
+
+// Note that since the order of global constructors is unspecified,
+// global code that calls RAW_LOG may execute before FLAGS_verbose is set.
+// Such code will run with verbosity == 0 no matter what.
+#define VLOG_IS_ON(severity) (FLAGS_verbose >= severity)
+
+// In a better world, we'd use __VA_ARGS__, but VC++ 7 doesn't support it.
+#define LOG_PRINTF(severity, pat) do {          \
+  if (VLOG_IS_ON(severity)) {                   \
+    va_list ap;                                 \
+    va_start(ap, pat);                          \
+    LogPrintf(severity, pat, ap);               \
+    va_end(ap);                                 \
+  }                                             \
+} while (0)
+
+// RAW_LOG is the main function; some synonyms are used in unittests.
+inline void RAW_LOG(int lvl, const char* pat, ...)  { LOG_PRINTF(lvl, pat); }
+inline void RAW_VLOG(int lvl, const char* pat, ...) { LOG_PRINTF(lvl, pat); }
+inline void LOG(int lvl, const char* pat, ...)      { LOG_PRINTF(lvl, pat); }
+inline void VLOG(int lvl, const char* pat, ...)     { LOG_PRINTF(lvl, pat); }
+inline void LOG_IF(int lvl, bool cond, const char* pat, ...) {
+  if (cond)  LOG_PRINTF(lvl, pat);
+}
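+
+// A hedged usage sketch (kept under "#if 0" as illustration only):
+// RAW_VLOG(1, ...) only prints when FLAGS_verbose >= 1, while LOG(FATAL,
+// ...) aborts after printing.
+#if 0
+static void LoggingExample() {
+  RAW_LOG(INFO, "found %d heap samples", 3);   // printed if verbose >= -1
+  RAW_VLOG(1, "extra detail");                 // printed if verbose >= 1
+  LOG(FATAL, "out of memory");                 // prints, then abort()s
+}
+#endif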
+
+// This isn't technically logging, but it's also IO and also is an
+// attempt to be "raw" -- that is, to not use any higher-level libc
+// routines that might allocate memory or (ideally) try to allocate
+// locks.  We use an opaque file handle (not necessarily an int)
+// to allow even more low-level stuff in the future.
+// Like other "raw" routines, these functions are best effort, and
+// thus don't return error codes (except RawOpenForWriting()).
+#if defined(_WIN32) || defined(__CYGWIN__) || defined(__CYGWIN32__)
+#ifndef NOMINMAX
+#define NOMINMAX     // @#!$& windows
+#endif
+#include <windows.h>
+typedef HANDLE RawFD;
+const RawFD kIllegalRawFD = INVALID_HANDLE_VALUE;
+#else
+typedef int RawFD;
+const RawFD kIllegalRawFD = -1;   // what open returns if it fails
+#endif  // defined(_WIN32) || defined(__CYGWIN__) || defined(__CYGWIN32__)
+
+RawFD RawOpenForWriting(const char* filename);   // uses default permissions
+void RawWrite(RawFD fd, const char* buf, size_t len);
+void RawClose(RawFD fd);
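+
+// A hedged usage sketch of the raw file API above (kept under "#if 0";
+// the path is hypothetical):
+#if 0
+static void RawIOExample() {
+  RawFD fd = RawOpenForWriting("/tmp/profile.out");
+  if (fd != kIllegalRawFD) {
+    RawWrite(fd, "heap profile\n", sizeof("heap profile\n")-1);
+    RawClose(fd);
+  }
+}
+#endif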
+
+#endif // _LOGGING_H_
diff --git a/src/base/low_level_alloc.cc b/src/base/low_level_alloc.cc
new file mode 100644
index 0000000..4d2ae8d
--- /dev/null
+++ b/src/base/low_level_alloc.cc
@@ -0,0 +1,523 @@
+// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
+/* Copyright (c) 2006, Google Inc.
+ * All rights reserved.
+ * 
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ * 
+ *     * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with the
+ * distribution.
+ *     * Neither the name of Google Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ * 
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+// A low-level allocator that can be used by other low-level
+// modules without introducing dependency cycles.
+// This allocator is slow and wasteful of memory;
+// it should not be used when performance is key.
+
+#include "base/low_level_alloc.h"
+#include "base/dynamic_annotations.h"
+#include "base/spinlock.h"
+#include "base/logging.h"
+#include "malloc_hook-inl.h"
+#include <gperftools/malloc_hook.h>
+#include <errno.h>
+#ifdef HAVE_UNISTD_H
+#include <unistd.h>
+#endif
+#ifdef HAVE_MMAP
+#include <sys/mman.h>
+#endif
+#include <new>                   // for placement-new
+
+// On systems (like freebsd) that don't define MAP_ANONYMOUS, use the old
+// form of the name instead.
+#ifndef MAP_ANONYMOUS
+# define MAP_ANONYMOUS MAP_ANON
+#endif
+
+// A first-fit allocator with amortized logarithmic free() time.
+
+// ---------------------------------------------------------------------------
+static const int kMaxLevel = 30;
+
+// We put this class-only struct in a namespace to avoid polluting the
+// global namespace with this struct name (thus risking an ODR violation).
+namespace low_level_alloc_internal {
+  // This struct describes one allocated block, or one free block.
+  struct AllocList {
+    struct Header {
+      intptr_t size;  // size of entire region, including this field. Must be
+                      // first.  Valid in both allocated and unallocated blocks
+      intptr_t magic; // kMagicAllocated or kMagicUnallocated xor this
+      LowLevelAlloc::Arena *arena; // pointer to parent arena
+      void *dummy_for_alignment;   // aligns regions to 0 mod 2*sizeof(void*)
+    } header;
+
+    // Next two fields: in unallocated blocks: freelist skiplist data
+    //                  in allocated blocks: overlaps with client data
+    int levels;           // levels in skiplist used
+    AllocList *next[kMaxLevel];   // actually has levels elements.
+                                  // The AllocList node may not have room for
+                                  // all kMaxLevel entries.  See max_fit in
+                                  // LLA_SkiplistLevels()
+  };
+}
+using low_level_alloc_internal::AllocList;
+
+
+// ---------------------------------------------------------------------------
+// A trivial skiplist implementation.  This is used to keep the freelist
+// in address order while taking only logarithmic time per insert and delete.
+
+// An integer approximation of log2(size/base)
+// Requires size >= base.
+static int IntLog2(size_t size, size_t base) {
+  int result = 0;
+  for (size_t i = size; i > base; i >>= 1) { // i == floor(size/2**result)
+    result++;
+  }
+  //    floor(size / 2**result) <= base < floor(size / 2**(result-1))
+  // =>     log2(size/(base+1)) <= result < 1+log2(size/base)
+  // => result ~= log2(size/base)
+  return result;
+}
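+
+// For example, IntLog2(4096, 16) halves 4096 eight times before reaching
+// the base of 16, so it returns 8 == log2(4096/16).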
+
+// Return a random integer n:  p(n)=1/(2**n) if 1 <= n; p(n)=0 if n < 1.
+static int Random() {
+  static int32 r = 1;         // no locking---it's not critical
+  ANNOTATE_BENIGN_RACE(&r, "benign race, not critical.");
+  int result = 1;
+  while ((((r = r*1103515245 + 12345) >> 30) & 1) == 0) {
+    result++;
+  }
+  return result;
+}
+
+// Return a number of skiplist levels for a node of size bytes, where
+// base is the minimum node size.  Compute level=log2(size / base)+n
+// where n is 1 if random is false and otherwise a random number generated with
+// the standard distribution for a skiplist:  See Random() above.
+// Bigger nodes tend to have more skiplist levels due to the log2(size / base)
+// term, so first-fit searches touch fewer nodes.  "level" is clipped so
+// level<kMaxLevel and next[level-1] will fit in the node.
+// 0 < LLA_SkiplistLevels(x,y,false) <= LLA_SkiplistLevels(x,y,true) < kMaxLevel
+static int LLA_SkiplistLevels(size_t size, size_t base, bool random) {
+  // max_fit is the maximum number of levels that will fit in a node for the
+  // given size.   We can't return more than max_fit, no matter what the
+  // random number generator says.
+  int max_fit = (size-OFFSETOF_MEMBER(AllocList, next)) / sizeof (AllocList *);
+  int level = IntLog2(size, base) + (random? Random() : 1);
+  if (level > max_fit)     level = max_fit;
+  if (level > kMaxLevel-1) level = kMaxLevel - 1;
+  RAW_CHECK(level >= 1, "block not big enough for even one level");
+  return level;
+}
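+
+// Worked example (assuming a typical LP64 layout where next[] starts at
+// byte offset 40 and pointers are 8 bytes): for size=64, base=16,
+// random=false, max_fit = (64-40)/8 = 3 and IntLog2(64,16)+1 = 3, so
+// LLA_SkiplistLevels() returns 3.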
+
+// Return "atleast", the first element of AllocList *head s.t. *atleast >= *e.
+// For 0 <= i < head->levels, set prev[i] to "no_greater", where no_greater
+// points to the last element at level i in the AllocList less than *e, or is
+// head if no such element exists.
+static AllocList *LLA_SkiplistSearch(AllocList *head,
+                                     AllocList *e, AllocList **prev) {
+  AllocList *p = head;
+  for (int level = head->levels - 1; level >= 0; level--) {
+    for (AllocList *n; (n = p->next[level]) != 0 && n < e; p = n) {
+    }
+    prev[level] = p;
+  }
+  return (head->levels == 0) ?  0 : prev[0]->next[0];
+}
+
+// Insert element *e into AllocList *head.  Set prev[] as LLA_SkiplistSearch.
+// Requires that e->levels be previously set by the caller (using
+// LLA_SkiplistLevels())
+static void LLA_SkiplistInsert(AllocList *head, AllocList *e,
+                               AllocList **prev) {
+  LLA_SkiplistSearch(head, e, prev);
+  for (; head->levels < e->levels; head->levels++) { // extend prev pointers
+    prev[head->levels] = head;                       // to all *e's levels
+  }
+  for (int i = 0; i != e->levels; i++) { // add element to list
+    e->next[i] = prev[i]->next[i];
+    prev[i]->next[i] = e;
+  }
+}
+
+// Remove element *e from AllocList *head.  Set prev[] as LLA_SkiplistSearch().
+// Requires that e->levels be previously set by the caller (using
+// LLA_SkiplistLevels())
+static void LLA_SkiplistDelete(AllocList *head, AllocList *e,
+                               AllocList **prev) {
+  AllocList *found = LLA_SkiplistSearch(head, e, prev);
+  RAW_CHECK(e == found, "element not in freelist");
+  for (int i = 0; i != e->levels && prev[i]->next[i] == e; i++) {
+    prev[i]->next[i] = e->next[i];
+  }
+  while (head->levels > 0 && head->next[head->levels - 1] == 0) {
+    head->levels--;   // reduce head->levels if level unused
+  }
+}
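+
+// Putting the pieces together, a typical freelist update looks like this
+// (a sketch; "head" is an arena's freelist and "min_size" its minimum
+// block size):
+//   AllocList *prev[kMaxLevel];
+//   e->levels = LLA_SkiplistLevels(e->header.size, min_size, true);
+//   LLA_SkiplistInsert(head, e, prev);   // add e, keeping address order
+//   ...
+//   LLA_SkiplistDelete(head, e, prev);   // later, remove e again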
+
+// ---------------------------------------------------------------------------
+// Arena implementation
+
+struct LowLevelAlloc::Arena {
+  Arena() : mu(SpinLock::LINKER_INITIALIZED) {} // does nothing; for static init
+  explicit Arena(int) : pagesize(0) {}  // set pagesize to zero explicitly
+                                        // for non-static init
+
+  SpinLock mu;            // protects freelist, allocation_count,
+                          // pagesize, roundup, min_size
+  AllocList freelist;     // head of free list; sorted by addr (under mu)
+  int32 allocation_count; // count of allocated blocks (under mu)
+  int32 flags;            // flags passed to NewArena (ro after init)
+  size_t pagesize;        // ==getpagesize()  (init under mu, then ro)
+  size_t roundup;         // lowest power of 2 >= max(16,sizeof (AllocList))
+                          // (init under mu, then ro)
+  size_t min_size;        // smallest allocation block size
+                          // (init under mu, then ro)
+};
+
+// The default arena, which is used when 0 is passed instead of an Arena
+// pointer.
+static struct LowLevelAlloc::Arena default_arena;
+
+// Non-malloc-hooked arenas: used only to allocate metadata for arenas that
+// do not want malloc hook reporting, so that for them there's no malloc hook
+// reporting even during arena creation.
+static struct LowLevelAlloc::Arena unhooked_arena;
+static struct LowLevelAlloc::Arena unhooked_async_sig_safe_arena;
+
+// magic numbers to identify allocated and unallocated blocks
+static const intptr_t kMagicAllocated = 0x4c833e95;
+static const intptr_t kMagicUnallocated = ~kMagicAllocated;
+
+namespace {
+  class SCOPED_LOCKABLE ArenaLock {
+   public:
+    explicit ArenaLock(LowLevelAlloc::Arena *arena)
+        EXCLUSIVE_LOCK_FUNCTION(arena->mu)
+        : left_(false), mask_valid_(false), arena_(arena) {
+      if ((arena->flags & LowLevelAlloc::kAsyncSignalSafe) != 0) {
+      // We've decided not to support async-signal-safe arena use until
+      // there is a demonstrated need.  Here's how one could do it, though
+      // (it would need to be made more portable).
+#if 0
+        sigset_t all;
+        sigfillset(&all);
+        this->mask_valid_ =
+            (pthread_sigmask(SIG_BLOCK, &all, &this->mask_) == 0);
+#else
+        RAW_CHECK(false, "We do not yet support async-signal-safe arena.");
+#endif
+      }
+      this->arena_->mu.Lock();
+    }
+    ~ArenaLock() { RAW_CHECK(this->left_, "haven't left Arena region"); }
+    void Leave() /*UNLOCK_FUNCTION()*/ {
+      this->arena_->mu.Unlock();
+#if 0
+      if (this->mask_valid_) {
+        pthread_sigmask(SIG_SETMASK, &this->mask_, 0);
+      }
+#endif
+      this->left_ = true;
+    }
+   private:
+    bool left_;       // whether left region
+    bool mask_valid_;
+#if 0
+    sigset_t mask_;   // old mask of blocked signals
+#endif
+    LowLevelAlloc::Arena *arena_;
+    DISALLOW_COPY_AND_ASSIGN(ArenaLock);
+  };
+} // anonymous namespace
+
+// create an appropriate magic number for an object at "ptr"
+// "magic" should be kMagicAllocated or kMagicUnallocated
+inline static intptr_t Magic(intptr_t magic, AllocList::Header *ptr) {
+  return magic ^ reinterpret_cast<intptr_t>(ptr);
+}
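+
+// Because the magic value is XORed with the header's own address, a copied
+// or stale header fails validation even if it contains a genuine magic
+// constant.  A typical check (as used throughout this file):
+//   RAW_CHECK(h->magic == Magic(kMagicAllocated, h), "corrupt header");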
+
+// Initialize the fields of an Arena
+static void ArenaInit(LowLevelAlloc::Arena *arena) {
+  if (arena->pagesize == 0) {
+    arena->pagesize = getpagesize();
+    // Round up block sizes to a power of two close to the header size.
+    arena->roundup = 16;
+    while (arena->roundup < sizeof (arena->freelist.header)) {
+      arena->roundup += arena->roundup;
+    }
+    // Don't allocate blocks less than twice the roundup size to avoid tiny
+    // free blocks.
+    arena->min_size = 2 * arena->roundup;
+    arena->freelist.header.size = 0;
+    arena->freelist.header.magic =
+        Magic(kMagicUnallocated, &arena->freelist.header);
+    arena->freelist.header.arena = arena;
+    arena->freelist.levels = 0;
+    memset(arena->freelist.next, 0, sizeof (arena->freelist.next));
+    arena->allocation_count = 0;
+    if (arena == &default_arena) {
+      // Default arena should be hooked, e.g. for heap-checker to trace
+      // pointer chains through objects in the default arena.
+      arena->flags = LowLevelAlloc::kCallMallocHook;
+    } else if (arena == &unhooked_async_sig_safe_arena) {
+      arena->flags = LowLevelAlloc::kAsyncSignalSafe;
+    } else {
+      arena->flags = 0;   // other arenas' flags may be overridden by client,
+                          // but unhooked_arena will have 0 in 'flags'.
+    }
+  }
+}
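+
+// For instance (a sketch, assuming a 24-byte freelist header): the loop
+// above doubles roundup from 16 to 32, and min_size becomes 64, so
+// splitting a region never leaves a free fragment smaller than 64 bytes.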
+
+// L < meta_data_arena->mu
+LowLevelAlloc::Arena *LowLevelAlloc::NewArena(int32 flags,
+                                              Arena *meta_data_arena) {
+  RAW_CHECK(meta_data_arena != 0, "must pass a valid arena");
+  if (meta_data_arena == &default_arena) {
+    if ((flags & LowLevelAlloc::kAsyncSignalSafe) != 0) {
+      meta_data_arena = &unhooked_async_sig_safe_arena;
+    } else if ((flags & LowLevelAlloc::kCallMallocHook) == 0) {
+      meta_data_arena = &unhooked_arena;
+    }
+  }
+  // Arena(0) uses the constructor for non-static contexts
+  Arena *result =
+    new (AllocWithArena(sizeof (*result), meta_data_arena)) Arena(0);
+  ArenaInit(result);
+  result->flags = flags;
+  return result;
+}
+
+// L < arena->mu, L < arena->arena->mu
+bool LowLevelAlloc::DeleteArena(Arena *arena) {
+  RAW_CHECK(arena != 0 && arena != &default_arena && arena != &unhooked_arena,
+            "may not delete default arena");
+  ArenaLock section(arena);
+  bool empty = (arena->allocation_count == 0);
+  section.Leave();
+  if (empty) {
+    while (arena->freelist.next[0] != 0) {
+      AllocList *region = arena->freelist.next[0];
+      size_t size = region->header.size;
+      arena->freelist.next[0] = region->next[0];
+      RAW_CHECK(region->header.magic ==
+                Magic(kMagicUnallocated, &region->header),
+                "bad magic number in DeleteArena()");
+      RAW_CHECK(region->header.arena == arena,
+                "bad arena pointer in DeleteArena()");
+      RAW_CHECK(size % arena->pagesize == 0,
+                "empty arena has non-page-aligned block size");
+      RAW_CHECK(reinterpret_cast<intptr_t>(region) % arena->pagesize == 0,
+                "empty arena has non-page-aligned block");
+      int munmap_result;
+      if ((arena->flags & LowLevelAlloc::kAsyncSignalSafe) == 0) {
+        munmap_result = munmap(region, size);
+      } else {
+        munmap_result = MallocHook::UnhookedMUnmap(region, size);
+      }
+      RAW_CHECK(munmap_result == 0,
+                "LowLevelAlloc::DeleteArena:  munmap failed address");
+    }
+    Free(arena);
+  }
+  return empty;
+}
+
+// ---------------------------------------------------------------------------
+
+// Return value rounded up to next multiple of align.
+// align must be a power of two.
+static intptr_t RoundUp(intptr_t addr, intptr_t align) {
+  return (addr + align - 1) & ~(align - 1);
+}
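+
+// For example, RoundUp(37, 16) == 48, and RoundUp(48, 16) == 48 since 48
+// is already a multiple of 16.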
+
+// Equivalent to "return prev->next[i]" but with sanity checking
+// that the freelist is in the correct order, that it
+// consists of regions marked "unallocated", and that no two regions
+// are adjacent in memory (they should have been coalesced).
+// L < arena->mu
+static AllocList *Next(int i, AllocList *prev, LowLevelAlloc::Arena *arena) {
+  RAW_CHECK(i < prev->levels, "too few levels in Next()");
+  AllocList *next = prev->next[i];
+  if (next != 0) {
+    RAW_CHECK(next->header.magic == Magic(kMagicUnallocated, &next->header),
+              "bad magic number in Next()");
+    RAW_CHECK(next->header.arena == arena,
+              "bad arena pointer in Next()");
+    if (prev != &arena->freelist) {
+      RAW_CHECK(prev < next, "unordered freelist");
+      RAW_CHECK(reinterpret_cast<char *>(prev) + prev->header.size <
+                reinterpret_cast<char *>(next), "malformed freelist");
+    }
+  }
+  return next;
+}
+
+// Coalesce list item "a" with its successor if they are adjacent.
+static void Coalesce(AllocList *a) {
+  AllocList *n = a->next[0];
+  if (n != 0 && reinterpret_cast<char *>(a) + a->header.size ==
+                    reinterpret_cast<char *>(n)) {
+    LowLevelAlloc::Arena *arena = a->header.arena;
+    a->header.size += n->header.size;
+    n->header.magic = 0;
+    n->header.arena = 0;
+    AllocList *prev[kMaxLevel];
+    LLA_SkiplistDelete(&arena->freelist, n, prev);
+    LLA_SkiplistDelete(&arena->freelist, a, prev);
+    a->levels = LLA_SkiplistLevels(a->header.size, arena->min_size, true);
+    LLA_SkiplistInsert(&arena->freelist, a, prev);
+  }
+}
+
+// Adds block at location "v" to the free list
+// L >= arena->mu
+static void AddToFreelist(void *v, LowLevelAlloc::Arena *arena) {
+  AllocList *f = reinterpret_cast<AllocList *>(
+                        reinterpret_cast<char *>(v) - sizeof (f->header));
+  RAW_CHECK(f->header.magic == Magic(kMagicAllocated, &f->header),
+            "bad magic number in AddToFreelist()");
+  RAW_CHECK(f->header.arena == arena,
+            "bad arena pointer in AddToFreelist()");
+  f->levels = LLA_SkiplistLevels(f->header.size, arena->min_size, true);
+  AllocList *prev[kMaxLevel];
+  LLA_SkiplistInsert(&arena->freelist, f, prev);
+  f->header.magic = Magic(kMagicUnallocated, &f->header);
+  Coalesce(f);                  // maybe coalesce with successor
+  Coalesce(prev[0]);            // maybe coalesce with predecessor
+}
+
+// Frees storage allocated by LowLevelAlloc::Alloc().
+// L < arena->mu
+void LowLevelAlloc::Free(void *v) {
+  if (v != 0) {
+    AllocList *f = reinterpret_cast<AllocList *>(
+                        reinterpret_cast<char *>(v) - sizeof (f->header));
+    RAW_CHECK(f->header.magic == Magic(kMagicAllocated, &f->header),
+              "bad magic number in Free()");
+    LowLevelAlloc::Arena *arena = f->header.arena;
+    if ((arena->flags & kCallMallocHook) != 0) {
+      MallocHook::InvokeDeleteHook(v);
+    }
+    ArenaLock section(arena);
+    AddToFreelist(v, arena);
+    RAW_CHECK(arena->allocation_count > 0, "nothing in arena to free");
+    arena->allocation_count--;
+    section.Leave();
+  }
+}
+
+// Allocates and returns a block of "request" bytes, to be freed with Free().
+// L < arena->mu
+static void *DoAllocWithArena(size_t request, LowLevelAlloc::Arena *arena) {
+  void *result = 0;
+  if (request != 0) {
+    AllocList *s;       // will point to region that satisfies request
+    ArenaLock section(arena);
+    ArenaInit(arena);
+    // round up with header
+    size_t req_rnd = RoundUp(request + sizeof (s->header), arena->roundup);
+    for (;;) {      // loop until we find a suitable region
+      // find the minimum levels that a block of this size must have
+      int i = LLA_SkiplistLevels(req_rnd, arena->min_size, false) - 1;
+      if (i < arena->freelist.levels) {   // potential blocks exist
+        AllocList *before = &arena->freelist;  // predecessor of s
+        while ((s = Next(i, before, arena)) != 0 && s->header.size < req_rnd) {
+          before = s;
+        }
+        if (s != 0) {       // we found a region
+          break;
+        }
+      }
+      // we unlock before mmap() both because mmap() may call a callback hook,
+      // and because it may be slow.
+      arena->mu.Unlock();
+      // mmap in generous chunks (16 pages, typically 64K) to decrease
+      // the chances/impact of fragmentation:
+      size_t new_pages_size = RoundUp(req_rnd, arena->pagesize * 16);
+      void *new_pages;
+      if ((arena->flags & LowLevelAlloc::kAsyncSignalSafe) != 0) {
+        new_pages = MallocHook::UnhookedMMap(0, new_pages_size,
+            PROT_WRITE|PROT_READ, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
+      } else {
+        new_pages = mmap(0, new_pages_size,
+            PROT_WRITE|PROT_READ, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
+      }
+      RAW_CHECK(new_pages != MAP_FAILED, "mmap error");
+      arena->mu.Lock();
+      s = reinterpret_cast<AllocList *>(new_pages);
+      s->header.size = new_pages_size;
+      // Pretend the block is allocated; call AddToFreelist() to free it.
+      s->header.magic = Magic(kMagicAllocated, &s->header);
+      s->header.arena = arena;
+      AddToFreelist(&s->levels, arena);  // insert new region into free list
+    }
+    AllocList *prev[kMaxLevel];
+    LLA_SkiplistDelete(&arena->freelist, s, prev);    // remove from free list
+    // s points to the first free region that's big enough
+    if (req_rnd + arena->min_size <= s->header.size) {  // big enough to split
+      AllocList *n = reinterpret_cast<AllocList *>
+                        (req_rnd + reinterpret_cast<char *>(s));
+      n->header.size = s->header.size - req_rnd;
+      n->header.magic = Magic(kMagicAllocated, &n->header);
+      n->header.arena = arena;
+      s->header.size = req_rnd;
+      AddToFreelist(&n->levels, arena);
+    }
+    s->header.magic = Magic(kMagicAllocated, &s->header);
+    RAW_CHECK(s->header.arena == arena, "");
+    arena->allocation_count++;
+    section.Leave();
+    result = &s->levels;
+  }
+  ANNOTATE_NEW_MEMORY(result, request);
+  return result;
+}
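+
+// A worked example of the sizing above (illustrative numbers, assuming a
+// 24-byte header and roundup == 32): a request for 100 bytes becomes
+// req_rnd == RoundUp(100 + 24, 32) == 128, and a 224-byte free region
+// would be split, since 128 + min_size (64) <= 224 leaves a 96-byte
+// remainder for the freelist.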
+
+void *LowLevelAlloc::Alloc(size_t request) {
+  void *result = DoAllocWithArena(request, &default_arena);
+  if ((default_arena.flags & kCallMallocHook) != 0) {
+    // this call must be directly in the user-called allocator function
+    // for MallocHook::GetCallerStackTrace to work properly
+    MallocHook::InvokeNewHook(result, request);
+  }
+  return result;
+}
+
+void *LowLevelAlloc::AllocWithArena(size_t request, Arena *arena) {
+  RAW_CHECK(arena != 0, "must pass a valid arena");
+  void *result = DoAllocWithArena(request, arena);
+  if ((arena->flags & kCallMallocHook) != 0) {
+    // this call must be directly in the user-called allocator function
+    // for MallocHook::GetCallerStackTrace to work properly
+    MallocHook::InvokeNewHook(result, request);
+  }
+  return result;
+}
+
+LowLevelAlloc::Arena *LowLevelAlloc::DefaultArena() {
+  return &default_arena;
+}
diff --git a/src/base/low_level_alloc.h b/src/base/low_level_alloc.h
new file mode 100644
index 0000000..4081ff8
--- /dev/null
+++ b/src/base/low_level_alloc.h
@@ -0,0 +1,107 @@
+// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
+/* Copyright (c) 2006, Google Inc.
+ * All rights reserved.
+ * 
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ * 
+ *     * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with the
+ * distribution.
+ *     * Neither the name of Google Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ * 
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#if !defined(_BASE_LOW_LEVEL_ALLOC_H_)
+#define _BASE_LOW_LEVEL_ALLOC_H_
+
+// A simple thread-safe memory allocator that does not depend on
+// mutexes or thread-specific data.  It is intended to be used
+// sparingly, and only when malloc() would introduce an unwanted
+// dependency, such as inside the heap-checker.
+
+#include <config.h>
+#include <stddef.h>             // for size_t
+#include "base/basictypes.h"
+
+class LowLevelAlloc {
+ public:
+  struct Arena;       // an arena from which memory may be allocated
+
+  // Returns a pointer to a block of at least "request" bytes
+  // that have been newly allocated from the specific arena.
+  // For Alloc(), the DefaultArena() is used.
+  // Returns 0 if passed request==0.
+  // Does not return 0 under other circumstances; it crashes if memory
+  // is not available.
+  static void *Alloc(size_t request)
+    ATTRIBUTE_SECTION(malloc_hook);
+  static void *AllocWithArena(size_t request, Arena *arena)
+    ATTRIBUTE_SECTION(malloc_hook);
+
+  // Deallocates a region of memory that was previously allocated with
+  // Alloc().   Does nothing if passed 0.   "s" must be either 0,
+  // or must have been returned from a call to Alloc() and not yet passed to
+  // Free() since that call to Alloc().  The space is returned to the arena
+  // from which it was allocated.
+  static void Free(void *s) ATTRIBUTE_SECTION(malloc_hook);
+
+  // The ATTRIBUTE_SECTION(malloc_hook) annotations on Alloc* and Free
+  // put all callers of MallocHook::Invoke* in this module into a
+  // special section, so that MallocHook::GetCallerStackTrace can
+  // function accurately.
+
+  // Create a new arena.
+  // The root metadata for the new arena is allocated in the
+  // meta_data_arena; the DefaultArena() can be passed for meta_data_arena.
+  // These values may be ored into flags:
+  enum {
+    // Report calls to Alloc() and Free() via the MallocHook interface.
+    // Set in the DefaultArena.
+    kCallMallocHook = 0x0001,
+
+    // Make calls to Alloc(), Free() be async-signal-safe.  Not set in
+    // DefaultArena().
+    kAsyncSignalSafe = 0x0002,
+
+    // When used with DefaultArena(), the NewArena() and DeleteArena() calls
+    // obey the flags given explicitly in the NewArena() call, even if those
+    // flags differ from the settings in DefaultArena().  So the call
+    // NewArena(kAsyncSignalSafe, DefaultArena()) is itself async-signal-safe,
+    // as well as generating an arena that provides async-signal-safe
+    // Alloc/Free.
+  };
+  static Arena *NewArena(int32 flags, Arena *meta_data_arena);
+
+  // Destroys an arena allocated by NewArena and returns true,
+  // provided no allocated blocks remain in the arena.
+  // If allocated blocks remain in the arena, does nothing and
+  // returns false.
+  // It is illegal to attempt to destroy the DefaultArena().
+  static bool DeleteArena(Arena *arena);
+
+  // The default arena that always exists.
+  static Arena *DefaultArena();
+
+ private:
+  LowLevelAlloc();      // no instances
+};
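+
+// Example usage (an illustrative sketch):
+//
+//   LowLevelAlloc::Arena *arena =
+//       LowLevelAlloc::NewArena(0, LowLevelAlloc::DefaultArena());
+//   void *p = LowLevelAlloc::AllocWithArena(128, arena);
+//   ...
+//   LowLevelAlloc::Free(p);            // block returns to its arena
+//   LowLevelAlloc::DeleteArena(arena); // true once all blocks are freed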
+
+#endif
diff --git a/src/base/simple_mutex.h b/src/base/simple_mutex.h
new file mode 100644
index 0000000..a1886e4
--- /dev/null
+++ b/src/base/simple_mutex.h
@@ -0,0 +1,332 @@
+// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
+// Copyright (c) 2007, Google Inc.
+// All rights reserved.
+// 
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+// 
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+// 
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+// 
+// ---
+// Author: Craig Silverstein.
+//
+// A simple mutex wrapper, supporting locks and read-write locks.
+// You should assume the locks are *not* re-entrant.
+//
+// To use: you should define the following macros in your configure.ac:
+//   ACX_PTHREAD
+//   AC_RWLOCK
+// The latter is defined in ../autoconf.
+//
+// This class is meant to be internal-only and should be wrapped by an
+// internal namespace.  Before you use this module, please give the
+// name of your internal namespace for this module.  Or, if you want
+// to expose it, you'll want to move it to the Google namespace.  We
+// cannot put this class in global namespace because there can be some
+// problems when we have multiple versions of Mutex in each shared object.
+//
+// NOTE: TryLock() is broken for NO_THREADS mode, at least in NDEBUG
+//       mode.
+//
+// CYGWIN NOTE: Cygwin support for rwlock seems to be buggy:
+//    http://www.cygwin.com/ml/cygwin/2008-12/msg00017.html
+// Because of that, we might as well use windows locks for
+// cygwin.  They seem to be more reliable than the cygwin pthreads layer.
+//
+// TRICKY IMPLEMENTATION NOTE:
+// This class is designed to be safe to use during
+// dynamic-initialization -- that is, by global constructors that are
+// run before main() starts.  The issue in this case is that
+// dynamic-initialization happens in an unpredictable order, and it
+// could be that someone else's dynamic initializer could call a
+// function that tries to acquire this mutex -- but that all happens
+// before this mutex's constructor has run.  (This can happen even if
+// the mutex and the function that uses the mutex are in the same .cc
+// file.)  Basically, because Mutex does non-trivial work in its
+// constructor, it's not, in the naive implementation, safe to use
+// before dynamic initialization has run on it.
+//
+// The solution used here is to pair the actual mutex primitive with a
+// bool that is set to true when the mutex is dynamically initialized.
+// (Before that it's false.)  Then we modify all mutex routines to
+// look at the bool, and not try to lock/unlock until the bool makes
+// it to true (which happens after the Mutex constructor has run.)
+//
+// This works because before main() starts -- particularly, during
+// dynamic initialization -- there are no threads, so a) it's ok that
+// the mutex operations are a no-op, since we don't need locking then
+// anyway; and b) we can be quite confident our bool won't change
+// state between a call to Lock() and a call to Unlock() (that would
+// require a global constructor in one translation unit to call Lock()
+// and another global constructor in another translation unit to call
+// Unlock() later, which is pretty perverse).
+//
+// That said, it's tricky, and can conceivably fail; it's safest to
+// avoid trying to acquire a mutex in a global constructor, if you
+// can.  One way it can fail is that a really smart compiler might
+// initialize the bool to true at static-initialization time (too
+// early) rather than at dynamic-initialization time.  To discourage
+// that, we set is_safe_ to true in code (not in the constructor's
+// colon-initializer), via a function that always evaluates to true
+// but that the compiler can't know always evaluates to true.  This
+// should be good enough.
+//
+// A related issue is code that could try to access the mutex
+// after it's been destroyed in the global destructors (because
+// the Mutex global destructor runs before some other global
+// destructor, that tries to acquire the mutex).  The way we
+// deal with this is by taking a constructor arg that global
+// mutexes should pass in, that causes the destructor to do no
+// work.  We still depend on the compiler not doing anything
+// weird to a Mutex's memory after it is destroyed, but for a
+// static global variable, that's pretty safe.
+
+#ifndef GOOGLE_MUTEX_H_
+#define GOOGLE_MUTEX_H_
+
+#include <config.h>
+
+#if defined(NO_THREADS)
+  typedef int MutexType;      // to keep a lock-count
+#elif defined(_WIN32) || defined(__CYGWIN__) || defined(__CYGWIN32__)
+# ifndef WIN32_LEAN_AND_MEAN
+#   define WIN32_LEAN_AND_MEAN  // We only need minimal includes
+# endif
+  // We need Windows NT or later for TryEnterCriticalSection().  If you
+  // don't need that functionality, you can remove these _WIN32_WINNT
+  // lines, and change TryLock() to assert(0) or something.
+# ifndef _WIN32_WINNT
+#   define _WIN32_WINNT 0x0400
+# endif
+# include <windows.h>
+  typedef CRITICAL_SECTION MutexType;
+#elif defined(HAVE_PTHREAD) && defined(HAVE_RWLOCK)
+  // Needed for pthread_rwlock_*.  If it causes problems, you could take it
+  // out, but then you'd have to unset HAVE_RWLOCK (at least on linux -- it
+  // *does* cause problems on FreeBSD and MacOSX, but isn't needed
+  // for locking there.)
+# ifdef __linux__
+#   define _XOPEN_SOURCE 500  // may be needed to get the rwlock calls
+# endif
+# include <pthread.h>
+  typedef pthread_rwlock_t MutexType;
+#elif defined(HAVE_PTHREAD)
+# include <pthread.h>
+  typedef pthread_mutex_t MutexType;
+#else
+# error Need to implement mutex.h for your architecture, or #define NO_THREADS
+#endif
+
+#include <assert.h>
+#include <stdlib.h>      // for abort()
+
+#define MUTEX_NAMESPACE perftools_mutex_namespace
+
+namespace MUTEX_NAMESPACE {
+
+class Mutex {
+ public:
+  // This is used for the single-arg constructor
+  enum LinkerInitialized { LINKER_INITIALIZED };
+
+  // Create a Mutex that is not held by anybody.  This constructor is
+  // typically used for Mutexes allocated on the heap or the stack.
+  inline Mutex();
+  // This constructor should be used for global, static Mutex objects.
+  // It inhibits work being done by the destructor, which makes it
+  // safer for code that tries to acquire this mutex in its global
+  // destructor.
+  inline Mutex(LinkerInitialized);
+
+  // Destructor
+  inline ~Mutex();
+
+  inline void Lock();    // Block if needed until free then acquire exclusively
+  inline void Unlock();  // Release a lock acquired via Lock()
+  inline bool TryLock(); // If free, Lock() and return true, else return false
+  // Note that on systems that don't support read-write locks, these may
+  // be implemented as synonyms to Lock() and Unlock().  So you can use
+  // these for efficiency, but don't use them anyplace where being able
+  // to do shared reads is necessary to avoid deadlock.
+  inline void ReaderLock();   // Block until free or shared then acquire a share
+  inline void ReaderUnlock(); // Release a read share of this Mutex
+  inline void WriterLock() { Lock(); }     // Acquire an exclusive lock
+  inline void WriterUnlock() { Unlock(); } // Release a lock from WriterLock()
+
+ private:
+  MutexType mutex_;
+  // We want to make sure that the compiler sets is_safe_ to true only
+  // when we tell it to, and never makes assumptions is_safe_ is
+  // always true.  volatile is the most reliable way to do that.
+  volatile bool is_safe_;
+  // This indicates which constructor was called.
+  bool destroy_;
+
+  inline void SetIsSafe() { is_safe_ = true; }
+
+  // Catch the error of writing Mutex when intending MutexLock.
+  Mutex(Mutex* /*ignored*/) {}
+  // Disallow "evil" constructors
+  Mutex(const Mutex&);
+  void operator=(const Mutex&);
+};
+
+// Now the implementation of Mutex for various systems
+#if defined(NO_THREADS)
+
+// When we don't have threads, we can be either reading or writing,
+// but not both.  We can have lots of readers at once (in no-threads
+// mode, that's most likely to happen in recursive function calls),
+// but only one writer.  We represent this by having mutex_ be -1 when
+// writing and a number > 0 when reading (and 0 when no lock is held).
+//
+// In debug mode, we assert these invariants, while in non-debug mode
+// we do nothing, for efficiency.  That's why everything is in an
+// assert.
+
+Mutex::Mutex() : mutex_(0) { }
+Mutex::Mutex(Mutex::LinkerInitialized) : mutex_(0) { }
+Mutex::~Mutex()            { assert(mutex_ == 0); }
+void Mutex::Lock()         { assert(--mutex_ == -1); }
+void Mutex::Unlock()       { assert(mutex_++ == -1); }
+bool Mutex::TryLock()      { if (mutex_) return false; Lock(); return true; }
+void Mutex::ReaderLock()   { assert(++mutex_ > 0); }
+void Mutex::ReaderUnlock() { assert(mutex_-- > 0); }
+
+#elif defined(_WIN32) || defined(__CYGWIN__) || defined(__CYGWIN32__)
+
+Mutex::Mutex() : destroy_(true) {
+  InitializeCriticalSection(&mutex_);
+  SetIsSafe();
+}
+Mutex::Mutex(LinkerInitialized) : destroy_(false) {
+  InitializeCriticalSection(&mutex_);
+  SetIsSafe();
+}
+Mutex::~Mutex()            { if (destroy_) DeleteCriticalSection(&mutex_); }
+void Mutex::Lock()         { if (is_safe_) EnterCriticalSection(&mutex_); }
+void Mutex::Unlock()       { if (is_safe_) LeaveCriticalSection(&mutex_); }
+bool Mutex::TryLock()      { return is_safe_ ?
+                                 TryEnterCriticalSection(&mutex_) != 0 : true; }
+void Mutex::ReaderLock()   { Lock(); }      // we don't have read-write locks
+void Mutex::ReaderUnlock() { Unlock(); }
+
+#elif defined(HAVE_PTHREAD) && defined(HAVE_RWLOCK)
+
+#define SAFE_PTHREAD(fncall)  do {   /* run fncall if is_safe_ is true */  \
+  if (is_safe_ && fncall(&mutex_) != 0) abort();                           \
+} while (0)
+
+Mutex::Mutex() : destroy_(true) {
+  SetIsSafe();
+  if (is_safe_ && pthread_rwlock_init(&mutex_, NULL) != 0) abort();
+}
+Mutex::Mutex(Mutex::LinkerInitialized) : destroy_(false) {
+  SetIsSafe();
+  if (is_safe_ && pthread_rwlock_init(&mutex_, NULL) != 0) abort();
+}
+Mutex::~Mutex()       { if (destroy_) SAFE_PTHREAD(pthread_rwlock_destroy); }
+void Mutex::Lock()         { SAFE_PTHREAD(pthread_rwlock_wrlock); }
+void Mutex::Unlock()       { SAFE_PTHREAD(pthread_rwlock_unlock); }
+bool Mutex::TryLock()      { return is_safe_ ?
+                               pthread_rwlock_trywrlock(&mutex_) == 0 : true; }
+void Mutex::ReaderLock()   { SAFE_PTHREAD(pthread_rwlock_rdlock); }
+void Mutex::ReaderUnlock() { SAFE_PTHREAD(pthread_rwlock_unlock); }
+#undef SAFE_PTHREAD
+
+#elif defined(HAVE_PTHREAD)
+
+#define SAFE_PTHREAD(fncall)  do {   /* run fncall if is_safe_ is true */  \
+  if (is_safe_ && fncall(&mutex_) != 0) abort();                           \
+} while (0)
+
+Mutex::Mutex() : destroy_(true) {
+  SetIsSafe();
+  if (is_safe_ && pthread_mutex_init(&mutex_, NULL) != 0) abort();
+}
+Mutex::Mutex(Mutex::LinkerInitialized) : destroy_(false) {
+  SetIsSafe();
+  if (is_safe_ && pthread_mutex_init(&mutex_, NULL) != 0) abort();
+}
+Mutex::~Mutex()       { if (destroy_) SAFE_PTHREAD(pthread_mutex_destroy); }
+void Mutex::Lock()         { SAFE_PTHREAD(pthread_mutex_lock); }
+void Mutex::Unlock()       { SAFE_PTHREAD(pthread_mutex_unlock); }
+bool Mutex::TryLock()      { return is_safe_ ?
+                                 pthread_mutex_trylock(&mutex_) == 0 : true; }
+void Mutex::ReaderLock()   { Lock(); }
+void Mutex::ReaderUnlock() { Unlock(); }
+#undef SAFE_PTHREAD
+
+#endif
+
+// --------------------------------------------------------------------------
+// Some helper classes
+
+// MutexLock(mu) acquires mu when constructed and releases it when destroyed.
+class MutexLock {
+ public:
+  explicit MutexLock(Mutex *mu) : mu_(mu) { mu_->Lock(); }
+  ~MutexLock() { mu_->Unlock(); }
+ private:
+  Mutex * const mu_;
+  // Disallow "evil" constructors
+  MutexLock(const MutexLock&);
+  void operator=(const MutexLock&);
+};
+
+// ReaderMutexLock and WriterMutexLock do the same, for rwlocks
+class ReaderMutexLock {
+ public:
+  explicit ReaderMutexLock(Mutex *mu) : mu_(mu) { mu_->ReaderLock(); }
+  ~ReaderMutexLock() { mu_->ReaderUnlock(); }
+ private:
+  Mutex * const mu_;
+  // Disallow "evil" constructors
+  ReaderMutexLock(const ReaderMutexLock&);
+  void operator=(const ReaderMutexLock&);
+};
+
+class WriterMutexLock {
+ public:
+  explicit WriterMutexLock(Mutex *mu) : mu_(mu) { mu_->WriterLock(); }
+  ~WriterMutexLock() { mu_->WriterUnlock(); }
+ private:
+  Mutex * const mu_;
+  // Disallow "evil" constructors
+  WriterMutexLock(const WriterMutexLock&);
+  void operator=(const WriterMutexLock&);
+};
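+
+// Example (a sketch; "mu" and "counter" are hypothetical globals):
+//
+//   static Mutex mu(Mutex::LINKER_INITIALIZED);
+//   static int counter = 0;
+//   void Increment() {
+//     MutexLock l(&mu);   // acquired here, released at end of scope
+//     ++counter;
+//   }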
+
+// Catch bug where variable name is omitted, e.g. MutexLock (&mu);
+#define MutexLock(x) COMPILE_ASSERT(0, mutex_lock_decl_missing_var_name)
+#define ReaderMutexLock(x) COMPILE_ASSERT(0, rmutex_lock_decl_missing_var_name)
+#define WriterMutexLock(x) COMPILE_ASSERT(0, wmutex_lock_decl_missing_var_name)
+
+}  // namespace MUTEX_NAMESPACE
+
+using namespace MUTEX_NAMESPACE;
+
+#undef MUTEX_NAMESPACE
+
+#endif  /* GOOGLE_MUTEX_H_ */
diff --git a/src/base/spinlock.cc b/src/base/spinlock.cc
new file mode 100644
index 0000000..2021fec
--- /dev/null
+++ b/src/base/spinlock.cc
@@ -0,0 +1,183 @@
+// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
+/* Copyright (c) 2006, Google Inc.
+ * All rights reserved.
+ * 
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ * 
+ *     * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with the
+ * distribution.
+ *     * Neither the name of Google Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ * 
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * ---
+ * Author: Sanjay Ghemawat
+ */
+
+#include <config.h>
+#include "base/spinlock.h"
+#include "base/synchronization_profiling.h"
+#include "base/spinlock_internal.h"
+#include "base/cycleclock.h"
+#include "base/sysinfo.h"   /* for NumCPUs() */
+
+// NOTE on the Lock-state values:
+//
+//   kSpinLockFree represents the unlocked state
+//   kSpinLockHeld represents the locked state with no waiters
+//
+// Values greater than kSpinLockHeld represent the locked state with waiters,
+// where the value is the time the current lock holder had to
+// wait before obtaining the lock.  The kSpinLockSleeper state is a special
+// "locked with waiters" state that indicates that a sleeper needs to
+// be woken, but the thread that just released the lock didn't wait.
+
+static int adaptive_spin_count = 0;
+
+const base::LinkerInitialized SpinLock::LINKER_INITIALIZED =
+    base::LINKER_INITIALIZED;
+
+namespace {
+struct SpinLock_InitHelper {
+  SpinLock_InitHelper() {
+    // On multi-cpu machines, spin for longer before yielding
+    // the processor or sleeping.  Reduces idle time significantly.
+    if (NumCPUs() > 1) {
+      adaptive_spin_count = 1000;
+    }
+  }
+};
+
+// Hook into global constructor execution:
+// We do not do adaptive spinning before that,
+// but nothing lock-intensive should be going on at that time.
+static SpinLock_InitHelper init_helper;
+
+}  // unnamed namespace
+
+// Monitor the lock to see if its value changes within some time period
+// (adaptive_spin_count loop iterations).  A timestamp indicating
+// when the thread initially started waiting for the lock is passed in via
+// the initial_wait_timestamp value.  The total wait time in cycles for the
+// lock is returned in the wait_cycles parameter.  The last value read
+// from the lock is returned from the method.
+Atomic32 SpinLock::SpinLoop(int64 initial_wait_timestamp,
+                            Atomic32* wait_cycles) {
+  int c = adaptive_spin_count;
+  while (base::subtle::NoBarrier_Load(&lockword_) != kSpinLockFree && --c > 0) {
+  }
+  Atomic32 spin_loop_wait_cycles = CalculateWaitCycles(initial_wait_timestamp);
+  Atomic32 lock_value =
+      base::subtle::Acquire_CompareAndSwap(&lockword_, kSpinLockFree,
+                                           spin_loop_wait_cycles);
+  *wait_cycles = spin_loop_wait_cycles;
+  return lock_value;
+}
+
+void SpinLock::SlowLock() {
+  // The lock was not obtained initially, so this thread needs to wait for
+  // it.  Record the current timestamp in the local variable wait_start_time
+  // so the total wait time can be stored in the lockword once this thread
+  // obtains the lock.
+  int64 wait_start_time = CycleClock::Now();
+  Atomic32 wait_cycles;
+  Atomic32 lock_value = SpinLoop(wait_start_time, &wait_cycles);
+
+  int lock_wait_call_count = 0;
+  while (lock_value != kSpinLockFree) {
+    // If the lock is currently held, but not marked as having a sleeper, mark
+    // it as having a sleeper.
+    if (lock_value == kSpinLockHeld) {
+      // Here, just "mark" that the thread is going to sleep.  Don't store the
+      // lock wait time in the lock as that will cause the current lock
+      // owner to think it experienced contention.
+      lock_value = base::subtle::Acquire_CompareAndSwap(&lockword_,
+                                                        kSpinLockHeld,
+                                                        kSpinLockSleeper);
+      if (lock_value == kSpinLockHeld) {
+        // Successfully transitioned to kSpinLockSleeper.  Pass
+        // kSpinLockSleeper to the SpinLockWait routine to properly indicate
+        // the last lock_value observed.
+        lock_value = kSpinLockSleeper;
+      } else if (lock_value == kSpinLockFree) {
+        // Lock is free again, so try and acquire it before sleeping.  The
+        // new lock state will be the number of cycles this thread waited if
+        // this thread obtains the lock.
+        lock_value = base::subtle::Acquire_CompareAndSwap(&lockword_,
+                                                          kSpinLockFree,
+                                                          wait_cycles);
+        continue;  // skip the delay at the end of the loop
+      }
+    }
+
+    // Wait for an OS specific delay.
+    base::internal::SpinLockDelay(&lockword_, lock_value,
+                                  ++lock_wait_call_count);
+    // Spin again after returning from the wait routine to give this thread
+    // some chance of obtaining the lock.
+    lock_value = SpinLoop(wait_start_time, &wait_cycles);
+  }
+}
+
+// The wait time for contentionz lock profiling must fit into 32 bits.
+// However, the lower 32-bits of the cycle counter wrap around too quickly
+// with high frequency processors, so a right-shift by 7 is performed to
+// quickly divide the cycles by 128.  Using these 32 bits reduces the
+// granularity of time measurement to 128 cycles, and loses track
+// of wait time for waits greater than 109 seconds on a 5 GHz machine
+// [(2^32 cycles / 5 GHz) * 128 = 109.95 seconds].  Waits this long should be
+// very rare and the reduced granularity should not be an issue given
+// processors in the Google fleet operate at a minimum of one billion
+// cycles/sec.
+enum { PROFILE_TIMESTAMP_SHIFT = 7 };
+
+void SpinLock::SlowUnlock(uint64 wait_cycles) {
+  base::internal::SpinLockWake(&lockword_, false);  // wake waiter if necessary
+
+  // Collect contentionz profile info, expanding the wait_cycles back out to
+  // the full value.  If wait_cycles is <= kSpinLockSleeper, then no wait
+  // was actually performed, so don't record the wait time.  Note that the
+  // CalculateWaitCycles method adds in kSpinLockSleeper cycles
+  // unconditionally to guarantee the wait time is not kSpinLockFree or
+  // kSpinLockHeld.  Adding in this small number of cycles may
+  // overestimate the contention by a slight amount 50% of the time.  However,
+  // if this code tried to correct for that addition by subtracting out the
+  // kSpinLockSleeper amount that would underestimate the contention slightly
+  // 50% of the time.  Both ways get the wrong answer, so the code
+  // overestimates to be more conservative. Overestimating also makes the code
+  // a little simpler.
+  //
+  if (wait_cycles > kSpinLockSleeper) {
+    base::SubmitSpinLockProfileData(this,
+                                    wait_cycles << PROFILE_TIMESTAMP_SHIFT);
+  }
+}
+
+inline int32 SpinLock::CalculateWaitCycles(int64 wait_start_time) {
+  int32 wait_cycles = ((CycleClock::Now() - wait_start_time) >>
+                       PROFILE_TIMESTAMP_SHIFT);
+  // The number of cycles waiting for the lock is used as both the
+  // wait_cycles and lock value, so it can't be kSpinLockFree or
+  // kSpinLockHeld.  Make sure the value returned is at least
+  // kSpinLockSleeper.
+  wait_cycles |= kSpinLockSleeper;
+  return wait_cycles;
+}
diff --git a/src/base/spinlock.h b/src/base/spinlock.h
new file mode 100644
index 0000000..033a75e
--- /dev/null
+++ b/src/base/spinlock.h
@@ -0,0 +1,146 @@
+// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
+/* Copyright (c) 2006, Google Inc.
+ * All rights reserved.
+ * 
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ * 
+ *     * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with the
+ * distribution.
+ *     * Neither the name of Google Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ * 
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * ---
+ * Author: Sanjay Ghemawat
+ */
+
+// SpinLock is async signal safe.
+// If used within a signal handler, all lock holders
+// should block the signal even outside the signal handler.
+
+#ifndef BASE_SPINLOCK_H_
+#define BASE_SPINLOCK_H_
+
+#include <config.h>
+#include "base/atomicops.h"
+#include "base/basictypes.h"
+#include "base/dynamic_annotations.h"
+#include "base/thread_annotations.h"
+
+class LOCKABLE SpinLock {
+ public:
+  SpinLock() : lockword_(kSpinLockFree) { }
+
+  // Special constructor for use with static SpinLock objects.  E.g.,
+  //
+  //    static SpinLock lock(base::LINKER_INITIALIZED);
+  //
+  // When initialized using this constructor, we depend on the fact
+  // that the linker has already initialized the memory appropriately.
+  // A SpinLock constructed like this can be freely used from global
+  // initializers without worrying about the order in which global
+  // initializers run.
+  explicit SpinLock(base::LinkerInitialized /*x*/) {
+    // Does nothing; lockword_ is already initialized
+  }
+
+  // Acquire this SpinLock.
+  // TODO(csilvers): uncomment the annotation when we figure out how to
+  //                 support this macro with 0 args (see thread_annotations.h)
+  inline void Lock() /*EXCLUSIVE_LOCK_FUNCTION()*/ {
+    if (base::subtle::Acquire_CompareAndSwap(&lockword_, kSpinLockFree,
+                                             kSpinLockHeld) != kSpinLockFree) {
+      SlowLock();
+    }
+    ANNOTATE_RWLOCK_ACQUIRED(this, 1);
+  }
+
+  // Try to acquire this SpinLock without blocking and return true if the
+  // acquisition was successful.  If the lock was not acquired, false is
+  // returned.  If this SpinLock is free at the time of the call, TryLock
+  // will return true with high probability.
+  inline bool TryLock() EXCLUSIVE_TRYLOCK_FUNCTION(true) {
+    bool res =
+        (base::subtle::Acquire_CompareAndSwap(&lockword_, kSpinLockFree,
+                                              kSpinLockHeld) == kSpinLockFree);
+    if (res) {
+      ANNOTATE_RWLOCK_ACQUIRED(this, 1);
+    }
+    return res;
+  }
+
+  // Release this SpinLock, which must be held by the calling thread.
+  // TODO(csilvers): uncomment the annotation when we figure out how to
+  //                 support this macro with 0 args (see thread_annotations.h)
+  inline void Unlock() /*UNLOCK_FUNCTION()*/ {
+    ANNOTATE_RWLOCK_RELEASED(this, 1);
+    uint64 wait_cycles = static_cast<uint64>(
+        base::subtle::Release_AtomicExchange(&lockword_, kSpinLockFree));
+    if (wait_cycles != kSpinLockHeld) {
+      // Collect contentionz profile info, and speed the wakeup of any waiter.
+      // The wait_cycles value indicates how long this thread spent waiting
+      // for the lock.
+      SlowUnlock(wait_cycles);
+    }
+  }
+
+  // Determine if the lock is held.  When the lock is held by the invoking
+  // thread, true will always be returned. Intended to be used as
+  // CHECK(lock.IsHeld()).
+  inline bool IsHeld() const {
+    return base::subtle::NoBarrier_Load(&lockword_) != kSpinLockFree;
+  }
+
+  static const base::LinkerInitialized LINKER_INITIALIZED;  // backwards compat
+ private:
+  enum { kSpinLockFree = 0 };
+  enum { kSpinLockHeld = 1 };
+  enum { kSpinLockSleeper = 2 };
+
+  volatile Atomic32 lockword_;
+
+  void SlowLock();
+  void SlowUnlock(uint64 wait_cycles);
+  Atomic32 SpinLoop(int64 initial_wait_timestamp, Atomic32* wait_cycles);
+  inline int32 CalculateWaitCycles(int64 wait_start_time);
+
+  DISALLOW_COPY_AND_ASSIGN(SpinLock);
+};
+
+// Corresponding locker object that arranges to acquire a spinlock for
+// the duration of a C++ scope.
+class SCOPED_LOCKABLE SpinLockHolder {
+ private:
+  SpinLock* lock_;
+ public:
+  inline explicit SpinLockHolder(SpinLock* l) EXCLUSIVE_LOCK_FUNCTION(l)
+      : lock_(l) {
+    l->Lock();
+  }
+  // TODO(csilvers): uncomment the annotation when we figure out how to
+  //                 support this macro with 0 args (see thread_annotations.h)
+  inline ~SpinLockHolder() /*UNLOCK_FUNCTION()*/ { lock_->Unlock(); }
+};
+// Catch bug where variable name is omitted, e.g. SpinLockHolder (&lock);
+#define SpinLockHolder(x) COMPILE_ASSERT(0, spin_lock_decl_missing_var_name)
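+
+// Example (a sketch; "lock" and "hits" are hypothetical globals):
+//
+//   static SpinLock lock(base::LINKER_INITIALIZED);
+//   static int hits = 0;
+//   void RecordHit() {
+//     SpinLockHolder h(&lock);   // Lock() in ctor, Unlock() in dtor
+//     ++hits;
+//   }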
+
+
+#endif  // BASE_SPINLOCK_H_
diff --git a/src/base/spinlock_internal.cc b/src/base/spinlock_internal.cc
new file mode 100644
index 0000000..e090f9b
--- /dev/null
+++ b/src/base/spinlock_internal.cc
@@ -0,0 +1,122 @@
+// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
+/* Copyright (c) 2010, Google Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with the
+ * distribution.
+ *     * Neither the name of Google Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+// The OS-specific header included below must provide two calls:
+// base::internal::SpinLockDelay() and base::internal::SpinLockWake().
+// See spinlock_internal.h for the spec of SpinLockWake().
+
+// void SpinLockDelay(volatile Atomic32 *w, int32 value, int loop)
+// SpinLockDelay() generates an appropriate spin delay on iteration "loop" of a
+// spin loop on location *w, whose previously observed value was "value".
+// SpinLockDelay() may do nothing, may yield the CPU, may sleep a clock tick,
+// or may wait for a delay that can be truncated by a call to SpinLockWake(w).
+// In all cases, it must return in bounded time even if SpinLockWake() is not
+// called.
+
+#include "base/spinlock_internal.h"
+
+// forward declaration for use by spinlock_*-inl.h
+namespace base { namespace internal { static int SuggestedDelayNS(int loop); }}
+
+#if defined(_WIN32)
+#include "base/spinlock_win32-inl.h"
+#elif defined(__linux__)
+#include "base/spinlock_linux-inl.h"
+#else
+#include "base/spinlock_posix-inl.h"
+#endif
+
+namespace base {
+namespace internal {
+
+// See spinlock_internal.h for spec.
+int32 SpinLockWait(volatile Atomic32 *w, int n,
+                   const SpinLockWaitTransition trans[]) {
+  int32 v;
+  bool done = false;
+  for (int loop = 0; !done; loop++) {
+    v = base::subtle::Acquire_Load(w);
+    int i;
+    for (i = 0; i != n && v != trans[i].from; i++) {
+    }
+    if (i == n) {
+      SpinLockDelay(w, v, loop);     // no matching transition
+    } else if (trans[i].to == v ||   // null transition
+               base::subtle::Acquire_CompareAndSwap(w, v, trans[i].to) == v) {
+      done = trans[i].done;
+    }
+  }
+  return v;
+}
+
+// Return a suggested delay in nanoseconds for iteration number "loop"
+static int SuggestedDelayNS(int loop) {
+  // Weak pseudo-random number generator to get some spread between threads
+  // when many are spinning.
+#ifdef BASE_HAS_ATOMIC64
+  static base::subtle::Atomic64 rand;
+  uint64 r = base::subtle::NoBarrier_Load(&rand);
+  r = 0x5deece66dLL * r + 0xb;   // numbers from nrand48()
+  base::subtle::NoBarrier_Store(&rand, r);
+
+  r <<= 16;   // 48-bit random number now in top 48-bits.
+  if (loop < 0 || loop > 32) {   // limit loop to 0..32
+    loop = 32;
+  }
+  // loop>>3 cannot exceed 4 because loop cannot exceed 32.
+  // Select top 20..24 bits of lower 48 bits,
+  // giving approximately 0ms to 16ms.
+  // Mean is exponential in loop for first 32 iterations, then 8ms.
+  // The futex path multiplies this by 16, since we expect explicit wakeups
+  // almost always on that path.
+  return r >> (44 - (loop >> 3));
+#else
+  static Atomic32 rand;
+  uint32 r = base::subtle::NoBarrier_Load(&rand);
+  r = 0x343fd * r + 0x269ec3;   // numbers from MSVC++
+  base::subtle::NoBarrier_Store(&rand, r);
+
+  r <<= 1;   // 31-bit random number now in top 31-bits.
+  if (loop < 0 || loop > 32) {   // limit loop to 0..32
+    loop = 32;
+  }
+  // loop>>3 cannot exceed 4 because loop cannot exceed 32.
+  // Select top 20..24 bits of lower 31 bits,
+  // giving approximately 0ms to 16ms.
+  // Mean is exponential in loop for first 32 iterations, then 8ms.
+  // The futex path multiplies this by 16, since we expect explicit wakeups
+  // almost always on that path.
+  return r >> (12 - (loop >> 3));
+#endif
+}
+
+} // namespace internal
+} // namespace base
diff --git a/src/base/spinlock_internal.h b/src/base/spinlock_internal.h
new file mode 100644
index 0000000..4d3c17f
--- /dev/null
+++ b/src/base/spinlock_internal.h
@@ -0,0 +1,65 @@
+// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
+/* Copyright (c) 2010, Google Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with the
+ * distribution.
+ *     * Neither the name of Google Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * ---
+ * This file is an internal part of spinlock.cc and once.cc.
+ * It may not be used directly by code outside of //base.
+ */
+
+#ifndef BASE_SPINLOCK_INTERNAL_H_
+#define BASE_SPINLOCK_INTERNAL_H_
+
+#include <config.h>
+#include "base/basictypes.h"
+#include "base/atomicops.h"
+
+namespace base {
+namespace internal {
+
+// SpinLockWait() waits until it can perform one of several transitions from
+// "from" to "to".  It returns when it performs a transition where done==true.
+struct SpinLockWaitTransition {
+  int32 from;
+  int32 to;
+  bool done;
+};
+
+// Wait until *w can transition from trans[i].from to trans[i].to for some i
+// satisfying 0<=i<n && trans[i].done, atomically make the transition,
+// then return the old value of *w.   Make any other atomic transitions
+// where !trans[i].done, but continue waiting.
+int32 SpinLockWait(volatile Atomic32 *w, int n,
+                   const SpinLockWaitTransition trans[]);
+void SpinLockWake(volatile Atomic32 *w, bool all);
+void SpinLockDelay(volatile Atomic32 *w, int32 value, int loop);
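+
+// For illustration, a minimal sketch of driving SpinLockWait(); the state
+// values here are hypothetical, not the ones SpinLock actually uses:
+//
+//   enum { kUnlocked = 0, kLocked = 1 };
+//   static const SpinLockWaitTransition trans[] = {
+//     { kUnlocked, kLocked, true },   // acquired the lock: stop waiting
+//   };
+//   // Blocks until *w goes kUnlocked -> kLocked, then returns.
+//   SpinLockWait(&lockword, 1, trans);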
+
+} // namespace internal
+} // namespace base
+#endif
diff --git a/src/base/spinlock_linux-inl.h b/src/base/spinlock_linux-inl.h
new file mode 100644
index 0000000..aadf62a
--- /dev/null
+++ b/src/base/spinlock_linux-inl.h
@@ -0,0 +1,101 @@
+// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
+/* Copyright (c) 2009, Google Inc.
+ * All rights reserved.
+ * 
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ * 
+ *     * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with the
+ * distribution.
+ *     * Neither the name of Google Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ * 
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * ---
+ * This file is a Linux-specific part of spinlock_internal.cc
+ */
+
+#include <errno.h>
+#include <sched.h>
+#include <time.h>
+#include <limits.h>
+#include "base/linux_syscall_support.h"
+
+#define FUTEX_WAIT 0
+#define FUTEX_WAKE 1
+#define FUTEX_PRIVATE_FLAG 128
+
+static bool have_futex;
+static int futex_private_flag = FUTEX_PRIVATE_FLAG;
+
+namespace {
+static struct InitModule {
+  InitModule() {
+    int x = 0;
+    // futexes are ints, so we can use them only when
+    // that's the same size as the lockword_ in SpinLock.
+    have_futex = (sizeof (Atomic32) == sizeof (int) &&
+                  sys_futex(&x, FUTEX_WAKE, 1, NULL, NULL, 0) >= 0);
+    if (have_futex &&
+        sys_futex(&x, FUTEX_WAKE | futex_private_flag, 1, NULL, NULL, 0) < 0) {
+      futex_private_flag = 0;
+    }
+  }
+} init_module;
+
+}  // anonymous namespace
+
+
+namespace base {
+namespace internal {
+
+void SpinLockDelay(volatile Atomic32 *w, int32 value, int loop) {
+  if (loop != 0) {
+    int save_errno = errno;
+    struct timespec tm;
+    tm.tv_sec = 0;
+    if (have_futex) {
+      tm.tv_nsec = base::internal::SuggestedDelayNS(loop);
+    } else {
+      tm.tv_nsec = 2000001;   // above 2ms so linux 2.4 doesn't spin
+    }
+    if (have_futex) {
+      tm.tv_nsec *= 16;  // increase the delay; we expect explicit wakeups
+      sys_futex(reinterpret_cast<int *>(const_cast<Atomic32 *>(w)),
+                FUTEX_WAIT | futex_private_flag,
+                value, reinterpret_cast<struct kernel_timespec *>(&tm),
+                NULL, 0);
+    } else {
+      nanosleep(&tm, NULL);
+    }
+    errno = save_errno;
+  }
+}
+
+void SpinLockWake(volatile Atomic32 *w, bool all) {
+  if (have_futex) {
+    sys_futex(reinterpret_cast<int *>(const_cast<Atomic32 *>(w)),
+              FUTEX_WAKE | futex_private_flag, all? INT_MAX : 1,
+              NULL, NULL, 0);
+  }
+}
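+
+// For illustration only, the intended wait/wake pairing (a sketch; the
+// variable names are hypothetical): a waiter sleeps while the lockword
+// still holds the value it last observed, and the releaser changes the
+// word before waking:
+//
+//   SpinLockDelay(w, old_value, loop);             // waiter
+//   base::subtle::Release_Store(w, new_value);     // releaser...
+//   SpinLockWake(w, false);                        // ...wakes one waiter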
+
+} // namespace internal
+} // namespace base
diff --git a/src/base/spinlock_posix-inl.h b/src/base/spinlock_posix-inl.h
new file mode 100644
index 0000000..e73a30f
--- /dev/null
+++ b/src/base/spinlock_posix-inl.h
@@ -0,0 +1,63 @@
+// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
+/* Copyright (c) 2009, Google Inc.
+ * All rights reserved.
+ * 
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ * 
+ *     * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with the
+ * distribution.
+ *     * Neither the name of Google Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ * 
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * ---
+ * This file is a Posix-specific part of spinlock_internal.cc
+ */
+
+#include <config.h>
+#include <errno.h>
+#ifdef HAVE_SCHED_H
+#include <sched.h>      /* For sched_yield() */
+#endif
+#include <time.h>       /* For nanosleep() */
+
+namespace base {
+namespace internal {
+
+void SpinLockDelay(volatile Atomic32 *w, int32 value, int loop) {
+  int save_errno = errno;
+  if (loop == 0) {
+  } else if (loop == 1) {
+    sched_yield();
+  } else {
+    struct timespec tm;
+    tm.tv_sec = 0;
+    tm.tv_nsec = base::internal::SuggestedDelayNS(loop);
+    nanosleep(&tm, NULL);
+  }
+  errno = save_errno;
+}
+
+void SpinLockWake(volatile Atomic32 *w, bool all) {
+}
+
+} // namespace internal
+} // namespace base
diff --git a/src/base/spinlock_win32-inl.h b/src/base/spinlock_win32-inl.h
new file mode 100644
index 0000000..956b965
--- /dev/null
+++ b/src/base/spinlock_win32-inl.h
@@ -0,0 +1,54 @@
+// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
+/* Copyright (c) 2009, Google Inc.
+ * All rights reserved.
+ * 
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ * 
+ *     * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with the
+ * distribution.
+ *     * Neither the name of Google Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ * 
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * ---
+ * This file is a Win32-specific part of spinlock_internal.cc
+ */
+
+
+#include <windows.h>
+
+namespace base {
+namespace internal {
+
+void SpinLockDelay(volatile Atomic32 *w, int32 value, int loop) {
+  if (loop == 0) {
+  } else if (loop == 1) {
+    Sleep(0);
+  } else {
+    Sleep(base::internal::SuggestedDelayNS(loop) / 1000000);
+  }
+}
+
+void SpinLockWake(volatile Atomic32 *w, bool all) {
+}
+
+} // namespace internal
+} // namespace base
diff --git a/src/base/stl_allocator.h b/src/base/stl_allocator.h
new file mode 100644
index 0000000..2345f46
--- /dev/null
+++ b/src/base/stl_allocator.h
@@ -0,0 +1,98 @@
+// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
+/* Copyright (c) 2006, Google Inc.
+ * All rights reserved.
+ * 
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ * 
+ *     * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with the
+ * distribution.
+ *     * Neither the name of Google Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ * 
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * ---
+ * Author: Maxim Lifantsev
+ */
+
+
+#ifndef BASE_STL_ALLOCATOR_H_
+#define BASE_STL_ALLOCATOR_H_
+
+#include <config.h>
+
+#include <stddef.h>   // for ptrdiff_t
+#include <limits>
+
+#include "base/logging.h"
+
+// Generic allocator class for STL objects
+// that uses a given type-less allocator Alloc, which must provide:
+//   static void* Alloc::Allocate(size_t size);
+//   static void Alloc::Free(void* ptr, size_t size);
+//
+// STL_Allocator<T, MyAlloc> provides the same thread-safety
+// guarantees as MyAlloc.
+//
+// Usage example:
+//   set<T, less<T>, STL_Allocator<T, MyAlloc> > my_set;
+// CAVEAT: Parts of the code below are probably specific
+//         to the STL version(s) we are using.
+//         The code is simply lifted from what std::allocator<> provides.
+template <typename T, class Alloc>
+class STL_Allocator {
+ public:
+  typedef size_t     size_type;
+  typedef ptrdiff_t  difference_type;
+  typedef T*         pointer;
+  typedef const T*   const_pointer;
+  typedef T&         reference;
+  typedef const T&   const_reference;
+  typedef T          value_type;
+
+  template <class T1> struct rebind {
+    typedef STL_Allocator<T1, Alloc> other;
+  };
+
+  STL_Allocator() { }
+  STL_Allocator(const STL_Allocator&) { }
+  template <class T1> STL_Allocator(const STL_Allocator<T1, Alloc>&) { }
+  ~STL_Allocator() { }
+
+  pointer address(reference x) const { return &x; }
+  const_pointer address(const_reference x) const { return &x; }
+
+  pointer allocate(size_type n, const void* = 0) {
+    RAW_DCHECK((n * sizeof(T)) / sizeof(T) == n, "n is too big to allocate");
+    return static_cast<T*>(Alloc::Allocate(n * sizeof(T)));
+  }
+  void deallocate(pointer p, size_type n) { Alloc::Free(p, n * sizeof(T)); }
+
+  size_type max_size() const { return size_t(-1) / sizeof(T); }
+
+  void construct(pointer p, const T& val) { ::new(p) T(val); }
+  void construct(pointer p) { ::new(p) T(); }
+  void destroy(pointer p) { p->~T(); }
+
+  // There's no state, so these allocators are always equal
+  bool operator==(const STL_Allocator&) const { return true; }
+};
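+
+// For illustration, a minimal Alloc policy satisfying the interface above
+// (MallocAlloc is hypothetical, not provided by this file):
+//
+//   struct MallocAlloc {
+//     static void* Allocate(size_t size) { return malloc(size); }
+//     static void Free(void* ptr, size_t size) { free(ptr); }
+//   };
+//   std::vector<int, STL_Allocator<int, MallocAlloc> > v;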
+
+#endif  // BASE_STL_ALLOCATOR_H_
diff --git a/src/base/synchronization_profiling.h b/src/base/synchronization_profiling.h
new file mode 100644
index 0000000..b495034
--- /dev/null
+++ b/src/base/synchronization_profiling.h
@@ -0,0 +1,51 @@
+// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
+/* Copyright (c) 2010, Google Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with the
+ * distribution.
+ *     * Neither the name of Google Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * ---
+ * Author: Chris Ruemmler
+ */
+
+#ifndef BASE_AUXILIARY_SYNCHRONIZATION_PROFILING_H_
+#define BASE_AUXILIARY_SYNCHRONIZATION_PROFILING_H_
+
+#include "base/basictypes.h"
+
+namespace base {
+
+// We can do contention-profiling of SpinLocks, but the code is in
+// mutex.cc, which is not always linked in with spinlock.  Hence we
+// provide a weak definition, which is used if mutex.cc isn't linked in.
+
+// Submit the number of cycles the spinlock spent contending.
+ATTRIBUTE_WEAK extern void SubmitSpinLockProfileData(const void *, int64);
+extern void SubmitSpinLockProfileData(const void *contendedlock,
+                                      int64 wait_cycles) {}
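+
+// For illustration: a translation unit that is linked in (e.g. mutex.cc)
+// can supply a strong definition, which the linker prefers over the weak
+// empty one above (sketch):
+//
+//   void SubmitSpinLockProfileData(const void* contendedlock,
+//                                  int64 wait_cycles) {
+//     // record (contendedlock, wait_cycles) in the contention profile
+//   }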
+}
+#endif  // BASE_AUXILIARY_SYNCHRONIZATION_PROFILING_H_
diff --git a/src/base/sysinfo.cc b/src/base/sysinfo.cc
new file mode 100644
index 0000000..cad751b
--- /dev/null
+++ b/src/base/sysinfo.cc
@@ -0,0 +1,1153 @@
+// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
+// Copyright (c) 2006, Google Inc.
+// All rights reserved.
+// 
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+// 
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+// 
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include <config.h>
+#if (defined(_WIN32) || defined(__MINGW32__)) && !defined(__CYGWIN__) && !defined(__CYGWIN32)
+# define PLATFORM_WINDOWS 1
+#endif
+
+#include <ctype.h>    // for isspace()
+#include <stdlib.h>   // for getenv()
+#include <stdio.h>    // for snprintf(), sscanf()
+#include <string.h>   // for memmove(), memchr(), etc.
+#include <fcntl.h>    // for open()
+#include <errno.h>    // for errno
+#ifdef HAVE_UNISTD_H
+#include <unistd.h>   // for read()
+#endif
+#if defined __MACH__          // Mac OS X, almost certainly
+#include <mach-o/dyld.h>      // for iterating over dll's in ProcMapsIter
+#include <mach-o/loader.h>    // for iterating over dll's in ProcMapsIter
+#include <sys/types.h>
+#include <sys/sysctl.h>       // how we figure out numcpu's on OS X
+#elif defined __FreeBSD__
+#include <sys/sysctl.h>
+#elif defined __sun__         // Solaris
+#include <procfs.h>           // for, e.g., prmap_t
+#elif defined(PLATFORM_WINDOWS)
+#include <process.h>          // for getpid() (actually, _getpid())
+#include <shlwapi.h>          // for SHGetValueA()
+#include <tlhelp32.h>         // for Module32First()
+#endif
+#include "base/sysinfo.h"
+#include "base/commandlineflags.h"
+#include "base/dynamic_annotations.h"   // for RunningOnValgrind
+#include "base/logging.h"
+#include "base/cycleclock.h"
+
+#ifdef PLATFORM_WINDOWS
+#ifdef MODULEENTRY32
+// In a change from the usual W-A pattern, there is no A variant of
+// MODULEENTRY32.  Tlhelp32.h #defines the W variant, but not the A.
+// In unicode mode, tlhelp32.h #defines MODULEENTRY32 to be
+// MODULEENTRY32W.  These #undefs are the only way I see to get back
+// access to the original, ascii struct (and related functions).
+#undef MODULEENTRY32
+#undef Module32First
+#undef Module32Next
+#undef PMODULEENTRY32
+#undef LPMODULEENTRY32
+#endif  /* MODULEENTRY32 */
+// MinGW doesn't seem to define this, perhaps some windowsen don't either.
+#ifndef TH32CS_SNAPMODULE32
+#define TH32CS_SNAPMODULE32  0
+#endif  /* TH32CS_SNAPMODULE32 */
+#endif  /* PLATFORM_WINDOWS */
+
+// Re-run fn until it doesn't cause EINTR.
+#define NO_INTR(fn)  do {} while ((fn) < 0 && errno == EINTR)
+
+// open/read/close can set errno, which may be illegal at this
+// time, so prefer making the syscalls directly if we can.
+#ifdef HAVE_SYS_SYSCALL_H
+# include <sys/syscall.h>
+#endif
+#ifdef SYS_open   // solaris 11, at least sometimes, only defines SYS_openat
+# define safeopen(filename, mode)  syscall(SYS_open, filename, mode)
+#else
+# define safeopen(filename, mode)  open(filename, mode)
+#endif
+#ifdef SYS_read
+# define saferead(fd, buffer, size)  syscall(SYS_read, fd, buffer, size)
+#else
+# define saferead(fd, buffer, size)  read(fd, buffer, size)
+#endif
+#ifdef SYS_close
+# define safeclose(fd)  syscall(SYS_close, fd)
+#else
+# define safeclose(fd)  close(fd)
+#endif
+
+// ----------------------------------------------------------------------
+// GetenvBeforeMain()
+// GetUniquePathFromEnv()
+//    Some non-trivial getenv-related functions.
+// ----------------------------------------------------------------------
+
+// It's not safe to call getenv() in the malloc hooks, because they
+// might be called extremely early, before libc is done setting up
+// correctly.  In particular, the thread library may not be done
+// setting up errno.  So instead, we use the built-in __environ array
+// if it exists, and otherwise read /proc/self/environ directly, using
+// system calls to read the file, and thus avoid setting errno.
+// /proc/self/environ limits how much data it exports (around 8K),
+// so it's not an ideal solution.
+const char* GetenvBeforeMain(const char* name) {
+#if defined(HAVE___ENVIRON)   // if we have it, it's declared in unistd.h
+  if (__environ) {            // can exist but be NULL, if statically linked
+    const int namelen = strlen(name);
+    for (char** p = __environ; *p; p++) {
+      if (strlen(*p) < namelen) {
+        continue;
+      }
+      if (!memcmp(*p, name, namelen) && (*p)[namelen] == '=')  // it's a match
+        return *p + namelen+1;                                 // point after =
+    }
+    return NULL;
+  }
+#endif
+#if defined(PLATFORM_WINDOWS)
+  // TODO(mbelshe) - repeated calls to this function will overwrite the
+  // contents of the static buffer.
+  static char envvar_buf[1024];  // enough to hold any envvar we care about
+  if (!GetEnvironmentVariableA(name, envvar_buf, sizeof(envvar_buf)-1))
+    return NULL;
+  return envvar_buf;
+#endif
+  // static is ok because this function should only be called before
+  // main(), when we're single-threaded.
+  static char envbuf[16<<10];
+  if (*envbuf == '\0') {    // haven't read the environ yet
+    int fd = safeopen("/proc/self/environ", O_RDONLY);
+    // The -2 below guarantees the last two bytes of the buffer will be \0\0
+    if (fd == -1 ||           // unable to open the file, fall back onto libc
+        saferead(fd, envbuf, sizeof(envbuf) - 2) < 0) { // error reading file
+      RAW_VLOG(1, "Unable to open /proc/self/environ, falling back "
+               "on getenv(\"%s\"), which may not work", name);
+      if (fd != -1) safeclose(fd);
+      return getenv(name);
+    }
+    safeclose(fd);
+  }
+  const int namelen = strlen(name);
+  const char* p = envbuf;
+  while (*p != '\0') {    // will happen at the \0\0 that terminates the buffer
+    // proc file has the format NAME=value\0NAME=value\0NAME=value\0...
+    const char* endp = (char*)memchr(p, '\0', sizeof(envbuf) - (p - envbuf));
+    if (endp == NULL)            // this entry isn't NUL terminated
+      return NULL;
+    else if (!memcmp(p, name, namelen) && p[namelen] == '=')    // it's a match
+      return p + namelen+1;      // point after =
+    p = endp + 1;
+  }
+  return NULL;                   // env var never found
+}
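+
+// For illustration, typical early-startup usage ("CPUPROFILE" is just an
+// example variable name):
+//
+//   const char* val = GetenvBeforeMain("CPUPROFILE");
+//   if (val != NULL) {
+//     // safe to use even from malloc hooks, before libc setup completes
+//   }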
+
+extern "C" {
+  const char* TCMallocGetenvSafe(const char* name) {
+    return GetenvBeforeMain(name);
+  }
+}
+
+// This takes as an argument an environment-variable name (like
+// CPUPROFILE) whose value is supposed to be a file-path, and sets
+// path to that path, and returns true.  If the env var doesn't exist,
+// or is the empty string, leave path unchanged and returns false.
+// The reason this is non-trivial is that this function handles munged
+// pathnames.  Here's why:
+//
+// If we're a child process of the 'main' process, we can't just use
+// getenv("CPUPROFILE") -- the parent process will be using that path.
+// Instead we append our pid to the pathname.  How do we tell if we're a
+// child process?  Ideally we'd set an environment variable that all
+// our children would inherit.  But -- and this is seemingly a bug in
+// gcc -- if you do a setenv() in a shared library in a global
+// constructor, the environment setting is lost by the time main() is
+// called.  The only safe thing we can do in such a situation is to
+// modify the existing envvar.  So we do a hack: in the parent, we set
+// the high bit of the 1st char of CPUPROFILE.  In the child, we
+// notice the high bit is set and append the pid().  This works
+// assuming cpuprofile filenames don't normally have the high bit set
+// in their first character!  If that assumption is violated, we'll
+// still get a profile, but one with an unexpected name.
+// TODO(csilvers): set an envvar instead when we can do it reliably.
+bool GetUniquePathFromEnv(const char* env_name, char* path) {
+  char* envval = getenv(env_name);
+  if (envval == NULL || *envval == '\0')
+    return false;
+  if (envval[0] & 128) {                  // high bit is set
+    snprintf(path, PATH_MAX, "%c%s_%u",   // add pid and clear high bit
+             envval[0] & 127, envval+1, (unsigned int)(getpid()));
+  } else {
+    snprintf(path, PATH_MAX, "%s", envval);
+    envval[0] |= 128;                     // set high bit for kids to see
+  }
+  return true;
+}
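+
+// For illustration, assuming CPUPROFILE=/tmp/prof: the first caller (the
+// parent) gets "/tmp/prof" back and sets the high bit; a child process
+// calling this later sees the high bit and gets "/tmp/prof_<pid>":
+//
+//   char path[PATH_MAX];
+//   if (GetUniquePathFromEnv("CPUPROFILE", path)) {
+//     // write output to 'path'
+//   }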
+
+// ----------------------------------------------------------------------
+// CyclesPerSecond()
+// NumCPUs()
+//    It's important this not call malloc! -- they may be called at
+//    global-construct time, before we've set up all our proper malloc
+//    hooks and such.
+// ----------------------------------------------------------------------
+
+static double cpuinfo_cycles_per_second = 1.0;  // 0.0 might be dangerous
+static int cpuinfo_num_cpus = 1;  // Conservative guess
+
+void SleepForMilliseconds(int milliseconds) {
+#ifdef PLATFORM_WINDOWS
+  _sleep(milliseconds);   // Windows's _sleep takes a milliseconds argument
+#else
+  // Sleep for a few milliseconds
+  struct timespec sleep_time;
+  sleep_time.tv_sec = milliseconds / 1000;
+  sleep_time.tv_nsec = (milliseconds % 1000) * 1000000;
+  while (nanosleep(&sleep_time, &sleep_time) != 0 && errno == EINTR)
+    ;  // Ignore signals and wait for the full interval to elapse.
+#endif
+}
+
+// Helper function estimates cycles/sec by observing cycles elapsed during
+// sleep(). Using a small sleep time decreases accuracy significantly.
+static int64 EstimateCyclesPerSecond(const int estimate_time_ms) {
+  assert(estimate_time_ms > 0);
+  if (estimate_time_ms <= 0)
+    return 1;
+  double multiplier = 1000.0 / (double)estimate_time_ms;  // scale by this much
+
+  const int64 start_ticks = CycleClock::Now();
+  SleepForMilliseconds(estimate_time_ms);
+  const int64 guess = int64(multiplier * (CycleClock::Now() - start_ticks));
+  return guess;
+}
+
+// ReadIntFromFile is only called on linux and cygwin platforms.
+#if defined(__linux__) || defined(__CYGWIN__) || defined(__CYGWIN32__)
+// Helper function for reading an int from a file. Returns true if successful
+// and the memory location pointed to by value is set to the value read.
+static bool ReadIntFromFile(const char *file, int *value) {
+  bool ret = false;
+  int fd = open(file, O_RDONLY);
+  if (fd != -1) {
+    char line[1024];
+    char* err;
+    memset(line, '\0', sizeof(line));
+    read(fd, line, sizeof(line) - 1);
+    const int temp_value = strtol(line, &err, 10);
+    if (line[0] != '\0' && (*err == '\n' || *err == '\0')) {
+      *value = temp_value;
+      ret = true;
+    }
+    close(fd);
+  }
+  return ret;
+}
+#endif
+
+// WARNING: logging calls back to InitializeSystemInfo() so it must
+// not invoke any logging code.  Also, InitializeSystemInfo() can be
+// called before main() -- in fact it *must* be since already_called
+// isn't protected -- before malloc hooks are properly set up, so
+// we make an effort not to call any routines which might allocate
+// memory.
+
+static void InitializeSystemInfo() {
+  static bool already_called = false;   // safe if we run before threads
+  if (already_called)  return;
+  already_called = true;
+
+  bool saw_mhz = false;
+
+  if (RunningOnValgrind()) {
+    // Valgrind may slow the progress of time artificially (--scale-time=N
+    // option). We thus can't rely on CPU Mhz info stored in /sys or /proc
+    // files. Thus, actually measure the cps.
+    cpuinfo_cycles_per_second = EstimateCyclesPerSecond(100);
+    saw_mhz = true;
+  }
+
+#if defined(__linux__) || defined(__CYGWIN__) || defined(__CYGWIN32__)
+  char line[1024];
+  char* err;
+  int freq;
+
+  // If the kernel is exporting the tsc frequency use that. There are issues
+  // where cpuinfo_max_freq cannot be relied on because the BIOS may be
+// exporting an invalid p-state (on x86) or p-states may be used to put the
+  // processor in a new mode (turbo mode). Essentially, those frequencies
+  // cannot always be relied upon. The same reasons apply to /proc/cpuinfo as
+  // well.
+  if (!saw_mhz &&
+      ReadIntFromFile("/sys/devices/system/cpu/cpu0/tsc_freq_khz", &freq)) {
+      // The value is in kHz (as the file name suggests).  For example, on a
+      // 2GHz warpstation, the file contains the value "2000000".
+      cpuinfo_cycles_per_second = freq * 1000.0;
+      saw_mhz = true;
+  }
+
+  // If CPU scaling is in effect, we want to use the *maximum* frequency,
+  // not whatever CPU speed some random processor happens to be using now.
+  if (!saw_mhz &&
+      ReadIntFromFile("/sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq",
+                      &freq)) {
+    // The value is in kHz.  For example, on a 2GHz machine, the file
+    // contains the value "2000000".
+    cpuinfo_cycles_per_second = freq * 1000.0;
+    saw_mhz = true;
+  }
+
+  // Read /proc/cpuinfo for other values, and for the cpu speed if we
+  // still haven't found it above.
+  const char* pname = "/proc/cpuinfo";
+  int fd = open(pname, O_RDONLY);
+  if (fd == -1) {
+    perror(pname);
+    if (!saw_mhz) {
+      cpuinfo_cycles_per_second = EstimateCyclesPerSecond(1000);
+    }
+    return;          // TODO: use generic tester instead?
+  }
+
+  double bogo_clock = 1.0;
+  bool saw_bogo = false;
+  int num_cpus = 0;
+  line[0] = line[1] = '\0';
+  int chars_read = 0;
+  do {   // we'll exit when the last read didn't read anything
+    // Move the next line to the beginning of the buffer
+    const int oldlinelen = strlen(line);
+    if (sizeof(line) == oldlinelen + 1)    // oldlinelen took up entire line
+      line[0] = '\0';
+    else                                   // still other lines left to save
+      memmove(line, line + oldlinelen+1, sizeof(line) - (oldlinelen+1));
+    // Terminate the new line, reading more if we can't find the newline
+    char* newline = strchr(line, '\n');
+    if (newline == NULL) {
+      const int linelen = strlen(line);
+      const int bytes_to_read = sizeof(line)-1 - linelen;
+      assert(bytes_to_read > 0);  // because the memmove recovered >=1 bytes
+      chars_read = read(fd, line + linelen, bytes_to_read);
+      line[linelen + chars_read] = '\0';
+      newline = strchr(line, '\n');
+    }
+    if (newline != NULL)
+      *newline = '\0';
+
+#if defined(__powerpc__) || defined(__ppc__)
+    // PowerPC cpus report the frequency in "clock" line
+    if (strncasecmp(line, "clock", sizeof("clock")-1) == 0) {
+      const char* freqstr = strchr(line, ':');
+      if (freqstr) {
+	// PowerPC frequencies are only reported as MHz (check 'show_cpuinfo'
+	// function at arch/powerpc/kernel/setup-common.c)
+	char *endp = strstr(line, "MHz");
+	if (endp) {
+	  *endp = 0;
+	  cpuinfo_cycles_per_second = strtod(freqstr+1, &err) * 1000000.0;
+          if (freqstr[1] != '\0' && *err == '\0' && cpuinfo_cycles_per_second > 0)
+            saw_mhz = true;
+	}
+      }
+#else
+    // When parsing the "cpu MHz" and "bogomips" (fallback) entries, we only
+    // accept positive values. Some environments (virtual machines) report zero,
+    // which would cause infinite looping in WallTime_Init.
+    if (!saw_mhz && strncasecmp(line, "cpu MHz", sizeof("cpu MHz")-1) == 0) {
+      const char* freqstr = strchr(line, ':');
+      if (freqstr) {
+        cpuinfo_cycles_per_second = strtod(freqstr+1, &err) * 1000000.0;
+        if (freqstr[1] != '\0' && *err == '\0' && cpuinfo_cycles_per_second > 0)
+          saw_mhz = true;
+      }
+    } else if (strncasecmp(line, "bogomips", sizeof("bogomips")-1) == 0) {
+      const char* freqstr = strchr(line, ':');
+      if (freqstr) {
+        bogo_clock = strtod(freqstr+1, &err) * 1000000.0;
+        if (freqstr[1] != '\0' && *err == '\0' && bogo_clock > 0)
+          saw_bogo = true;
+      }
+#endif
+    } else if (strncasecmp(line, "processor", sizeof("processor")-1) == 0) {
+      num_cpus++;  // count up every time we see a "processor :" entry
+    }
+  } while (chars_read > 0);
+  close(fd);
+
+  if (!saw_mhz) {
+    if (saw_bogo) {
+      // If we didn't find anything better, we'll use bogomips, but
+      // we're not happy about it.
+      cpuinfo_cycles_per_second = bogo_clock;
+    } else {
+      // If we don't even have bogomips, we'll use the slow estimation.
+      cpuinfo_cycles_per_second = EstimateCyclesPerSecond(1000);
+    }
+  }
+  if (cpuinfo_cycles_per_second == 0.0) {
+    cpuinfo_cycles_per_second = 1.0;   // maybe unnecessary, but safe
+  }
+  if (num_cpus > 0) {
+    cpuinfo_num_cpus = num_cpus;
+  }
+
+#elif defined __FreeBSD__
+  // For this sysctl to work, the machine must be configured without
+  // SMP, APIC, or APM support.  hz should be 64-bit in freebsd 7.0
+  // and later.  Before that, it's a 32-bit quantity (and gives the
+  // wrong answer on machines faster than 2^32 Hz).  See
+  //  http://lists.freebsd.org/pipermail/freebsd-i386/2004-November/001846.html
+  // But also compare FreeBSD 7.0:
+  //  http://fxr.watson.org/fxr/source/i386/i386/tsc.c?v=RELENG70#L223
+  //  231         error = sysctl_handle_quad(oidp, &freq, 0, req);
+  // To FreeBSD 6.3 (it's the same in 6-STABLE):
+  //  http://fxr.watson.org/fxr/source/i386/i386/tsc.c?v=RELENG6#L131
+  //  139         error = sysctl_handle_int(oidp, &freq, sizeof(freq), req);
+#if __FreeBSD__ >= 7
+  uint64_t hz = 0;
+#else
+  unsigned int hz = 0;
+#endif
+  size_t sz = sizeof(hz);
+  const char *sysctl_path = "machdep.tsc_freq";
+  if ( sysctlbyname(sysctl_path, &hz, &sz, NULL, 0) != 0 ) {
+    fprintf(stderr, "Unable to determine clock rate from sysctl: %s: %s\n",
+            sysctl_path, strerror(errno));
+    cpuinfo_cycles_per_second = EstimateCyclesPerSecond(1000);
+  } else {
+    cpuinfo_cycles_per_second = hz;
+  }
+  // TODO(csilvers): also figure out cpuinfo_num_cpus
+
+#elif defined(PLATFORM_WINDOWS)
+# pragma comment(lib, "shlwapi.lib")  // for SHGetValue()
+  // In NT, read MHz from the registry. If we fail to do so or we're in win9x
+  // then make a crude estimate.
+  OSVERSIONINFO os;
+  os.dwOSVersionInfoSize = sizeof(os);
+  DWORD data, data_size = sizeof(data);
+  if (GetVersionEx(&os) &&
+      os.dwPlatformId == VER_PLATFORM_WIN32_NT &&
+      SUCCEEDED(SHGetValueA(HKEY_LOCAL_MACHINE,
+                         "HARDWARE\\DESCRIPTION\\System\\CentralProcessor\\0",
+                           "~MHz", NULL, &data, &data_size)))
+    cpuinfo_cycles_per_second = (int64)data * (int64)(1000 * 1000); // was mhz
+  else
+    cpuinfo_cycles_per_second = EstimateCyclesPerSecond(500); // TODO <500?
+
+  // Get the number of processors.
+  SYSTEM_INFO info;
+  GetSystemInfo(&info);
+  cpuinfo_num_cpus = info.dwNumberOfProcessors;
+
+#elif defined(__MACH__) && defined(__APPLE__)
+  // returning "mach time units" per second. the current number of elapsed
+  // mach time units can be found by calling uint64 mach_absolute_time();
+  // while not as precise as actual CPU cycles, it is accurate in the face
+  // of CPU frequency scaling and multi-cpu/core machines.
+  // Our mac users have these types of machines, and accuracy
+  // (i.e. correctness) trumps precision.
+  // See cycleclock.h: CycleClock::Now(), which returns number of mach time
+  // units on Mac OS X.
+  mach_timebase_info_data_t timebase_info;
+  mach_timebase_info(&timebase_info);
+  double mach_time_units_per_nanosecond =
+      static_cast<double>(timebase_info.denom) /
+      static_cast<double>(timebase_info.numer);
+  cpuinfo_cycles_per_second = mach_time_units_per_nanosecond * 1e9;
+
+  int num_cpus = 0;
+  size_t size = sizeof(num_cpus);
+  int numcpus_name[] = { CTL_HW, HW_NCPU };
+  if (::sysctl(numcpus_name, arraysize(numcpus_name), &num_cpus, &size, 0, 0)
+      == 0
+      && (size == sizeof(num_cpus)))
+    cpuinfo_num_cpus = num_cpus;
+
+#else
+  // Generic cycles per second counter
+  cpuinfo_cycles_per_second = EstimateCyclesPerSecond(1000);
+#endif
+}
+
+double CyclesPerSecond(void) {
+  InitializeSystemInfo();
+  return cpuinfo_cycles_per_second;
+}
+
+int NumCPUs(void) {
+  InitializeSystemInfo();
+  return cpuinfo_num_cpus;
+}
+
+// ----------------------------------------------------------------------
+// HasPosixThreads()
+//      Return true if we're running POSIX (e.g., NPTL on Linux)
+//      threads, as opposed to a non-POSIX thread library.  The thing
+//      that we care about is whether a thread's pid is the same as
+//      the thread that spawned it.  If so, this function returns
+//      true.
+// ----------------------------------------------------------------------
+bool HasPosixThreads() {
+#if defined(__linux__)
+#ifndef _CS_GNU_LIBPTHREAD_VERSION
+#define _CS_GNU_LIBPTHREAD_VERSION 3
+#endif
+  char buf[32];
+  //  We assume that, if confstr() doesn't know about this name, then
+  //  the same glibc is providing LinuxThreads.
+  if (confstr(_CS_GNU_LIBPTHREAD_VERSION, buf, sizeof(buf)) == 0)
+    return false;
+  return strncmp(buf, "NPTL", 4) == 0;
+#elif defined(PLATFORM_WINDOWS) || defined(__CYGWIN__) || defined(__CYGWIN32__)
+  return false;
+#else  // other OS
+  return true;      //  Assume that everything else has Posix
+#endif  // else OS_LINUX
+}
+
+// ----------------------------------------------------------------------
+
+#if defined __linux__ || defined __FreeBSD__ || defined __sun__ || defined __CYGWIN__ || defined __CYGWIN32__
+static void ConstructFilename(const char* spec, pid_t pid,
+                              char* buf, int buf_size) {
+  CHECK_LT(snprintf(buf, buf_size,
+                    spec,
+                    static_cast<int>(pid ? pid : getpid())), buf_size);
+}
+#endif
+
+// A templatized helper function instantiated for Mach (OS X) only.
+// It can find info for both 32-bit and 64-bit Mach-O images.
+// Returns true if it successfully handled the hdr, false otherwise.
+#ifdef __MACH__          // Mac OS X, almost certainly
+template<uint32_t kMagic, uint32_t kLCSegment,
+         typename MachHeader, typename SegmentCommand>
+static bool NextExtMachHelper(const mach_header* hdr,
+                              int current_image, int current_load_cmd,
+                              uint64 *start, uint64 *end, char **flags,
+                              uint64 *offset, int64 *inode, char **filename,
+                              uint64 *file_mapping, uint64 *file_pages,
+                              uint64 *anon_mapping, uint64 *anon_pages,
+                              dev_t *dev) {
+  static char kDefaultPerms[5] = "r-xp";
+  if (hdr->magic != kMagic)
+    return false;
+  const char* lc = (const char *)hdr + sizeof(MachHeader);
+  // TODO(csilvers): make this not quadratic (increment and hold state)
+  for (int j = 0; j < current_load_cmd; j++)  // advance to *our* load_cmd
+    lc += ((const load_command *)lc)->cmdsize;
+  if (((const load_command *)lc)->cmd == kLCSegment) {
+    const intptr_t dlloff = _dyld_get_image_vmaddr_slide(current_image);
+    const SegmentCommand* sc = (const SegmentCommand *)lc;
+    if (start) *start = sc->vmaddr + dlloff;
+    if (end) *end = sc->vmaddr + sc->vmsize + dlloff;
+    if (flags) *flags = kDefaultPerms;  // can we do better?
+    if (offset) *offset = sc->fileoff;
+    if (inode) *inode = 0;
+    if (filename)
+      *filename = const_cast<char*>(_dyld_get_image_name(current_image));
+    if (file_mapping) *file_mapping = 0;
+    if (file_pages) *file_pages = 0;   // could we use sc->filesize?
+    if (anon_mapping) *anon_mapping = 0;
+    if (anon_pages) *anon_pages = 0;
+    if (dev) *dev = 0;
+    return true;
+  }
+
+  return false;
+}
+#endif
+
+// Finds |c| in |text|, and assign '\0' at the found position.
+// The original character at the modified position should be |c|.
+// A pointer to the modified position is stored in |endptr|.
+// |endptr| should not be NULL.
+static bool ExtractUntilChar(char *text, int c, char **endptr) {
+  CHECK_NE(text, NULL);
+  CHECK_NE(endptr, NULL);
+  char *found;
+  found = strchr(text, c);
+  if (found == NULL) {
+    *endptr = NULL;
+    return false;
+  }
+
+  *endptr = found;
+  *found = '\0';
+  return true;
+}
+
+// Increments |*text_pointer| while it points at a whitespace character.
+// This follows sscanf's whitespace handling.
+static void SkipWhileWhitespace(char **text_pointer, int c) {
+  if (isspace(c)) {
+    while (isspace(**text_pointer) && isspace(*((*text_pointer) + 1))) {
+      ++(*text_pointer);
+    }
+  }
+}
+
+template<class T>
+static T StringToInteger(char *text, char **endptr, int base) {
+  assert(false);
+  return T();
+}
+
+template<>
+int StringToInteger<int>(char *text, char **endptr, int base) {
+  return strtol(text, endptr, base);
+}
+
+template<>
+int64 StringToInteger<int64>(char *text, char **endptr, int base) {
+  return strtoll(text, endptr, base);
+}
+
+template<>
+uint64 StringToInteger<uint64>(char *text, char **endptr, int base) {
+  return strtoull(text, endptr, base);
+}
+
+template<typename T>
+static T StringToIntegerUntilChar(
+    char *text, int base, int c, char **endptr_result) {
+  CHECK_NE(endptr_result, NULL);
+  *endptr_result = NULL;
+
+  char *endptr_extract;
+  if (!ExtractUntilChar(text, c, &endptr_extract))
+    return 0;
+
+  T result;
+  char *endptr_strto;
+  result = StringToInteger<T>(text, &endptr_strto, base);
+  *endptr_extract = c;
+
+  if (endptr_extract != endptr_strto)
+    return 0;
+
+  *endptr_result = endptr_extract;
+  SkipWhileWhitespace(endptr_result, c);
+
+  return result;
+}
+
+static char *CopyStringUntilChar(
+    char *text, unsigned out_len, int c, char *out) {
+  char *endptr;
+  if (!ExtractUntilChar(text, c, &endptr))
+    return NULL;
+
+  strncpy(out, text, out_len);
+  out[out_len-1] = '\0';
+  *endptr = c;
+
+  SkipWhileWhitespace(&endptr, c);
+  return endptr;
+}
+
+template<typename T>
+static bool StringToIntegerUntilCharWithCheck(
+    T *outptr, char *text, int base, int c, char **endptr) {
+  *outptr = StringToIntegerUntilChar<T>(*endptr, base, c, endptr);
+  if (*endptr == NULL || **endptr == '\0') return false;
+  ++(*endptr);
+  return true;
+}
+
+static bool ParseProcMapsLine(char *text, uint64 *start, uint64 *end,
+                              char *flags, uint64 *offset,
+                              int *major, int *minor, int64 *inode,
+                              unsigned *filename_offset) {
+#if defined(__linux__)
+  /*
+   * It's similar to:
+   * sscanf(text, "%"SCNx64"-%"SCNx64" %4s %"SCNx64" %x:%x %"SCNd64" %n",
+   *        start, end, flags, offset, major, minor, inode, filename_offset)
+   */
+  char *endptr = text;
+  if (endptr == NULL || *endptr == '\0')  return false;
+
+  if (!StringToIntegerUntilCharWithCheck(start, endptr, 16, '-', &endptr))
+    return false;
+
+  if (!StringToIntegerUntilCharWithCheck(end, endptr, 16, ' ', &endptr))
+    return false;
+
+  endptr = CopyStringUntilChar(endptr, 5, ' ', flags);
+  if (endptr == NULL || *endptr == '\0')  return false;
+  ++endptr;
+
+  if (!StringToIntegerUntilCharWithCheck(offset, endptr, 16, ' ', &endptr))
+    return false;
+
+  if (!StringToIntegerUntilCharWithCheck(major, endptr, 16, ':', &endptr))
+    return false;
+
+  if (!StringToIntegerUntilCharWithCheck(minor, endptr, 16, ' ', &endptr))
+    return false;
+
+  if (!StringToIntegerUntilCharWithCheck(inode, endptr, 10, ' ', &endptr))
+    return false;
+
+  *filename_offset = (endptr - text);
+  return true;
+#else
+  return false;
+#endif
+}
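+
+// For illustration, parsing one typical (hypothetical) linux maps line;
+// the buffer must be writable because parsing temporarily inserts NULs:
+//
+//   char line[] = "08048000-0804a000 r-xp 00000000 08:01 12345 /bin/cat";
+//   uint64 start, end, offset;
+//   int64 inode;
+//   int major, minor;
+//   char flags[5];
+//   unsigned fn_off;
+//   if (ParseProcMapsLine(line, &start, &end, flags, &offset,
+//                         &major, &minor, &inode, &fn_off)) {
+//     // line + fn_off now points at "/bin/cat"
+//   }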
+
+ProcMapsIterator::ProcMapsIterator(pid_t pid) {
+  Init(pid, NULL, false);
+}
+
+ProcMapsIterator::ProcMapsIterator(pid_t pid, Buffer *buffer) {
+  Init(pid, buffer, false);
+}
+
+ProcMapsIterator::ProcMapsIterator(pid_t pid, Buffer *buffer,
+                                   bool use_maps_backing) {
+  Init(pid, buffer, use_maps_backing);
+}
+
+void ProcMapsIterator::Init(pid_t pid, Buffer *buffer,
+                            bool use_maps_backing) {
+  pid_ = pid;
+  using_maps_backing_ = use_maps_backing;
+  dynamic_buffer_ = NULL;
+  if (!buffer) {
+    // If the user didn't pass in any buffer storage, allocate it
+    // now. This is the normal case; the signal handler passes in a
+    // static buffer.
+    buffer = dynamic_buffer_ = new Buffer;
+  } else {
+    dynamic_buffer_ = NULL;
+  }
+
+  ibuf_ = buffer->buf_;
+
+  stext_ = etext_ = nextline_ = ibuf_;
+  ebuf_ = ibuf_ + Buffer::kBufSize - 1;
+  nextline_ = ibuf_;
+
+#if defined(__linux__) || defined(__CYGWIN__) || defined(__CYGWIN32__)
+  if (use_maps_backing) {  // don't bother with clever "self" stuff in this case
+    ConstructFilename("/proc/%d/maps_backing", pid, ibuf_, Buffer::kBufSize);
+  } else if (pid == 0) {
+    // We have to kludge a bit to deal with the args ConstructFilename
+    // expects.  The 1 is never used -- it's only important that it's not 0.
+    ConstructFilename("/proc/self/maps", 1, ibuf_, Buffer::kBufSize);
+  } else {
+    ConstructFilename("/proc/%d/maps", pid, ibuf_, Buffer::kBufSize);
+  }
+  // No error logging since this can be called from the crash dump
+  // handler at awkward moments. Users should call Valid() before
+  // using.
+  NO_INTR(fd_ = open(ibuf_, O_RDONLY));
+#elif defined(__FreeBSD__)
+  // We don't support maps_backing on freebsd
+  if (pid == 0) {
+    ConstructFilename("/proc/curproc/map", 1, ibuf_, Buffer::kBufSize);
+  } else {
+    ConstructFilename("/proc/%d/map", pid, ibuf_, Buffer::kBufSize);
+  }
+  NO_INTR(fd_ = open(ibuf_, O_RDONLY));
+#elif defined(__sun__)
+  if (pid == 0) {
+    ConstructFilename("/proc/self/map", 1, ibuf_, Buffer::kBufSize);
+  } else {
+    ConstructFilename("/proc/%d/map", pid, ibuf_, Buffer::kBufSize);
+  }
+  NO_INTR(fd_ = open(ibuf_, O_RDONLY));
+#elif defined(__MACH__)
+  current_image_ = _dyld_image_count();   // count down from the top
+  current_load_cmd_ = -1;
+#elif defined(PLATFORM_WINDOWS)
+  snapshot_ = CreateToolhelp32Snapshot(TH32CS_SNAPMODULE |
+                                       TH32CS_SNAPMODULE32,
+                                       GetCurrentProcessId());
+  memset(&module_, 0, sizeof(module_));
+#else
+  fd_ = -1;   // so Valid() is always false
+#endif
+
+}
+
+ProcMapsIterator::~ProcMapsIterator() {
+#if defined(PLATFORM_WINDOWS)
+  if (snapshot_ != INVALID_HANDLE_VALUE) CloseHandle(snapshot_);
+#elif defined(__MACH__)
+  // no cleanup necessary!
+#else
+  if (fd_ >= 0) NO_INTR(close(fd_));
+#endif
+  delete dynamic_buffer_;
+}
+
+bool ProcMapsIterator::Valid() const {
+#if defined(PLATFORM_WINDOWS)
+  return snapshot_ != INVALID_HANDLE_VALUE;
+#elif defined(__MACH__)
+  return true;
+#else
+  return fd_ != -1;
+#endif
+}
+
+bool ProcMapsIterator::Next(uint64 *start, uint64 *end, char **flags,
+                            uint64 *offset, int64 *inode, char **filename) {
+  return NextExt(start, end, flags, offset, inode, filename, NULL, NULL,
+                 NULL, NULL, NULL);
+}
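+
+// For illustration, enumerating the current process's mappings (a sketch;
+// compare FillProcSelfMaps() below):
+//
+//   ProcMapsIterator::Buffer iterbuf;
+//   ProcMapsIterator it(0, &iterbuf);   // 0 means "current pid"
+//   uint64 start, end, offset;
+//   int64 inode;
+//   char *flags, *filename;
+//   while (it.Next(&start, &end, &flags, &offset, &inode, &filename)) {
+//     // one mapping per iteration
+//   }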
+
+// This has too many arguments.  It should really be building
+// a map object and returning it.  The problem is that this is called
+// when the memory allocator state is undefined, hence the arguments.
+bool ProcMapsIterator::NextExt(uint64 *start, uint64 *end, char **flags,
+                               uint64 *offset, int64 *inode, char **filename,
+                               uint64 *file_mapping, uint64 *file_pages,
+                               uint64 *anon_mapping, uint64 *anon_pages,
+                               dev_t *dev) {
+
+#if defined(__linux__) || defined(__FreeBSD__) || defined(__CYGWIN__) || defined(__CYGWIN32__)
+  do {
+    // Advance to the start of the next line
+    stext_ = nextline_;
+
+    // See if we have a complete line in the buffer already
+    nextline_ = static_cast<char *>(memchr (stext_, '\n', etext_ - stext_));
+    if (!nextline_) {
+      // Shift/fill the buffer so we do have a line
+      int count = etext_ - stext_;
+
+      // Move the current text to the start of the buffer
+      memmove(ibuf_, stext_, count);
+      stext_ = ibuf_;
+      etext_ = ibuf_ + count;
+
+      int nread = 0;            // fill up buffer with text
+      while (etext_ < ebuf_) {
+        NO_INTR(nread = read(fd_, etext_, ebuf_ - etext_));
+        if (nread > 0)
+          etext_ += nread;
+        else
+          break;
+      }
+
+      // Zero out remaining characters in buffer at EOF to avoid returning
+      // garbage from subsequent calls.
+      if (etext_ != ebuf_ && nread == 0) {
+        memset(etext_, 0, ebuf_ - etext_);
+      }
+      *etext_ = '\n';   // sentinel; safe because ibuf extends 1 char beyond ebuf
+      nextline_ = static_cast<char *>(memchr (stext_, '\n', etext_ + 1 - stext_));
+    }
+    *nextline_ = 0;                // turn newline into nul
+    nextline_ += ((nextline_ < etext_)? 1 : 0);  // skip nul if not end of text
+    // stext_ now points at a nul-terminated line
+    uint64 tmpstart, tmpend, tmpoffset;
+    int64 tmpinode;
+    int major, minor;
+    unsigned filename_offset = 0;
+#if defined(__linux__)
+    // for now, assume all linuxes have the same format
+    if (!ParseProcMapsLine(
+        stext_,
+        start ? start : &tmpstart,
+        end ? end : &tmpend,
+        flags_,
+        offset ? offset : &tmpoffset,
+        &major, &minor,
+        inode ? inode : &tmpinode, &filename_offset)) continue;
+#elif defined(__CYGWIN__) || defined(__CYGWIN32__)
+    // cygwin is like linux, except the third field is the "entry point"
+    // rather than the offset (see format_process_maps at
+    // http://cygwin.com/cgi-bin/cvsweb.cgi/src/winsup/cygwin/fhandler_process.cc?rev=1.89&content-type=text/x-cvsweb-markup&cvsroot=src
+    // Offset is always 0 on cygwin: cygwin implements an mmap
+    // by loading the whole file and then calling NtMapViewOfSection.
+    // Cygwin also seems to set its flags kinda randomly; use windows default.
+    char tmpflags[5];
+    if (offset)
+      *offset = 0;
+    strcpy(flags_, "r-xp");
+    if (sscanf(stext_, "%llx-%llx %4s %llx %x:%x %lld %n",
+               start ? start : &tmpstart,
+               end ? end : &tmpend,
+               tmpflags,
+               &tmpoffset,
+               &major, &minor,
+               inode ? inode : &tmpinode, &filename_offset) != 7) continue;
+#elif defined(__FreeBSD__)
+    // For the format, see http://www.freebsd.org/cgi/cvsweb.cgi/src/sys/fs/procfs/procfs_map.c?rev=1.31&content-type=text/x-cvsweb-markup
+    tmpstart = tmpend = tmpoffset = 0;
+    tmpinode = 0;
+    major = minor = 0;   // can't get this info in freebsd
+    if (inode)
+      *inode = 0;        // nor this
+    if (offset)
+      *offset = 0;       // seems like this should be in there, but maybe not
+    // start end resident privateresident obj(?) prot refcnt shadowcnt
+    // flags copy_on_write needs_copy type filename:
+    // 0x8048000 0x804a000 2 0 0xc104ce70 r-x 1 0 0x0 COW NC vnode /bin/cat
+    if (sscanf(stext_, "0x%" SCNx64 " 0x%" SCNx64 " %*d %*d %*p %3s %*d %*d 0x%*x %*s %*s %*s %n",
+               start ? start : &tmpstart,
+               end ? end : &tmpend,
+               flags_,
+               &filename_offset) != 3) continue;
+#endif
+
+    // Depending on the Linux kernel being used, there may or may not be a space
+    // after the inode if there is no filename.  sscanf will in such situations
+    // nondeterministically either fill in filename_offset or not (the results
+    // differ on multiple calls in the same run even with identical arguments).
+    // We don't want to wander off somewhere beyond the end of the string.
+    size_t stext_length = strlen(stext_);
+    if (filename_offset == 0 || filename_offset > stext_length)
+      filename_offset = stext_length;
+
+    // We found an entry
+    if (flags) *flags = flags_;
+    if (filename) *filename = stext_ + filename_offset;
+    if (dev) *dev = minor | (major << 8);
+
+    if (using_maps_backing_) {
+      // Extract and parse physical page backing info.
+      char *backing_ptr = stext_ + filename_offset +
+          strlen(stext_+filename_offset);
+
+      // find the second '('
+      int paren_count = 0;
+      while (--backing_ptr > stext_) {
+        if (*backing_ptr == '(') {
+          ++paren_count;
+          if (paren_count >= 2) {
+            uint64 tmp_file_mapping;
+            uint64 tmp_file_pages;
+            uint64 tmp_anon_mapping;
+            uint64 tmp_anon_pages;
+
+            sscanf(backing_ptr+1, "F %" SCNx64 " %" SCNd64 ") (A %" SCNx64 " %" SCNd64 ")",
+                   file_mapping ? file_mapping : &tmp_file_mapping,
+                   file_pages ? file_pages : &tmp_file_pages,
+                   anon_mapping ? anon_mapping : &tmp_anon_mapping,
+                   anon_pages ? anon_pages : &tmp_anon_pages);
+            // null terminate the file name (there is a space
+            // before the first (.
+            backing_ptr[-1] = 0;
+            break;
+          }
+        }
+      }
+    }
+
+    return true;
+  } while (etext_ > ibuf_);
+#elif defined(__sun__)
+  // This is based on MA_READ == 4, MA_WRITE == 2, MA_EXEC == 1
+  static char kPerms[8][4] = { "---", "--x", "-w-", "-wx",
+                               "r--", "r-x", "rw-", "rwx" };
+  COMPILE_ASSERT(MA_READ == 4, solaris_ma_read_must_equal_4);
+  COMPILE_ASSERT(MA_WRITE == 2, solaris_ma_write_must_equal_2);
+  COMPILE_ASSERT(MA_EXEC == 1, solaris_ma_exec_must_equal_1);
+  Buffer object_path;
+  int nread = 0;            // fill up buffer with text
+  NO_INTR(nread = read(fd_, ibuf_, sizeof(prmap_t)));
+  if (nread == sizeof(prmap_t)) {
+    long inode_from_mapname = 0;
+    prmap_t* mapinfo = reinterpret_cast<prmap_t*>(ibuf_);
+    // Best-effort attempt to get the inode from the filename.  I think the
+    // two middle ints are major and minor device numbers, but I'm not sure.
+    sscanf(mapinfo->pr_mapname, "ufs.%*d.%*d.%ld", &inode_from_mapname);
+
+    if (pid_ == 0) {
+      CHECK_LT(snprintf(object_path.buf_, Buffer::kBufSize,
+                        "/proc/self/path/%s", mapinfo->pr_mapname),
+               Buffer::kBufSize);
+    } else {
+      CHECK_LT(snprintf(object_path.buf_, Buffer::kBufSize,
+                        "/proc/%d/path/%s",
+                        static_cast<int>(pid_), mapinfo->pr_mapname),
+               Buffer::kBufSize);
+    }
+    ssize_t len = readlink(object_path.buf_, current_filename_, PATH_MAX);
+    CHECK_LT(len, PATH_MAX);
+    if (len < 0)
+      len = 0;
+    current_filename_[len] = '\0';
+
+    if (start) *start = mapinfo->pr_vaddr;
+    if (end) *end = mapinfo->pr_vaddr + mapinfo->pr_size;
+    if (flags) *flags = kPerms[mapinfo->pr_mflags & 7];
+    if (offset) *offset = mapinfo->pr_offset;
+    if (inode) *inode = inode_from_mapname;
+    if (filename) *filename = current_filename_;
+    if (file_mapping) *file_mapping = 0;
+    if (file_pages) *file_pages = 0;
+    if (anon_mapping) *anon_mapping = 0;
+    if (anon_pages) *anon_pages = 0;
+    if (dev) *dev = 0;
+    return true;
+  }
+#elif defined(__MACH__)
+  // We return a separate entry for each segment in the DLL. (TODO(csilvers):
+  // can we do better?)  A DLL ("image") has load-commands, some of which
+  // talk about segment boundaries.
+  // cf image_for_address from http://svn.digium.com/view/asterisk/team/oej/minivoicemail/dlfcn.c?revision=53912
+  for (; current_image_ >= 0; current_image_--) {
+    const mach_header* hdr = _dyld_get_image_header(current_image_);
+    if (!hdr) continue;
+    if (current_load_cmd_ < 0)   // set up for this image
+      current_load_cmd_ = hdr->ncmds;  // again, go from the top down
+
+    // We start with the next load command (we've already looked at this one).
+    for (current_load_cmd_--; current_load_cmd_ >= 0; current_load_cmd_--) {
+#ifdef MH_MAGIC_64
+      if (NextExtMachHelper<MH_MAGIC_64, LC_SEGMENT_64,
+                            struct mach_header_64, struct segment_command_64>(
+                                hdr, current_image_, current_load_cmd_,
+                                start, end, flags, offset, inode, filename,
+                                file_mapping, file_pages, anon_mapping,
+                                anon_pages, dev)) {
+        return true;
+      }
+#endif
+      if (NextExtMachHelper<MH_MAGIC, LC_SEGMENT,
+                            struct mach_header, struct segment_command>(
+                                hdr, current_image_, current_load_cmd_,
+                                start, end, flags, offset, inode, filename,
+                                file_mapping, file_pages, anon_mapping,
+                                anon_pages, dev)) {
+        return true;
+      }
+    }
+    // If we get here, no more load_cmd's in this image talk about
+    // segments.  Go on to the next image.
+  }
+#elif defined(PLATFORM_WINDOWS)
+  static char kDefaultPerms[5] = "r-xp";
+  BOOL ok;
+  if (module_.dwSize == 0) {  // only possible before first call
+    module_.dwSize = sizeof(module_);
+    ok = Module32First(snapshot_, &module_);
+  } else {
+    ok = Module32Next(snapshot_, &module_);
+  }
+  if (ok) {
+    uint64 base_addr = reinterpret_cast<DWORD_PTR>(module_.modBaseAddr);
+    if (start) *start = base_addr;
+    if (end) *end = base_addr + module_.modBaseSize;
+    if (flags) *flags = kDefaultPerms;
+    if (offset) *offset = 0;
+    if (inode) *inode = 0;
+    if (filename) *filename = module_.szExePath;
+    if (file_mapping) *file_mapping = 0;
+    if (file_pages) *file_pages = 0;
+    if (anon_mapping) *anon_mapping = 0;
+    if (anon_pages) *anon_pages = 0;
+    if (dev) *dev = 0;
+    return true;
+  }
+#endif
+
+  // We didn't find anything
+  return false;
+}
+
+int ProcMapsIterator::FormatLine(char* buffer, int bufsize,
+                                 uint64 start, uint64 end, const char *flags,
+                                 uint64 offset, int64 inode,
+                                 const char *filename, dev_t dev) {
+  // We assume 'flags' looks like 'rwxp' or 'rwx'.
+  char r = (flags && flags[0] == 'r') ? 'r' : '-';
+  char w = (flags && flags[0] && flags[1] == 'w') ? 'w' : '-';
+  char x = (flags && flags[0] && flags[1] && flags[2] == 'x') ? 'x' : '-';
+  // p always seems set on linux, so we set the default to 'p', not '-'
+  char p = (flags && flags[0] && flags[1] && flags[2] && flags[3] != 'p')
+      ? '-' : 'p';
+
+  const int rc = snprintf(buffer, bufsize,
+                          "%08" PRIx64 "-%08" PRIx64 " %c%c%c%c %08" PRIx64 " %02x:%02x %-11" PRId64 " %s\n",
+                          start, end, r,w,x,p, offset,
+                          static_cast<int>(dev/256), static_cast<int>(dev%256),
+                          inode, filename);
+  return (rc < 0 || rc >= bufsize) ? 0 : rc;
+}
+
+namespace tcmalloc {
+
+// Helper to add the list of mapped shared libraries to a profile.
+// Fill formatted "/proc/self/maps" contents into buffer 'buf' of size 'size'
+// and return the actual size occupied in 'buf'.  Sets *wrote_all to true
+// if all proc lines were successfully written to 'buf', and to false
+// otherwise.  Note that 'buf' is not 0-terminated.
+int FillProcSelfMaps(char buf[], int size, bool* wrote_all) {
+  ProcMapsIterator::Buffer iterbuf;
+  ProcMapsIterator it(0, &iterbuf);   // 0 means "current pid"
+
+  uint64 start, end, offset;
+  int64 inode;
+  char *flags, *filename;
+  int bytes_written = 0;
+  *wrote_all = true;
+  while (it.Next(&start, &end, &flags, &offset, &inode, &filename)) {
+    const int line_length = it.FormatLine(buf + bytes_written,
+                                          size - bytes_written,
+                                          start, end, flags, offset,
+                                          inode, filename, 0);
+    if (line_length == 0)
+      *wrote_all = false;     // failed to write this line out
+    else
+      bytes_written += line_length;
+  }
+  return bytes_written;
+}
+
+// Dump the same data as FillProcSelfMaps reads to fd.
+// It seems easier to repeat parts of FillProcSelfMaps here than to
+// reuse it via a call.
+void DumpProcSelfMaps(RawFD fd) {
+  ProcMapsIterator::Buffer iterbuf;
+  ProcMapsIterator it(0, &iterbuf);   // 0 means "current pid"
+
+  uint64 start, end, offset;
+  int64 inode;
+  char *flags, *filename;
+  ProcMapsIterator::Buffer linebuf;
+  while (it.Next(&start, &end, &flags, &offset, &inode, &filename)) {
+    int written = it.FormatLine(linebuf.buf_, sizeof(linebuf.buf_),
+                                start, end, flags, offset, inode, filename,
+                                0);
+    RawWrite(fd, linebuf.buf_, written);
+  }
+}
+
+}  // namespace tcmalloc
diff --git a/src/base/sysinfo.h b/src/base/sysinfo.h
new file mode 100644
index 0000000..cc5cb74
--- /dev/null
+++ b/src/base/sysinfo.h
@@ -0,0 +1,236 @@
+// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
+// Copyright (c) 2006, Google Inc.
+// All rights reserved.
+// 
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+// 
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+// 
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// All functions here are thread-hostile due to file caching unless
+// commented otherwise.
+
+#ifndef _SYSINFO_H_
+#define _SYSINFO_H_
+
+#include <config.h>
+
+#include <time.h>
+#if (defined(_WIN32) || defined(__MINGW32__)) && (!defined(__CYGWIN__) && !defined(__CYGWIN32__))
+#include <windows.h>   // for DWORD
+#include <tlhelp32.h>  // for CreateToolhelp32Snapshot
+#endif
+#ifdef HAVE_UNISTD_H
+#include <unistd.h>    // for pid_t
+#endif
+#include <stddef.h>    // for size_t
+#include <limits.h>    // for PATH_MAX
+#include "base/basictypes.h"
+#include "base/logging.h"   // for RawFD
+
+// This getenv function is safe to call before the C runtime is initialized.
+// On Windows, it utilizes GetEnvironmentVariable() and on unix it uses
+// /proc/self/environ instead of calling getenv().  It's intended for use in
+// routines that run before main(), when the state required for getenv() may
+// not be set up yet.  In particular, errno isn't set up until relatively late
+// (after the pthreads library has a chance to make it threadsafe), and
+// getenv() doesn't work until then. 
+// On some platforms, this call will utilize the same, static buffer for
+// repeated GetenvBeforeMain() calls. Callers should not expect pointers from
+// this routine to be long lived.
+// Note that on unix, /proc only has the environment at the time the
+// application was started, so this routine ignores setenv() calls/etc.  Also
+// note it only reads the first 16K of the environment.
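+//
+// A minimal usage sketch (hypothetical caller; copy the value out promptly,
+// since on some platforms the result points into a reused static buffer):
+//   const char* val = GetenvBeforeMain("CPUPROFILE");
+//   char copy[PATH_MAX];
+//   if (val) {
+//     strncpy(copy, val, PATH_MAX - 1);
+//     copy[PATH_MAX - 1] = '\0';
+//   }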
+extern const char* GetenvBeforeMain(const char* name);
+
+// This takes as an argument an environment-variable name (like
+// CPUPROFILE) whose value is supposed to be a file-path; it sets
+// 'path' to that path and returns true.  Non-trivial for surprising
+// reasons, as documented in sysinfo.cc.  'path' must have room for at
+// least PATH_MAX bytes.
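+//
+// For example (a sketch; CPUPROFILE is the typical client):
+//   char path[PATH_MAX];
+//   if (GetUniquePathFromEnv("CPUPROFILE", path)) {
+//     // 'path' now holds the file-path derived from $CPUPROFILE
+//   }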
+extern bool GetUniquePathFromEnv(const char* env_name, char* path);
+
+// Return the number of CPUs on the current machine.
+extern int NumCPUs();
+
+void SleepForMilliseconds(int milliseconds);
+
+// Return the processor cycles per second of each processor.  Thread-safe.
+extern double CyclesPerSecond(void);
+
+
+//  Return true if we're running POSIX (e.g., NPTL on Linux) threads,
+//  as opposed to a non-POSIX thread library.  The thing we care about
+//  is whether a thread's pid is the same as that of the thread that
+//  spawned it.  If so, this function returns true.
+//  Thread-safe.
+//  Note: We consider false negatives to be OK.
+bool HasPosixThreads();
+
+#ifndef SWIG  // SWIG doesn't like struct Buffer and variable arguments.
+
+// A ProcMapsIterator abstracts access to /proc/maps for a given
+// process. Needs to be stack-allocatable and avoid using stdio/malloc
+// so it can be used in the google stack dumper, heap-profiler, etc.
+//
+// On Windows and Mac OS X, this iterator iterates *only* over DLLs
+// mapped into this process space.  For Linux, FreeBSD, and Solaris,
+// it iterates over *all* mapped memory regions, including anonymous
+// mmaps.  For other O/Ss, it is unlikely to work at all, and Valid()
+// will always return false.  Also note: this routine only works on
+// FreeBSD if procfs is mounted: make sure this is in your /etc/fstab:
+//    proc            /proc   procfs  rw 0 0
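+//
+// Typical usage, mirroring FillProcSelfMaps() in sysinfo.cc (a sketch;
+// stack allocation means this also works inside signal handlers):
+//   ProcMapsIterator::Buffer iterbuf;
+//   ProcMapsIterator it(0, &iterbuf);   // 0 means "current pid"
+//   uint64 start, end, offset;
+//   int64 inode;
+//   char *flags, *filename;
+//   while (it.Next(&start, &end, &flags, &offset, &inode, &filename)) {
+//     // process one mapped region per iteration
+//   }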
+class ProcMapsIterator {
+ public:
+  struct Buffer {
+#ifdef __FreeBSD__
+    // FreeBSD requires us to read all of the maps file at once, so
+    // we have to make a buffer that's "always" big enough
+    static const size_t kBufSize = 102400;
+#else   // a one-line buffer is good enough
+    static const size_t kBufSize = PATH_MAX + 1024;
+#endif
+    char buf_[kBufSize];
+  };
+
+
+  // Create a new iterator for the specified pid.  pid can be 0 for "self".
+  explicit ProcMapsIterator(pid_t pid);
+
+  // Create an iterator with specified storage (for use in a signal
+  // handler).  "buffer" should point to a ProcMapsIterator::Buffer;
+  // it can be NULL, in which case a buffer will be allocated.
+  ProcMapsIterator(pid_t pid, Buffer *buffer);
+
+  // Iterate through maps_backing instead of maps if use_maps_backing
+  // is true.  Otherwise the same as above.  buffer can be NULL and
+  // it will allocate a buffer itself.
+  ProcMapsIterator(pid_t pid, Buffer *buffer,
+                   bool use_maps_backing);
+
+  // Returns true if the iterator successfully initialized.
+  bool Valid() const;
+
+  // Returns a pointer to the most recently parsed line. Only valid
+  // after Next() returns true, and until the iterator is destroyed or
+  // Next() is called again.  This may give strange results on non-Linux
+  // systems.  Prefer FormatLine() if that may be a concern.
+  const char *CurrentLine() const { return stext_; }
+
+  // Writes the "canonical" form of the /proc/xxx/maps info for a single
+  // line to the passed-in buffer. Returns the number of bytes written,
+  // or 0 if it was not able to write the complete line.  (To guarantee
+  // success, buffer should have size at least Buffer::kBufSize.)
+  // Takes as arguments values set via a call to Next().  The
+  // "canonical" form of the line (taken from linux's /proc/xxx/maps):
+  //    <start_addr(hex)>-<end_addr(hex)> <perms(rwxp)> <offset(hex)>
+  //    <major_dev(hex)>:<minor_dev(hex)> <inode> <filename>
+  // e.g.
+  //    08048000-0804c000 r-xp 00000000 03:01 3793678    /bin/cat
+  // If you don't have the dev_t (dev), feel free to pass in 0.
+  // (Next() doesn't return a dev_t, though NextExt does.)
+  //
+  // Note: if filename and flags were obtained via a call to Next(),
+  // then the output of this function is only valid if Next() returned
+  // true, and only until the iterator is destroyed or Next() is
+  // called again.  (Since filename, at least, points into CurrentLine.)
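+  //
+  // A sketch combining Next() and FormatLine(), mirroring
+  // DumpProcSelfMaps() in sysinfo.cc:
+  //   ProcMapsIterator::Buffer linebuf;
+  //   while (it.Next(&start, &end, &flags, &offset, &inode, &filename)) {
+  //     int n = it.FormatLine(linebuf.buf_, sizeof(linebuf.buf_),
+  //                           start, end, flags, offset, inode, filename, 0);
+  //     // linebuf.buf_[0..n) now holds one formatted maps line
+  //   }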
+  static int FormatLine(char* buffer, int bufsize,
+                        uint64 start, uint64 end, const char *flags,
+                        uint64 offset, int64 inode, const char *filename,
+                        dev_t dev);
+
+  // Find the next entry in /proc/maps; return true if found or false
+  // if at the end of the file.
+  //
+  // Any of the result pointers can be NULL if you're not interested
+  // in those values.
+  //
+  // If "flags" and "filename" are passed, they end up pointing to
+  // storage within the ProcMapsIterator that is valid only until the
+  // iterator is destroyed or Next() is called again. The caller may
+  // modify the contents of these strings (up as far as the first NUL,
+  // and only until the subsequent call to Next()) if desired.
+  //
+  // The offsets are all uint64 in order to handle the case of a
+  // 32-bit process running on a 64-bit kernel.
+  //
+  // IMPORTANT NOTE: see top-of-class notes for details about what
+  // mapped regions Next() iterates over, depending on O/S.
+  // TODO(csilvers): make flags and filename const.
+  bool Next(uint64 *start, uint64 *end, char **flags,
+            uint64 *offset, int64 *inode, char **filename);
+
+  bool NextExt(uint64 *start, uint64 *end, char **flags,
+               uint64 *offset, int64 *inode, char **filename,
+               uint64 *file_mapping, uint64 *file_pages,
+               uint64 *anon_mapping, uint64 *anon_pages,
+               dev_t *dev);
+
+  ~ProcMapsIterator();
+
+ private:
+  void Init(pid_t pid, Buffer *buffer, bool use_maps_backing);
+
+  char *ibuf_;        // input buffer
+  char *stext_;       // start of text
+  char *etext_;       // end of text
+  char *nextline_;    // start of next line
+  char *ebuf_;        // end of buffer (1 char for a nul)
+#if (defined(_WIN32) || defined(__MINGW32__)) && (!defined(__CYGWIN__) && !defined(__CYGWIN32__))
+  HANDLE snapshot_;   // filehandle on dll info
+  // In a change from the usual W-A pattern, there is no A variant of
+  // MODULEENTRY32.  Tlhelp32.h #defines the W variant, but not the A.
+  // We want the original A variants, and this #undef is the only
+  // way I see to get them.  Redefining it when we're done prevents us
+  // from affecting other .cc files.
+# ifdef MODULEENTRY32  // Alias of W
+#   undef MODULEENTRY32
+  MODULEENTRY32 module_;   // info about current dll (and dll iterator)
+#   define MODULEENTRY32 MODULEENTRY32W
+# else  // It's the ascii, the one we want.
+  MODULEENTRY32 module_;   // info about current dll (and dll iterator)
+# endif
+#elif defined(__MACH__)
+  int current_image_; // dll's are called "images" in macos parlance
+  int current_load_cmd_;   // the segment of this dll we're examining
+#elif defined(__sun__)     // Solaris
+  int fd_;
+  char current_filename_[PATH_MAX];
+#else
+  int fd_;            // filehandle on /proc/*/maps
+#endif
+  pid_t pid_;
+  char flags_[10];
+  Buffer* dynamic_buffer_;  // dynamically-allocated Buffer
+  bool using_maps_backing_; // true if we are looking at maps_backing instead of maps.
+};
+
+#endif  /* #ifndef SWIG */
+
+// Helper routines
+
+namespace tcmalloc {
+// Fill 'buf' (of size 'size') with the formatted contents of
+// /proc/self/maps; returns the number of bytes written.  Sets
+// *wrote_all to false if some lines did not fit.
+int FillProcSelfMaps(char buf[], int size, bool* wrote_all);
+// Write the same formatted data directly to 'fd'.
+void DumpProcSelfMaps(RawFD fd);
+}  // namespace tcmalloc
+
+#endif   /* #ifndef _SYSINFO_H_ */
diff --git a/src/base/thread_annotations.h b/src/base/thread_annotations.h
new file mode 100644
index 0000000..f57b299
--- /dev/null
+++ b/src/base/thread_annotations.h
@@ -0,0 +1,134 @@
+// Copyright (c) 2008, Google Inc.
+// All rights reserved.
+// 
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+// 
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+// 
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// ---
+// Author: Le-Chun Wu
+//
+// This header file contains the macro definitions for thread safety
+// annotations that allow the developers to document the locking policies
+// of their multi-threaded code. The annotations can also help program
+// analysis tools to identify potential thread safety issues.
+//
+// The annotations are implemented using GCC's "attributes" extension.
+// Using the macros defined here instead of the raw GCC attributes allows
+// for portability and future compatibility.
+//
+// This functionality is not yet fully implemented in perftools,
+// but may be one day.
+
+#ifndef BASE_THREAD_ANNOTATIONS_H_
+#define BASE_THREAD_ANNOTATIONS_H_
+
+
+#if defined(__GNUC__) \
+  && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 4)) \
+  && defined(__SUPPORT_TS_ANNOTATION__) && (!defined(SWIG))
+#define THREAD_ANNOTATION_ATTRIBUTE__(x)   __attribute__((x))
+#else
+#define THREAD_ANNOTATION_ATTRIBUTE__(x)   // no-op
+#endif
+
+
+// Document if a shared variable/field needs to be protected by a lock.
+// GUARDED_BY allows the user to specify a particular lock that should be
+// held when accessing the annotated variable, while GUARDED_VAR only
+// indicates a shared variable should be guarded (by any lock). GUARDED_VAR
+// is primarily used when the client cannot express the name of the lock.
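+// For example (a sketch; 'mu' and 'accesses_' are illustrative names,
+// assuming a Mutex type annotated as LOCKABLE):
+//     Mutex mu;
+//     int accesses_ GUARDED_BY(mu);   // must hold 'mu' to read or write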
+#define GUARDED_BY(x)          THREAD_ANNOTATION_ATTRIBUTE__(guarded_by(x))
+#define GUARDED_VAR            THREAD_ANNOTATION_ATTRIBUTE__(guarded)
+
+// Document if the memory location pointed to by a pointer should be guarded
+// by a lock when dereferencing the pointer. Similar to GUARDED_VAR,
+// PT_GUARDED_VAR is primarily used when the client cannot express the name
+// of the lock. Note that a pointer variable to a shared memory location
+// could itself be a shared variable. For example, if a shared global pointer
+// q, which is guarded by mu1, points to a shared memory location that is
+// guarded by mu2, q should be annotated as follows:
+//     int *q GUARDED_BY(mu1) PT_GUARDED_BY(mu2);
+#define PT_GUARDED_BY(x) \
+  THREAD_ANNOTATION_ATTRIBUTE__(point_to_guarded_by(x))
+#define PT_GUARDED_VAR \
+  THREAD_ANNOTATION_ATTRIBUTE__(point_to_guarded)
+
+// Document the acquisition order between locks that can be held
+// simultaneously by a thread. For any two locks that need to be annotated
+// to establish an acquisition order, only one of them needs the annotation.
+// (i.e. You don't have to annotate both locks with both ACQUIRED_AFTER
+// and ACQUIRED_BEFORE.)
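+// For example (a sketch): if 'mu1' must always be acquired before 'mu2':
+//     Mutex mu1;
+//     Mutex mu2 ACQUIRED_AFTER(mu1);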
+#define ACQUIRED_AFTER(x) \
+  THREAD_ANNOTATION_ATTRIBUTE__(acquired_after(x))
+#define ACQUIRED_BEFORE(x) \
+  THREAD_ANNOTATION_ATTRIBUTE__(acquired_before(x))
+
+// The following three annotations document the lock requirements for
+// functions/methods.
+
+// Document if a function expects certain locks to be held before it is called
+#define EXCLUSIVE_LOCKS_REQUIRED(x) \
+  THREAD_ANNOTATION_ATTRIBUTE__(exclusive_locks_required(x))
+
+#define SHARED_LOCKS_REQUIRED(x) \
+  THREAD_ANNOTATION_ATTRIBUTE__(shared_locks_required(x))
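+//
+// For example (a sketch; 'mu' and the function names are illustrative):
+//     void IncrementBy(int amount) EXCLUSIVE_LOCKS_REQUIRED(mu);
+//     int  GetTotal() const        SHARED_LOCKS_REQUIRED(mu);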
+
+// Document the locks acquired in the body of the function. These locks
+// cannot be held when calling this function (as google3's Mutex locks are
+// non-reentrant).
+#define LOCKS_EXCLUDED(x) \
+  THREAD_ANNOTATION_ATTRIBUTE__(locks_excluded(x))
+
+// Document the lock the annotated function returns without acquiring it.
+#define LOCK_RETURNED(x)       THREAD_ANNOTATION_ATTRIBUTE__(lock_returned(x))
+
+// Document if a class/type is a lockable type (such as the Mutex class).
+#define LOCKABLE               THREAD_ANNOTATION_ATTRIBUTE__(lockable)
+
+// Document if a class is a scoped lockable type (such as the MutexLock class).
+#define SCOPED_LOCKABLE        THREAD_ANNOTATION_ATTRIBUTE__(scoped_lockable)
+
+// The following annotations specify lock and unlock primitives.
+#define EXCLUSIVE_LOCK_FUNCTION(x) \
+  THREAD_ANNOTATION_ATTRIBUTE__(exclusive_lock(x))
+
+#define SHARED_LOCK_FUNCTION(x) \
+  THREAD_ANNOTATION_ATTRIBUTE__(shared_lock(x))
+
+#define EXCLUSIVE_TRYLOCK_FUNCTION(x) \
+  THREAD_ANNOTATION_ATTRIBUTE__(exclusive_trylock(x))
+
+#define SHARED_TRYLOCK_FUNCTION(x) \
+  THREAD_ANNOTATION_ATTRIBUTE__(shared_trylock(x))
+
+#define UNLOCK_FUNCTION(x) \
+  THREAD_ANNOTATION_ATTRIBUTE__(unlock(x))
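+
+// For example (a sketch of a lockable class using the above annotations):
+//     class LOCKABLE Mutex {
+//      public:
+//       void Lock()    EXCLUSIVE_LOCK_FUNCTION();
+//       bool TryLock() EXCLUSIVE_TRYLOCK_FUNCTION(true);
+//       void Unlock()  UNLOCK_FUNCTION();
+//     };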
+
+// An escape hatch for thread safety analysis to ignore the annotated function.
+#define NO_THREAD_SAFETY_ANALYSIS \
+  THREAD_ANNOTATION_ATTRIBUTE__(no_thread_safety_analysis)
+
+#endif  // BASE_THREAD_ANNOTATIONS_H_
diff --git a/src/base/thread_lister.c b/src/base/thread_lister.c
new file mode 100644
index 0000000..ca1b2de
--- /dev/null
+++ b/src/base/thread_lister.c
@@ -0,0 +1,77 @@
+/* Copyright (c) 2005-2007, Google Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with the
+ * distribution.
+ *     * Neither the name of Google Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * ---
+ * Author: Markus Gutschke
+ */
+
+#include "config.h"
+#include <stdio.h>         /* needed for NULL on some powerpc platforms (?!) */
+#ifdef HAVE_SYS_PRCTL
+# include <sys/prctl.h>
+#endif
+#include "base/thread_lister.h"
+#include "base/linuxthreads.h"
+/* Include other thread listers here that define THREADS macro
+ * only when they can provide a good implementation.
+ */
+
+#ifndef THREADS
+
+/* Default trivial thread lister for single-threaded applications,
+ * or if the multi-threading code has not been ported yet.
+ */
+
+int TCMalloc_ListAllProcessThreads(void *parameter,
+                                   ListAllProcessThreadsCallBack callback, ...) {
+  int rc;
+  va_list ap;
+  pid_t pid;
+
+#ifdef HAVE_SYS_PRCTL
+  int dumpable = prctl(PR_GET_DUMPABLE, 0);
+  if (!dumpable)
+    prctl(PR_SET_DUMPABLE, 1);
+#endif
+  va_start(ap, callback);
+  pid = getpid();
+  rc = callback(parameter, 1, &pid, ap);
+  va_end(ap);
+#ifdef HAVE_SYS_PRCTL
+  if (!dumpable)
+    prctl(PR_SET_DUMPABLE, 0);
+#endif
+  return rc;
+}
+
+int TCMalloc_ResumeAllProcessThreads(int num_threads, pid_t *thread_pids) {
+  return 1;
+}
+
+#endif   /* ifndef THREADS */
diff --git a/src/base/thread_lister.h b/src/base/thread_lister.h
new file mode 100644
index 0000000..6e70b89
--- /dev/null
+++ b/src/base/thread_lister.h
@@ -0,0 +1,83 @@
+/* -*- Mode: c; c-basic-offset: 2; indent-tabs-mode: nil -*- */
+/* Copyright (c) 2005-2007, Google Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with the
+ * distribution.
+ *     * Neither the name of Google Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * ---
+ * Author: Markus Gutschke
+ */
+
+#ifndef _THREAD_LISTER_H
+#define _THREAD_LISTER_H
+
+#include <stdarg.h>
+#include <sys/types.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef int (*ListAllProcessThreadsCallBack)(void *parameter,
+                                             int num_threads,
+                                             pid_t *thread_pids,
+                                             va_list ap);
+
+/* This function gets the list of all linux threads of the current process
+ * and passes them to the 'callback' along with the 'parameter' pointer; by
+ * the time the callback is invoked, all of the threads have been paused via
+ * PTRACE_ATTACH.
+ * The callback is executed from a separate thread which shares only the
+ * address space, the filesystem, and the filehandles with the caller. Most
+ * notably, it does not share the same pid and ppid; and if it terminates,
+ * the rest of the application is still there. 'callback' is supposed to
+ * call (or arrange for a call to) TCMalloc_ResumeAllProcessThreads. This
+ * happens automatically if the thread raises a synchronous signal (e.g.
+ * SIGSEGV); asynchronous signals are blocked. If the 'callback' decides to
+ * unblock them, it must ensure that they cannot terminate the application,
+ * or that TCMalloc_ResumeAllProcessThreads will get called.
+ * It is an error for the 'callback' to make any library calls that could
+ * acquire locks. Most notably, this means that most system calls have to
+ * avoid going through libc. Also, this means that it is not legal to call
+ * exit() or abort().
+ * We return -1 on error and the return value of 'callback' on success.
+ */
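+
+/* A usage sketch (the callback and its name are illustrative; real
+ * callbacks must avoid lock-taking library calls, as noted above):
+ *
+ *   static int MyCallback(void *param, int num_threads,
+ *                         pid_t *thread_pids, va_list ap) {
+ *     ... inspect the paused threads here, then resume them ...
+ *     return TCMalloc_ResumeAllProcessThreads(num_threads, thread_pids);
+ *   }
+ *
+ *   int rc = TCMalloc_ListAllProcessThreads(param, MyCallback);
+ */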
+int TCMalloc_ListAllProcessThreads(void *parameter,
+                                   ListAllProcessThreadsCallBack callback, ...);
+
+/* This function resumes the list of all linux threads that
+ * TCMalloc_ListAllProcessThreads pauses before giving to its
+ * callback.  The function returns non-zero if at least one thread was
+ * suspended and has now been resumed.
+ */
+int TCMalloc_ResumeAllProcessThreads(int num_threads, pid_t *thread_pids);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif  /* _THREAD_LISTER_H */
diff --git a/src/base/vdso_support.cc b/src/base/vdso_support.cc
new file mode 100644
index 0000000..730df30
--- /dev/null
+++ b/src/base/vdso_support.cc
@@ -0,0 +1,143 @@
+// Copyright (c) 2008, Google Inc.
+// All rights reserved.
+// 
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+// 
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+// 
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// ---
+// Author: Paul Pluzhnikov
+//
+// Allow dynamic symbol lookup in the kernel VDSO page.
+//
+// VDSOSupport -- a class representing kernel VDSO (if present).
+//
+
+#include "base/vdso_support.h"
+
+#ifdef HAVE_VDSO_SUPPORT     // defined in vdso_support.h
+
+#include <fcntl.h>
+#include <stddef.h>   // for ptrdiff_t
+
+#include "base/atomicops.h"  // for MemoryBarrier
+#include "base/linux_syscall_support.h"
+#include "base/logging.h"
+#include "base/dynamic_annotations.h"
+#include "base/basictypes.h"  // for COMPILE_ASSERT
+
+using base::subtle::MemoryBarrier;
+
+#ifndef AT_SYSINFO_EHDR
+#define AT_SYSINFO_EHDR 33
+#endif
+
+namespace base {
+
+const void *VDSOSupport::vdso_base_ = ElfMemImage::kInvalidBase;
+VDSOSupport::VDSOSupport()
+    // If vdso_base_ is still set to kInvalidBase, we got here
+    // before VDSOSupport::Init has been called. Call it now.
+    : image_(vdso_base_ == ElfMemImage::kInvalidBase ? Init() : vdso_base_) {
+}
+
+// NOTE: we can't use GoogleOnceInit() below, because we can be
+// called by tcmalloc, and none of the *once* stuff may be functional yet.
+//
+// In addition, we hope that the VDSOSupportHelper constructor
+// causes this code to run before there are any threads, and before
+// InitGoogle() has executed any chroot or setuid calls.
+//
+// Finally, even if there is a race here, it is harmless, because
+// the operation should be idempotent.
+const void *VDSOSupport::Init() {
+  if (vdso_base_ == ElfMemImage::kInvalidBase) {
+    // Valgrind zaps AT_SYSINFO_EHDR and friends from the auxv[]
+    // on stack, and so glibc works as if VDSO was not present.
+    // But going directly to kernel via /proc/self/auxv below bypasses
+    // Valgrind zapping. So we check for Valgrind separately.
+    if (RunningOnValgrind()) {
+      vdso_base_ = NULL;
+      return NULL;
+    }
+    int fd = open("/proc/self/auxv", O_RDONLY);
+    if (fd == -1) {
+      // Kernel too old to have a VDSO.
+      vdso_base_ = NULL;
+      return NULL;
+    }
+    ElfW(auxv_t) aux;
+    while (read(fd, &aux, sizeof(aux)) == sizeof(aux)) {
+      if (aux.a_type == AT_SYSINFO_EHDR) {
+        COMPILE_ASSERT(sizeof(vdso_base_) == sizeof(aux.a_un.a_val),
+                       unexpected_sizeof_pointer_NE_sizeof_a_val);
+        vdso_base_ = reinterpret_cast<void *>(aux.a_un.a_val);
+        break;
+      }
+    }
+    close(fd);
+    if (vdso_base_ == ElfMemImage::kInvalidBase) {
+      // Didn't find AT_SYSINFO_EHDR in auxv[].
+      vdso_base_ = NULL;
+    }
+  }
+  return vdso_base_;
+}
+
+const void *VDSOSupport::SetBase(const void *base) {
+  CHECK(base != ElfMemImage::kInvalidBase);
+  const void *old_base = vdso_base_;
+  vdso_base_ = base;
+  image_.Init(base);
+  return old_base;
+}
+
+bool VDSOSupport::LookupSymbol(const char *name,
+                               const char *version,
+                               int type,
+                               SymbolInfo *info) const {
+  return image_.LookupSymbol(name, version, type, info);
+}
+
+bool VDSOSupport::LookupSymbolByAddress(const void *address,
+                                        SymbolInfo *info_out) const {
+  return image_.LookupSymbolByAddress(address, info_out);
+}
+
+// We need to make sure VDSOSupport::Init() is called before
+// main() runs, since main() might do something like setuid or
+// chroot.  If VDSOSupport is used in any global constructor, this
+// will happen, since VDSOSupport's constructor calls Init.  But if
+// not, we need to ensure it here, with a global constructor of our
+// own.  This is an allowed exception to the normal rule against
+// non-trivial global constructors.
+static class VDSOInitHelper {
+ public:
+  VDSOInitHelper() { VDSOSupport::Init(); }
+} vdso_init_helper;
+}  // namespace base
+
+#endif  // HAVE_VDSO_SUPPORT
diff --git a/src/base/vdso_support.h b/src/base/vdso_support.h
new file mode 100644
index 0000000..c1209a4
--- /dev/null
+++ b/src/base/vdso_support.h
@@ -0,0 +1,132 @@
+// Copyright (c) 2008, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// ---
+// Author: Paul Pluzhnikov
+//
+// Allow dynamic symbol lookup in the kernel VDSO page.
+//
+// VDSO stands for "Virtual Dynamic Shared Object" -- a page of
+// executable code, which looks like a shared library, but doesn't
+// necessarily exist anywhere on disk, and which gets mmap()ed into
+// every process by kernels which support VDSO, such as 2.6.x for 32-bit
+// executables, and 2.6.24 and above for 64-bit executables.
+//
+// More details could be found here:
+// http://www.trilithium.com/johan/2005/08/linux-gate/
+//
+// VDSOSupport -- a class representing kernel VDSO (if present).
+//
+// Example usage:
+//  VDSOSupport vdso;
+//  VDSOSupport::SymbolInfo info;
+//  typedef long (*FN)(unsigned *, void *, void *);
+//  FN fn = NULL;
+//  if (vdso.LookupSymbol("__vdso_getcpu", "LINUX_2.6", STT_FUNC, &info)) {
+//     fn = reinterpret_cast<FN>(info.address);
+//  }
+
+#ifndef BASE_VDSO_SUPPORT_H_
+#define BASE_VDSO_SUPPORT_H_
+
+#include <config.h>
+#include "base/basictypes.h"
+#include "base/elf_mem_image.h"
+
+#ifdef HAVE_ELF_MEM_IMAGE
+
+#define HAVE_VDSO_SUPPORT 1
+
+#include <stdlib.h>     // for NULL
+
+namespace base {
+
+// NOTE: this class may be used from within tcmalloc, and can not
+// use any memory allocation routines.
+class VDSOSupport {
+ public:
+  VDSOSupport();
+
+  typedef ElfMemImage::SymbolInfo SymbolInfo;
+  typedef ElfMemImage::SymbolIterator SymbolIterator;
+
+  // Answers whether we have a vdso at all.
+  bool IsPresent() const { return image_.IsPresent(); }
+
+  // Allows iteration over all VDSO symbols.
+  SymbolIterator begin() const { return image_.begin(); }
+  SymbolIterator end() const { return image_.end(); }
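+  //
+  // For example (a sketch; SymbolInfo's fields are declared in
+  // elf_mem_image.h):
+  //   VDSOSupport vdso;
+  //   for (VDSOSupport::SymbolIterator it = vdso.begin();
+  //        it != vdso.end(); ++it) {
+  //     // it->name, it->version and it->address describe one symbol
+  //   }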
+
+  // Look up versioned dynamic symbol in the kernel VDSO.
+  // Returns false if VDSO is not present, or doesn't contain given
+  // symbol/version/type combination.
+  // If info_out != NULL, additional details are filled in.
+  bool LookupSymbol(const char *name, const char *version,
+                    int symbol_type, SymbolInfo *info_out) const;
+
+  // Find info about symbol (if any) which overlaps given address.
+  // Returns true if symbol was found; false if VDSO isn't present
+  // or doesn't have a symbol overlapping given address.
+  // If info_out != NULL, additional details are filled in.
+  bool LookupSymbolByAddress(const void *address, SymbolInfo *info_out) const;
+
+  // Used only for testing. Replace real VDSO base with a mock.
+  // Returns previous value of vdso_base_. After you are done testing,
+  // you are expected to call SetBase() with the previous value, in order to
+  // reset state to the way it was.
+  const void *SetBase(const void *s);
+
+  // Computes vdso_base_ and returns it. Should be called as early as
+  // possible; before any thread creation, chroot or setuid.
+  static const void *Init();
+
+ private:
+  // image_ represents VDSO ELF image in memory.
+  // image_.ehdr_ == NULL implies there is no VDSO.
+  ElfMemImage image_;
+
+  // Cached value of auxv AT_SYSINFO_EHDR, computed once.
+  // This is a tri-state:
+  //   kInvalidBase   => value hasn't been determined yet.
+  //              0   => there is no VDSO.
+  //           else   => vma of VDSO Elf{32,64}_Ehdr.
+  //
+  // When testing with mock VDSO, low bit is set.
+  // The low bit is always available because vdso_base_ is
+  // page-aligned.
+  static const void *vdso_base_;
+
+  DISALLOW_COPY_AND_ASSIGN(VDSOSupport);
+};
+
+}  // namespace base
+
+#endif  // HAVE_ELF_MEM_IMAGE
+
+#endif  // BASE_VDSO_SUPPORT_H_