Austin Schuh | 745610d | 2015-09-06 18:19:50 -0700 | [diff] [blame^] | 1 | // -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- |
| 2 | // Copyright (c) 2005, Google Inc. |
| 3 | // All rights reserved. |
| 4 | // |
| 5 | // Redistribution and use in source and binary forms, with or without |
| 6 | // modification, are permitted provided that the following conditions are |
| 7 | // met: |
| 8 | // |
| 9 | // * Redistributions of source code must retain the above copyright |
| 10 | // notice, this list of conditions and the following disclaimer. |
| 11 | // * Redistributions in binary form must reproduce the above |
| 12 | // copyright notice, this list of conditions and the following disclaimer |
| 13 | // in the documentation and/or other materials provided with the |
| 14 | // distribution. |
| 15 | // * Neither the name of Google Inc. nor the names of its |
| 16 | // contributors may be used to endorse or promote products derived from |
| 17 | // this software without specific prior written permission. |
| 18 | // |
| 19 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
| 20 | // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
| 21 | // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
| 22 | // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
| 23 | // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
| 24 | // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
| 25 | // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
| 26 | // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
| 27 | // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| 28 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
| 29 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| 30 | |
| 31 | // --- |
| 32 | // Author: Sanjay Ghemawat |
| 33 | // |
| 34 | // Produce stack trace |
| 35 | |
| 36 | #ifndef BASE_STACKTRACE_X86_INL_H_ |
| 37 | #define BASE_STACKTRACE_X86_INL_H_ |
| 38 | // Note: this file is included into stacktrace.cc more than once. |
| 39 | // Anything that should only be defined once should be here: |
| 40 | |
| 41 | #include "config.h" |
| 42 | #include <stdlib.h> // for NULL |
| 43 | #include <assert.h> |
| 44 | #if defined(HAVE_SYS_UCONTEXT_H) |
| 45 | #include <sys/ucontext.h> |
| 46 | #elif defined(HAVE_UCONTEXT_H) |
| 47 | #include <ucontext.h> // for ucontext_t |
| 48 | #elif defined(HAVE_CYGWIN_SIGNAL_H) |
| 49 | // cygwin/signal.h has a buglet where it uses pthread_attr_t without |
| 50 | // #including <pthread.h> itself. So we have to do it. |
| 51 | # ifdef HAVE_PTHREAD |
| 52 | # include <pthread.h> |
| 53 | # endif |
| 54 | #include <cygwin/signal.h> |
| 55 | typedef ucontext ucontext_t; |
| 56 | #endif |
| 57 | #ifdef HAVE_STDINT_H |
| 58 | #include <stdint.h> // for uintptr_t |
| 59 | #endif |
| 60 | #ifdef HAVE_UNISTD_H |
| 61 | #include <unistd.h> |
| 62 | #endif |
| 63 | #ifdef HAVE_MMAP |
| 64 | #include <sys/mman.h> // for msync |
| 65 | #include "base/vdso_support.h" |
| 66 | #endif |
| 67 | |
| 68 | #include "gperftools/stacktrace.h" |
| 69 | |
| 70 | #if defined(__linux__) && defined(__i386__) && defined(__ELF__) && defined(HAVE_MMAP) |
| 71 | // Count "push %reg" instructions in VDSO __kernel_vsyscall(), |
| 72 | // preceding "syscall" or "sysenter". |
| 73 | // If __kernel_vsyscall uses frame pointer, answer 0. |
| 74 | // |
| 75 | // kMaxBytes tells how many instruction bytes of __kernel_vsyscall |
| 76 | // to analyze before giving up. Up to kMaxBytes+1 bytes of |
| 77 | // instructions could be accessed. |
| 78 | // |
| 79 | // Here are known __kernel_vsyscall instruction sequences: |
| 80 | // |
| 81 | // SYSENTER (linux-2.6.26/arch/x86/vdso/vdso32/sysenter.S). |
| 82 | // Used on Intel. |
| 83 | // 0xffffe400 <__kernel_vsyscall+0>: push %ecx |
| 84 | // 0xffffe401 <__kernel_vsyscall+1>: push %edx |
| 85 | // 0xffffe402 <__kernel_vsyscall+2>: push %ebp |
| 86 | // 0xffffe403 <__kernel_vsyscall+3>: mov %esp,%ebp |
| 87 | // 0xffffe405 <__kernel_vsyscall+5>: sysenter |
| 88 | // |
| 89 | // SYSCALL (see linux-2.6.26/arch/x86/vdso/vdso32/syscall.S). |
| 90 | // Used on AMD. |
| 91 | // 0xffffe400 <__kernel_vsyscall+0>: push %ebp |
| 92 | // 0xffffe401 <__kernel_vsyscall+1>: mov %ecx,%ebp |
| 93 | // 0xffffe403 <__kernel_vsyscall+3>: syscall |
| 94 | // |
| 95 | // i386 (see linux-2.6.26/arch/x86/vdso/vdso32/int80.S) |
| 96 | // 0xffffe400 <__kernel_vsyscall+0>: int $0x80 |
| 97 | // 0xffffe401 <__kernel_vsyscall+1>: ret |
| 98 | // |
// Number of instruction bytes of __kernel_vsyscall that are scanned before
// giving up.  Two-byte opcodes are recognized by peeking at the following
// byte, so up to kMaxBytes + 1 bytes may be read.
static const int kMaxBytes = 10;

// We use assert()s instead of DCHECK()s -- this is too low level
// for DCHECK().

// Scans the machine code at addr (expected to be __kernel_vsyscall) and
// returns the number of "push %reg" instructions seen before "sysenter" or
// "syscall".
//
// Returns 0 when "mov %esp,%ebp" is found (a frame pointer is set up), when
// "int $0x80" is found, when an unrecognized opcode is encountered, or when
// neither "sysenter" nor "syscall" shows up within kMaxBytes bytes.  The
// last two cases also trip an assert() in debug builds.
static int CountPushInstructions(const unsigned char *const addr) {
  int result = 0;
  for (int i = 0; i < kMaxBytes; ++i) {
    const unsigned char opcode = addr[i];
    if (opcode == 0x89) {
      // "mov reg,reg"
      if (addr[i + 1] == 0xE5) {
        // Found "mov %esp,%ebp".
        return 0;
      }
      ++i;  // Skip register encoding byte.
    } else if (opcode == 0x0F &&
               (addr[i + 1] == 0x34 || addr[i + 1] == 0x05)) {
      // Found "sysenter" or "syscall".
      return result;
    } else if ((opcode & 0xF8) == 0x50) {
      // Found "push %reg": opcode 0x50 + register number, i.e. 0x50..0x57.
      // 0x58..0x5F encodes "pop %reg" and must not be counted as a push.
      ++result;
    } else if (opcode == 0xCD && addr[i + 1] == 0x80) {
      // Found "int $0x80"
      assert(result == 0);
      return 0;
    } else {
      // Unexpected instruction.
      assert(0 == "unexpected instruction in __kernel_vsyscall");
      return 0;
    }
  }
  // Unexpected: didn't find SYSENTER or SYSCALL in
  // [__kernel_vsyscall, __kernel_vsyscall + kMaxBytes) interval.
  assert(0 == "did not find SYSENTER or SYSCALL in __kernel_vsyscall");
  return 0;
}
| 136 | #endif |
| 137 | |
// Given a pointer to a stack frame, locate and return the calling
// stackframe, or return NULL if no stackframe can be found. Perform sanity
// checks (the strictness of which is controlled by the boolean parameter
// "STRICT_UNWINDING") to reduce the chance that a bad pointer is returned.
//
// Template parameters:
//   STRICT_UNWINDING  when true, the caller's frame must lie above old_sp
//                     and within 100,000 bytes of it; when false, frames
//                     may be discontiguous (up to about 1MB when above
//                     old_sp) and, if HAVE_MMAP is defined, the candidate
//                     frame is additionally probed with msync().
//   WITH_CONTEXT      when true and uc is non-NULL, uc is interpreted as a
//                     ucontext_t* on Linux/i386 builds with VDSO support.
//
// Arguments:
//   old_sp  frame pointer of the current frame; *old_sp is read as the
//           saved frame pointer of the caller.
//   uc      optional signal context (see WITH_CONTEXT), may be NULL.
template<bool STRICT_UNWINDING, bool WITH_CONTEXT>
static void **NextStackFrame(void **old_sp, const void *uc) {
  void **new_sp = (void **) *old_sp;

#if defined(__linux__) && defined(__i386__) && defined(HAVE_VDSO_SUPPORT)
  if (WITH_CONTEXT && uc != NULL) {
    // How many "push %reg" instructions are there at __kernel_vsyscall?
    // This is constant for a given kernel and processor, so compute
    // it only once.
    static int num_push_instructions = -1;  // Sentinel: not computed yet.
    // Initialize with sentinel value: __kernel_rt_sigreturn can not possibly
    // be there.
    static const unsigned char *kernel_rt_sigreturn_address = NULL;
    static const unsigned char *kernel_vsyscall_address = NULL;
    if (num_push_instructions == -1) {
      base::VDSOSupport vdso;
      if (vdso.IsPresent()) {
        base::VDSOSupport::SymbolInfo rt_sigreturn_symbol_info;
        base::VDSOSupport::SymbolInfo vsyscall_symbol_info;
        if (!vdso.LookupSymbol("__kernel_rt_sigreturn", "LINUX_2.5",
                               STT_FUNC, &rt_sigreturn_symbol_info) ||
            !vdso.LookupSymbol("__kernel_vsyscall", "LINUX_2.5",
                               STT_FUNC, &vsyscall_symbol_info) ||
            rt_sigreturn_symbol_info.address == NULL ||
            vsyscall_symbol_info.address == NULL) {
          // Unexpected: 32-bit VDSO is present, yet one of the expected
          // symbols is missing or NULL.
          assert(0 == "VDSO is present, but doesn't have expected symbols");
          num_push_instructions = 0;
        } else {
          kernel_rt_sigreturn_address =
              reinterpret_cast<const unsigned char *>(
                  rt_sigreturn_symbol_info.address);
          kernel_vsyscall_address =
              reinterpret_cast<const unsigned char *>(
                  vsyscall_symbol_info.address);
          num_push_instructions =
              CountPushInstructions(kernel_vsyscall_address);
        }
      } else {
        num_push_instructions = 0;
      }
    }
    if (num_push_instructions != 0 && kernel_rt_sigreturn_address != NULL &&
        old_sp[1] == kernel_rt_sigreturn_address) {
      const ucontext_t *ucv = static_cast<const ucontext_t *>(uc);
      // This kernel does not use frame pointer in its VDSO code,
      // and so %ebp is not suitable for unwinding.
      void **const reg_ebp =
          reinterpret_cast<void **>(ucv->uc_mcontext.gregs[REG_EBP]);
      const unsigned char *const reg_eip =
          reinterpret_cast<unsigned char *>(ucv->uc_mcontext.gregs[REG_EIP]);
      if (new_sp == reg_ebp &&
          kernel_vsyscall_address <= reg_eip &&
          reg_eip - kernel_vsyscall_address < kMaxBytes) {
        // We "stepped up" to __kernel_vsyscall, but %ebp is not usable.
        // Restore from 'ucv' instead.
        void **const reg_esp =
            reinterpret_cast<void **>(ucv->uc_mcontext.gregs[REG_ESP]);
        // Check that alleged %esp is not NULL and is reasonably aligned.
        if (reg_esp &&
            ((uintptr_t)reg_esp & (sizeof(reg_esp) - 1)) == 0) {
          // Check that alleged %esp is actually readable. This is to prevent
          // "double fault" in case we hit the first fault due to e.g. stack
          // corruption.
          //
          // page_size is linker-initialized to avoid async-unsafe locking
          // that GCC would otherwise insert (__cxa_guard_acquire etc).
          static int page_size;
          if (page_size == 0) {
            // First time through.
            page_size = getpagesize();
          }
          void *const reg_esp_aligned =
              reinterpret_cast<void *>(
                  (uintptr_t)(reg_esp + num_push_instructions - 1) &
                  ~(page_size - 1));
          if (msync(reg_esp_aligned, page_size, MS_ASYNC) == 0) {
            // Alleged %esp is readable, use it for further unwinding.
            new_sp = reinterpret_cast<void **>(
                reg_esp[num_push_instructions - 1]);
          }
        }
      }
    }
  }
#endif

  // Check that the transition from frame pointer old_sp to frame
  // pointer new_sp isn't clearly bogus
  if (STRICT_UNWINDING) {
    // With the stack growing downwards, older stack frame must be
    // at a greater address than the current one.
    if (new_sp <= old_sp) return NULL;
    // Assume stack frames larger than 100,000 bytes are bogus.
    if ((uintptr_t)new_sp - (uintptr_t)old_sp > 100000) return NULL;
  } else {
    // In the non-strict mode, allow discontiguous stack frames.
    // (alternate-signal-stacks for example).
    if (new_sp == old_sp) return NULL;
    if (new_sp > old_sp) {
      // And allow frames up to about 1MB.
      const uintptr_t delta = (uintptr_t)new_sp - (uintptr_t)old_sp;
      const uintptr_t acceptable_delta = 1000000;
      if (delta > acceptable_delta) {
        return NULL;
      }
    }
  }
  if ((uintptr_t)new_sp & (sizeof(void *) - 1)) return NULL;
#ifdef __i386__
  // On 64-bit machines, the stack pointer can be very close to
  // 0xffffffff, so we explicitly check for a pointer into the
  // last two pages in the address space
  if ((uintptr_t)new_sp >= 0xffffe000) return NULL;
#endif
#ifdef HAVE_MMAP
  if (!STRICT_UNWINDING) {
    // Lax sanity checks cause a crash on AMD-based machines with
    // VDSO-enabled kernels.
    // Make an extra sanity check to ensure new_sp is readable.
    // Note: NextStackFrame<false>() is only called while the program
    //       is already on its last leg, so it's ok to be slow here.
    //
    // page_size is zero-initialized and filled in lazily, rather than being
    // written as "static int page_size = getpagesize();", so that the
    // compiler does not emit a guarded dynamic initialization
    // (__cxa_guard_acquire etc) inside the stack walker.
    static int page_size;
    if (page_size == 0) {
      // First time through.
      page_size = getpagesize();
    }
    void *new_sp_aligned = (void *)((uintptr_t)new_sp & ~(page_size - 1));
    if (msync(new_sp_aligned, page_size, MS_ASYNC) == -1)
      return NULL;
  }
#endif
  return new_sp;
}
| 273 | |
| 274 | #endif // BASE_STACKTRACE_X86_INL_H_ |
| 275 | |
| 276 | // Note: this part of the file is included several times. |
| 277 | // Do not put globals below. |
| 278 | |
| 279 | // The following 4 functions are generated from the code below: |
| 280 | // GetStack{Trace,Frames}() |
| 281 | // GetStack{Trace,Frames}WithContext() |
| 282 | // |
| 283 | // These functions take the following args: |
| 284 | // void** result: the stack-trace, as an array |
| 285 | // int* sizes: the size of each stack frame, as an array |
| 286 | // (GetStackFrames* only) |
| 287 | // int max_depth: the size of the result (and sizes) array(s) |
| 288 | // int skip_count: how many stack pointers to skip before storing in result |
| 289 | // void* ucp: a ucontext_t* (GetStack{Trace,Frames}WithContext only) |
| 290 | |
| 291 | static int GET_STACK_TRACE_OR_FRAMES { |
| 292 | void **sp; |
| 293 | #if (__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 2) || __llvm__ |
| 294 | // __builtin_frame_address(0) can return the wrong address on gcc-4.1.0-k8. |
| 295 | // It's always correct on llvm, and the techniques below aren't (in |
| 296 | // particular, llvm-gcc will make a copy of pcs, so it's not in sp[2]), |
| 297 | // so we also prefer __builtin_frame_address when running under llvm. |
| 298 | sp = reinterpret_cast<void**>(__builtin_frame_address(0)); |
| 299 | #elif defined(__i386__) |
| 300 | // Stack frame format: |
| 301 | // sp[0] pointer to previous frame |
| 302 | // sp[1] caller address |
| 303 | // sp[2] first argument |
| 304 | // ... |
| 305 | // NOTE: This will break under llvm, since result is a copy and not in sp[2] |
| 306 | sp = (void **)&result - 2; |
| 307 | #elif defined(__x86_64__) |
| 308 | unsigned long rbp; |
| 309 | // Move the value of the register %rbp into the local variable rbp. |
| 310 | // We need 'volatile' to prevent this instruction from getting moved |
| 311 | // around during optimization to before function prologue is done. |
| 312 | // An alternative way to achieve this |
| 313 | // would be (before this __asm__ instruction) to call Noop() defined as |
| 314 | // static void Noop() __attribute__ ((noinline)); // prevent inlining |
| 315 | // static void Noop() { asm(""); } // prevent optimizing-away |
| 316 | __asm__ volatile ("mov %%rbp, %0" : "=r" (rbp)); |
| 317 | // Arguments are passed in registers on x86-64, so we can't just |
| 318 | // offset from &result |
| 319 | sp = (void **) rbp; |
| 320 | #else |
| 321 | # error Using stacktrace_x86-inl.h on a non x86 architecture! |
| 322 | #endif |
| 323 | |
| 324 | skip_count++; // skip parent's frame due to indirection in stacktrace.cc |
| 325 | |
| 326 | int n = 0; |
| 327 | while (sp && n < max_depth) { |
| 328 | if (*(sp+1) == reinterpret_cast<void *>(0)) { |
| 329 | // In 64-bit code, we often see a frame that |
| 330 | // points to itself and has a return address of 0. |
| 331 | break; |
| 332 | } |
| 333 | #if !IS_WITH_CONTEXT |
| 334 | const void *const ucp = NULL; |
| 335 | #endif |
| 336 | void **next_sp = NextStackFrame<!IS_STACK_FRAMES, IS_WITH_CONTEXT>(sp, ucp); |
| 337 | if (skip_count > 0) { |
| 338 | skip_count--; |
| 339 | } else { |
| 340 | result[n] = *(sp+1); |
| 341 | #if IS_STACK_FRAMES |
| 342 | if (next_sp > sp) { |
| 343 | sizes[n] = (uintptr_t)next_sp - (uintptr_t)sp; |
| 344 | } else { |
| 345 | // A frame-size of 0 is used to indicate unknown frame size. |
| 346 | sizes[n] = 0; |
| 347 | } |
| 348 | #endif |
| 349 | n++; |
| 350 | } |
| 351 | sp = next_sp; |
| 352 | } |
| 353 | return n; |
| 354 | } |