blob: f56e97c99332a264b6f3f2303e98f575d2fb6e46 [file] [log] [blame]
Austin Schuh906616c2019-01-21 20:25:11 -08001// Copyright (c) 2006, Google Inc.
2// All rights reserved.
3//
4// Redistribution and use in source and binary forms, with or without
5// modification, are permitted provided that the following conditions are
6// met:
7//
8// * Redistributions of source code must retain the above copyright
9// notice, this list of conditions and the following disclaimer.
10// * Redistributions in binary form must reproduce the above
11// copyright notice, this list of conditions and the following disclaimer
12// in the documentation and/or other materials provided with the
13// distribution.
14// * Neither the name of Google Inc. nor the names of its
15// contributors may be used to endorse or promote products derived from
16// this software without specific prior written permission.
17//
18// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29//
30// Author: Satoru Takabayashi
31// Stack-footprint reduction work done by Raksit Ashok
32//
33// Implementation note:
34//
35// We don't use heaps but only use stacks. We want to reduce the
36// stack consumption so that the symbolizer can run on small stacks.
37//
38// Here are some numbers collected with GCC 4.1.0 on x86:
39// - sizeof(Elf32_Sym) = 16
40// - sizeof(Elf32_Shdr) = 40
41// - sizeof(Elf64_Sym) = 24
42// - sizeof(Elf64_Shdr) = 64
43//
44// This implementation is intended to be async-signal-safe but uses
45// some functions which are not guaranteed to be so, such as memchr()
46// and memmove(). We assume they are async-signal-safe.
47//
48// Additional header can be specified by the GLOG_BUILD_CONFIG_INCLUDE
James Kuszmaulba0ac1a2022-08-12 16:29:30 -070049// macro to add platform specific defines (e.g. GLOG_OS_OPENBSD).
Austin Schuh906616c2019-01-21 20:25:11 -080050
51#ifdef GLOG_BUILD_CONFIG_INCLUDE
52#include GLOG_BUILD_CONFIG_INCLUDE
53#endif // GLOG_BUILD_CONFIG_INCLUDE
54
55#include "utilities.h"
56
57#if defined(HAVE_SYMBOLIZE)
58
James Kuszmaulba0ac1a2022-08-12 16:29:30 -070059#include <cstring>
Austin Schuh906616c2019-01-21 20:25:11 -080060
61#include <algorithm>
62#include <limits>
63
64#include "symbolize.h"
65#include "demangle.h"
66
67_START_GOOGLE_NAMESPACE_
68
// assert() is not guaranteed to be async-signal-safe, so this file defines
// its own minimal assertion macro.  There is no pretty printing of __FILE__
// and friends: a failed check simply aborts.

// Calls abort().  Declared to return int so that the call can appear as an
// operand of the ?: operator inside SAFE_ASSERT.
static int AssertFail() {
  abort();
  return 0;  // Never reached.
}

// Evaluates to 0 when "expr" is true; otherwise calls AssertFail().
#define SAFE_ASSERT(expr) ((expr) ? 0 : AssertFail())
80
81static SymbolizeCallback g_symbolize_callback = NULL;
82void InstallSymbolizeCallback(SymbolizeCallback callback) {
83 g_symbolize_callback = callback;
84}
85
86static SymbolizeOpenObjectFileCallback g_symbolize_open_object_file_callback =
87 NULL;
88void InstallSymbolizeOpenObjectFileCallback(
89 SymbolizeOpenObjectFileCallback callback) {
90 g_symbolize_open_object_file_callback = callback;
91}
92
93// This function wraps the Demangle function to provide an interface
94// where the input symbol is demangled in-place.
95// To keep stack consumption low, we would like this function to not
96// get inlined.
James Kuszmaulba0ac1a2022-08-12 16:29:30 -070097static ATTRIBUTE_NOINLINE void DemangleInplace(char *out, size_t out_size) {
Austin Schuh906616c2019-01-21 20:25:11 -080098 char demangled[256]; // Big enough for sane demangled symbols.
99 if (Demangle(out, demangled, sizeof(demangled))) {
100 // Demangling succeeded. Copy to out if the space allows.
101 size_t len = strlen(demangled);
James Kuszmaulba0ac1a2022-08-12 16:29:30 -0700102 if (len + 1 <= out_size) { // +1 for '\0'.
Austin Schuh906616c2019-01-21 20:25:11 -0800103 SAFE_ASSERT(len < sizeof(demangled));
104 memmove(out, demangled, len + 1);
105 }
106 }
107}
108
109_END_GOOGLE_NAMESPACE_
110
111#if defined(__ELF__)
112
James Kuszmaulba0ac1a2022-08-12 16:29:30 -0700113#if defined(HAVE_DLFCN_H)
Austin Schuh906616c2019-01-21 20:25:11 -0800114#include <dlfcn.h>
James Kuszmaulba0ac1a2022-08-12 16:29:30 -0700115#endif
116#if defined(GLOG_OS_OPENBSD)
Austin Schuh906616c2019-01-21 20:25:11 -0800117#include <sys/exec_elf.h>
118#else
119#include <elf.h>
120#endif
James Kuszmaulba0ac1a2022-08-12 16:29:30 -0700121#include <cerrno>
122#include <climits>
123#include <cstddef>
124#include <cstdio>
125#include <cstdlib>
126#include <cstring>
Austin Schuh906616c2019-01-21 20:25:11 -0800127#include <fcntl.h>
Austin Schuh906616c2019-01-21 20:25:11 -0800128#include <stdint.h>
Austin Schuh906616c2019-01-21 20:25:11 -0800129#include <sys/stat.h>
130#include <sys/types.h>
131#include <unistd.h>
132
133#include "symbolize.h"
134#include "config.h"
James Kuszmaulba0ac1a2022-08-12 16:29:30 -0700135#include <glog/raw_logging.h>
Austin Schuh906616c2019-01-21 20:25:11 -0800136
// Evaluates "fn" repeatedly for as long as it yields a negative value with
// errno set to EINTR.
#define NO_INTR(fn) do {} while ((fn) < 0 && errno == EINTR)
139
140_START_GOOGLE_NAMESPACE_
141
142// Read up to "count" bytes from "offset" in the file pointed by file
143// descriptor "fd" into the buffer starting at "buf" while handling short reads
144// and EINTR. On success, return the number of bytes read. Otherwise, return
145// -1.
146static ssize_t ReadFromOffset(const int fd, void *buf, const size_t count,
James Kuszmaulba0ac1a2022-08-12 16:29:30 -0700147 const size_t offset) {
Austin Schuh906616c2019-01-21 20:25:11 -0800148 SAFE_ASSERT(fd >= 0);
James Kuszmaulba0ac1a2022-08-12 16:29:30 -0700149 SAFE_ASSERT(count <= static_cast<size_t>(std::numeric_limits<ssize_t>::max()));
Austin Schuh906616c2019-01-21 20:25:11 -0800150 char *buf0 = reinterpret_cast<char *>(buf);
James Kuszmaulba0ac1a2022-08-12 16:29:30 -0700151 size_t num_bytes = 0;
Austin Schuh906616c2019-01-21 20:25:11 -0800152 while (num_bytes < count) {
153 ssize_t len;
154 NO_INTR(len = pread(fd, buf0 + num_bytes, count - num_bytes,
James Kuszmaulba0ac1a2022-08-12 16:29:30 -0700155 static_cast<off_t>(offset + num_bytes)));
Austin Schuh906616c2019-01-21 20:25:11 -0800156 if (len < 0) { // There was an error other than EINTR.
157 return -1;
158 }
159 if (len == 0) { // Reached EOF.
160 break;
161 }
James Kuszmaulba0ac1a2022-08-12 16:29:30 -0700162 num_bytes += static_cast<size_t>(len);
Austin Schuh906616c2019-01-21 20:25:11 -0800163 }
164 SAFE_ASSERT(num_bytes <= count);
James Kuszmaulba0ac1a2022-08-12 16:29:30 -0700165 return static_cast<ssize_t>(num_bytes);
Austin Schuh906616c2019-01-21 20:25:11 -0800166}
167
168// Try reading exactly "count" bytes from "offset" bytes in a file
169// pointed by "fd" into the buffer starting at "buf" while handling
170// short reads and EINTR. On success, return true. Otherwise, return
171// false.
172static bool ReadFromOffsetExact(const int fd, void *buf,
James Kuszmaulba0ac1a2022-08-12 16:29:30 -0700173 const size_t count, const size_t offset) {
Austin Schuh906616c2019-01-21 20:25:11 -0800174 ssize_t len = ReadFromOffset(fd, buf, count, offset);
James Kuszmaulba0ac1a2022-08-12 16:29:30 -0700175 return static_cast<size_t>(len) == count;
Austin Schuh906616c2019-01-21 20:25:11 -0800176}
177
178// Returns elf_header.e_type if the file pointed by fd is an ELF binary.
179static int FileGetElfType(const int fd) {
180 ElfW(Ehdr) elf_header;
181 if (!ReadFromOffsetExact(fd, &elf_header, sizeof(elf_header), 0)) {
182 return -1;
183 }
184 if (memcmp(elf_header.e_ident, ELFMAG, SELFMAG) != 0) {
185 return -1;
186 }
187 return elf_header.e_type;
188}
189
190// Read the section headers in the given ELF binary, and if a section
191// of the specified type is found, set the output to this section header
192// and return true. Otherwise, return false.
193// To keep stack consumption low, we would like this function to not get
194// inlined.
195static ATTRIBUTE_NOINLINE bool
James Kuszmaulba0ac1a2022-08-12 16:29:30 -0700196GetSectionHeaderByType(const int fd, ElfW(Half) sh_num, const size_t sh_offset,
Austin Schuh906616c2019-01-21 20:25:11 -0800197 ElfW(Word) type, ElfW(Shdr) *out) {
198 // Read at most 16 section headers at a time to save read calls.
199 ElfW(Shdr) buf[16];
James Kuszmaulba0ac1a2022-08-12 16:29:30 -0700200 for (size_t i = 0; i < sh_num;) {
201 const size_t num_bytes_left = (sh_num - i) * sizeof(buf[0]);
202 const size_t num_bytes_to_read =
Austin Schuh906616c2019-01-21 20:25:11 -0800203 (sizeof(buf) > num_bytes_left) ? num_bytes_left : sizeof(buf);
204 const ssize_t len = ReadFromOffset(fd, buf, num_bytes_to_read,
205 sh_offset + i * sizeof(buf[0]));
206 if (len == -1) {
207 return false;
208 }
James Kuszmaulba0ac1a2022-08-12 16:29:30 -0700209 SAFE_ASSERT(static_cast<size_t>(len) % sizeof(buf[0]) == 0);
210 const size_t num_headers_in_buf = static_cast<size_t>(len) / sizeof(buf[0]);
Austin Schuh906616c2019-01-21 20:25:11 -0800211 SAFE_ASSERT(num_headers_in_buf <= sizeof(buf) / sizeof(buf[0]));
James Kuszmaulba0ac1a2022-08-12 16:29:30 -0700212 for (size_t j = 0; j < num_headers_in_buf; ++j) {
Austin Schuh906616c2019-01-21 20:25:11 -0800213 if (buf[j].sh_type == type) {
214 *out = buf[j];
215 return true;
216 }
217 }
218 i += num_headers_in_buf;
219 }
220 return false;
221}
222
223// There is no particular reason to limit section name to 63 characters,
224// but there has (as yet) been no need for anything longer either.
225const int kMaxSectionNameLen = 64;
226
227// name_len should include terminating '\0'.
228bool GetSectionHeaderByName(int fd, const char *name, size_t name_len,
229 ElfW(Shdr) *out) {
230 ElfW(Ehdr) elf_header;
231 if (!ReadFromOffsetExact(fd, &elf_header, sizeof(elf_header), 0)) {
232 return false;
233 }
234
235 ElfW(Shdr) shstrtab;
James Kuszmaulba0ac1a2022-08-12 16:29:30 -0700236 size_t shstrtab_offset =
237 (elf_header.e_shoff + static_cast<size_t>(elf_header.e_shentsize) *
238 static_cast<size_t>(elf_header.e_shstrndx));
Austin Schuh906616c2019-01-21 20:25:11 -0800239 if (!ReadFromOffsetExact(fd, &shstrtab, sizeof(shstrtab), shstrtab_offset)) {
240 return false;
241 }
242
James Kuszmaulba0ac1a2022-08-12 16:29:30 -0700243 for (size_t i = 0; i < elf_header.e_shnum; ++i) {
244 size_t section_header_offset = (elf_header.e_shoff +
Austin Schuh906616c2019-01-21 20:25:11 -0800245 elf_header.e_shentsize * i);
246 if (!ReadFromOffsetExact(fd, out, sizeof(*out), section_header_offset)) {
247 return false;
248 }
249 char header_name[kMaxSectionNameLen];
250 if (sizeof(header_name) < name_len) {
251 RAW_LOG(WARNING, "Section name '%s' is too long (%" PRIuS "); "
252 "section will not be found (even if present).", name, name_len);
253 // No point in even trying.
254 return false;
255 }
James Kuszmaulba0ac1a2022-08-12 16:29:30 -0700256 size_t name_offset = shstrtab.sh_offset + out->sh_name;
Austin Schuh906616c2019-01-21 20:25:11 -0800257 ssize_t n_read = ReadFromOffset(fd, &header_name, name_len, name_offset);
258 if (n_read == -1) {
259 return false;
James Kuszmaulba0ac1a2022-08-12 16:29:30 -0700260 } else if (static_cast<size_t>(n_read) != name_len) {
Austin Schuh906616c2019-01-21 20:25:11 -0800261 // Short read -- name could be at end of file.
262 continue;
263 }
264 if (memcmp(header_name, name, name_len) == 0) {
265 return true;
266 }
267 }
268 return false;
269}
270
271// Read a symbol table and look for the symbol containing the
272// pc. Iterate over symbols in a symbol table and look for the symbol
273// containing "pc". On success, return true and write the symbol name
274// to out. Otherwise, return false.
275// To keep stack consumption low, we would like this function to not get
276// inlined.
277static ATTRIBUTE_NOINLINE bool
James Kuszmaulba0ac1a2022-08-12 16:29:30 -0700278FindSymbol(uint64_t pc, const int fd, char *out, size_t out_size,
Austin Schuh906616c2019-01-21 20:25:11 -0800279 uint64_t symbol_offset, const ElfW(Shdr) *strtab,
280 const ElfW(Shdr) *symtab) {
281 if (symtab == NULL) {
282 return false;
283 }
James Kuszmaulba0ac1a2022-08-12 16:29:30 -0700284 const size_t num_symbols = symtab->sh_size / symtab->sh_entsize;
285 for (unsigned i = 0; i < num_symbols;) {
286 size_t offset = symtab->sh_offset + i * symtab->sh_entsize;
Austin Schuh906616c2019-01-21 20:25:11 -0800287
288 // If we are reading Elf64_Sym's, we want to limit this array to
289 // 32 elements (to keep stack consumption low), otherwise we can
290 // have a 64 element Elf32_Sym array.
James Kuszmaulba0ac1a2022-08-12 16:29:30 -0700291#if defined(__WORDSIZE) && __WORDSIZE == 64
292 const size_t NUM_SYMBOLS = 32U;
Austin Schuh906616c2019-01-21 20:25:11 -0800293#else
James Kuszmaulba0ac1a2022-08-12 16:29:30 -0700294 const size_t NUM_SYMBOLS = 64U;
Austin Schuh906616c2019-01-21 20:25:11 -0800295#endif
296
297 // Read at most NUM_SYMBOLS symbols at once to save read() calls.
298 ElfW(Sym) buf[NUM_SYMBOLS];
James Kuszmaulba0ac1a2022-08-12 16:29:30 -0700299 size_t num_symbols_to_read = std::min(NUM_SYMBOLS, num_symbols - i);
Austin Schuh906616c2019-01-21 20:25:11 -0800300 const ssize_t len =
301 ReadFromOffset(fd, &buf, sizeof(buf[0]) * num_symbols_to_read, offset);
James Kuszmaulba0ac1a2022-08-12 16:29:30 -0700302 SAFE_ASSERT(static_cast<size_t>(len) % sizeof(buf[0]) == 0);
303 const size_t num_symbols_in_buf = static_cast<size_t>(len) / sizeof(buf[0]);
Austin Schuh906616c2019-01-21 20:25:11 -0800304 SAFE_ASSERT(num_symbols_in_buf <= num_symbols_to_read);
James Kuszmaulba0ac1a2022-08-12 16:29:30 -0700305 for (unsigned j = 0; j < num_symbols_in_buf; ++j) {
Austin Schuh906616c2019-01-21 20:25:11 -0800306 const ElfW(Sym)& symbol = buf[j];
307 uint64_t start_address = symbol.st_value;
308 start_address += symbol_offset;
309 uint64_t end_address = start_address + symbol.st_size;
310 if (symbol.st_value != 0 && // Skip null value symbols.
311 symbol.st_shndx != 0 && // Skip undefined symbols.
312 start_address <= pc && pc < end_address) {
313 ssize_t len1 = ReadFromOffset(fd, out, out_size,
314 strtab->sh_offset + symbol.st_name);
315 if (len1 <= 0 || memchr(out, '\0', out_size) == NULL) {
James Kuszmaulba0ac1a2022-08-12 16:29:30 -0700316 memset(out, 0, out_size);
Austin Schuh906616c2019-01-21 20:25:11 -0800317 return false;
318 }
319 return true; // Obtained the symbol name.
320 }
321 }
322 i += num_symbols_in_buf;
323 }
324 return false;
325}
326
327// Get the symbol name of "pc" from the file pointed by "fd". Process
328// both regular and dynamic symbol tables if necessary. On success,
329// write the symbol name to "out" and return true. Otherwise, return
330// false.
331static bool GetSymbolFromObjectFile(const int fd,
332 uint64_t pc,
333 char* out,
James Kuszmaulba0ac1a2022-08-12 16:29:30 -0700334 size_t out_size,
Austin Schuh906616c2019-01-21 20:25:11 -0800335 uint64_t base_address) {
336 // Read the ELF header.
337 ElfW(Ehdr) elf_header;
338 if (!ReadFromOffsetExact(fd, &elf_header, sizeof(elf_header), 0)) {
339 return false;
340 }
341
342 ElfW(Shdr) symtab, strtab;
343
344 // Consult a regular symbol table first.
345 if (GetSectionHeaderByType(fd, elf_header.e_shnum, elf_header.e_shoff,
346 SHT_SYMTAB, &symtab)) {
347 if (!ReadFromOffsetExact(fd, &strtab, sizeof(strtab), elf_header.e_shoff +
348 symtab.sh_link * sizeof(symtab))) {
349 return false;
350 }
351 if (FindSymbol(pc, fd, out, out_size, base_address, &strtab, &symtab)) {
352 return true; // Found the symbol in a regular symbol table.
353 }
354 }
355
356 // If the symbol is not found, then consult a dynamic symbol table.
357 if (GetSectionHeaderByType(fd, elf_header.e_shnum, elf_header.e_shoff,
358 SHT_DYNSYM, &symtab)) {
359 if (!ReadFromOffsetExact(fd, &strtab, sizeof(strtab), elf_header.e_shoff +
360 symtab.sh_link * sizeof(symtab))) {
361 return false;
362 }
363 if (FindSymbol(pc, fd, out, out_size, base_address, &strtab, &symtab)) {
364 return true; // Found the symbol in a dynamic symbol table.
365 }
366 }
367
368 return false;
369}
370
371namespace {
// Holds a file descriptor and closes it on destruction.  Negative values
// are treated as "no descriptor" and are not closed.  Instances cannot be
// copied.
struct FileDescriptor {
  const int fd_;
  explicit FileDescriptor(int fd) : fd_(fd) {}
  ~FileDescriptor() {
    if (fd_ < 0) {
      return;  // Nothing was opened.
    }
    close(fd_);
  }
  // Returns the held descriptor; ownership is not transferred.
  int get() { return fd_; }

 private:
  // Declared but not defined, to forbid copying.
  FileDescriptor(const FileDescriptor &);
  void operator=(const FileDescriptor&);
};
388
389// Helper class for reading lines from file.
390//
391// Note: we don't use ProcMapsIterator since the object is big (it has
392// a 5k array member) and uses async-unsafe functions such as sscanf()
393// and snprintf().
394class LineReader {
395 public:
James Kuszmaulba0ac1a2022-08-12 16:29:30 -0700396 explicit LineReader(int fd, char *buf, size_t buf_len, size_t offset)
Austin Schuh906616c2019-01-21 20:25:11 -0800397 : fd_(fd),
398 buf_(buf),
399 buf_len_(buf_len),
400 offset_(offset),
401 bol_(buf),
402 eol_(buf),
403 eod_(buf) {}
404
405 // Read '\n'-terminated line from file. On success, modify "bol"
406 // and "eol", then return true. Otherwise, return false.
407 //
408 // Note: if the last line doesn't end with '\n', the line will be
409 // dropped. It's an intentional behavior to make the code simple.
410 bool ReadLine(const char **bol, const char **eol) {
411 if (BufferIsEmpty()) { // First time.
412 const ssize_t num_bytes = ReadFromOffset(fd_, buf_, buf_len_, offset_);
413 if (num_bytes <= 0) { // EOF or error.
414 return false;
415 }
James Kuszmaulba0ac1a2022-08-12 16:29:30 -0700416 offset_ += static_cast<size_t>(num_bytes);
Austin Schuh906616c2019-01-21 20:25:11 -0800417 eod_ = buf_ + num_bytes;
418 bol_ = buf_;
419 } else {
420 bol_ = eol_ + 1; // Advance to the next line in the buffer.
421 SAFE_ASSERT(bol_ <= eod_); // "bol_" can point to "eod_".
422 if (!HasCompleteLine()) {
James Kuszmaulba0ac1a2022-08-12 16:29:30 -0700423 const size_t incomplete_line_length = static_cast<size_t>(eod_ - bol_);
Austin Schuh906616c2019-01-21 20:25:11 -0800424 // Move the trailing incomplete line to the beginning.
425 memmove(buf_, bol_, incomplete_line_length);
426 // Read text from file and append it.
427 char * const append_pos = buf_ + incomplete_line_length;
James Kuszmaulba0ac1a2022-08-12 16:29:30 -0700428 const size_t capacity_left = buf_len_ - incomplete_line_length;
Austin Schuh906616c2019-01-21 20:25:11 -0800429 const ssize_t num_bytes =
430 ReadFromOffset(fd_, append_pos, capacity_left, offset_);
431 if (num_bytes <= 0) { // EOF or error.
432 return false;
433 }
James Kuszmaulba0ac1a2022-08-12 16:29:30 -0700434 offset_ += static_cast<size_t>(num_bytes);
Austin Schuh906616c2019-01-21 20:25:11 -0800435 eod_ = append_pos + num_bytes;
436 bol_ = buf_;
437 }
438 }
439 eol_ = FindLineFeed();
440 if (eol_ == NULL) { // '\n' not found. Malformed line.
441 return false;
442 }
443 *eol_ = '\0'; // Replace '\n' with '\0'.
444
445 *bol = bol_;
446 *eol = eol_;
447 return true;
448 }
449
450 // Beginning of line.
451 const char *bol() {
452 return bol_;
453 }
454
455 // End of line.
456 const char *eol() {
457 return eol_;
458 }
459
460 private:
James Kuszmaulba0ac1a2022-08-12 16:29:30 -0700461 LineReader(const LineReader &);
Austin Schuh906616c2019-01-21 20:25:11 -0800462 void operator=(const LineReader&);
463
464 char *FindLineFeed() {
James Kuszmaulba0ac1a2022-08-12 16:29:30 -0700465 return reinterpret_cast<char *>(memchr(bol_, '\n', static_cast<size_t>(eod_ - bol_)));
Austin Schuh906616c2019-01-21 20:25:11 -0800466 }
467
468 bool BufferIsEmpty() {
469 return buf_ == eod_;
470 }
471
472 bool HasCompleteLine() {
473 return !BufferIsEmpty() && FindLineFeed() != NULL;
474 }
475
476 const int fd_;
477 char * const buf_;
James Kuszmaulba0ac1a2022-08-12 16:29:30 -0700478 const size_t buf_len_;
479 size_t offset_;
Austin Schuh906616c2019-01-21 20:25:11 -0800480 char *bol_;
481 char *eol_;
482 const char *eod_; // End of data in "buf_".
483};
484} // namespace
485
486// Place the hex number read from "start" into "*hex". The pointer to
487// the first non-hex character or "end" is returned.
488static char *GetHex(const char *start, const char *end, uint64_t *hex) {
489 *hex = 0;
490 const char *p;
491 for (p = start; p < end; ++p) {
492 int ch = *p;
493 if ((ch >= '0' && ch <= '9') ||
494 (ch >= 'A' && ch <= 'F') || (ch >= 'a' && ch <= 'f')) {
James Kuszmaulba0ac1a2022-08-12 16:29:30 -0700495 *hex = (*hex << 4U) | (ch < 'A' ? static_cast<uint64_t>(ch - '0') : (ch & 0xF) + 9U);
Austin Schuh906616c2019-01-21 20:25:11 -0800496 } else { // Encountered the first non-hex character.
497 break;
498 }
499 }
500 SAFE_ASSERT(p <= end);
501 return const_cast<char *>(p);
502}
503
504// Searches for the object file (from /proc/self/maps) that contains
505// the specified pc. If found, sets |start_address| to the start address
506// of where this object file is mapped in memory, sets the module base
507// address into |base_address|, copies the object file name into
508// |out_file_name|, and attempts to open the object file. If the object
509// file is opened successfully, returns the file descriptor. Otherwise,
510// returns -1. |out_file_name_size| is the size of the file name buffer
511// (including the null-terminator).
512static ATTRIBUTE_NOINLINE int
513OpenObjectFileContainingPcAndGetStartAddress(uint64_t pc,
514 uint64_t &start_address,
515 uint64_t &base_address,
516 char *out_file_name,
James Kuszmaulba0ac1a2022-08-12 16:29:30 -0700517 size_t out_file_name_size) {
Austin Schuh906616c2019-01-21 20:25:11 -0800518 int object_fd;
519
520 int maps_fd;
521 NO_INTR(maps_fd = open("/proc/self/maps", O_RDONLY));
522 FileDescriptor wrapped_maps_fd(maps_fd);
523 if (wrapped_maps_fd.get() < 0) {
524 return -1;
525 }
526
527 int mem_fd;
528 NO_INTR(mem_fd = open("/proc/self/mem", O_RDONLY));
529 FileDescriptor wrapped_mem_fd(mem_fd);
530 if (wrapped_mem_fd.get() < 0) {
531 return -1;
532 }
533
534 // Iterate over maps and look for the map containing the pc. Then
535 // look into the symbol tables inside.
536 char buf[1024]; // Big enough for line of sane /proc/self/maps
James Kuszmaulba0ac1a2022-08-12 16:29:30 -0700537 unsigned num_maps = 0;
Austin Schuh906616c2019-01-21 20:25:11 -0800538 LineReader reader(wrapped_maps_fd.get(), buf, sizeof(buf), 0);
539 while (true) {
540 num_maps++;
541 const char *cursor;
542 const char *eol;
543 if (!reader.ReadLine(&cursor, &eol)) { // EOF or malformed line.
544 return -1;
545 }
546
547 // Start parsing line in /proc/self/maps. Here is an example:
548 //
549 // 08048000-0804c000 r-xp 00000000 08:01 2142121 /bin/cat
550 //
551 // We want start address (08048000), end address (0804c000), flags
552 // (r-xp) and file name (/bin/cat).
553
554 // Read start address.
555 cursor = GetHex(cursor, eol, &start_address);
556 if (cursor == eol || *cursor != '-') {
557 return -1; // Malformed line.
558 }
559 ++cursor; // Skip '-'.
560
561 // Read end address.
562 uint64_t end_address;
563 cursor = GetHex(cursor, eol, &end_address);
564 if (cursor == eol || *cursor != ' ') {
565 return -1; // Malformed line.
566 }
567 ++cursor; // Skip ' '.
568
569 // Read flags. Skip flags until we encounter a space or eol.
570 const char * const flags_start = cursor;
571 while (cursor < eol && *cursor != ' ') {
572 ++cursor;
573 }
574 // We expect at least four letters for flags (ex. "r-xp").
575 if (cursor == eol || cursor < flags_start + 4) {
576 return -1; // Malformed line.
577 }
578
579 // Determine the base address by reading ELF headers in process memory.
580 ElfW(Ehdr) ehdr;
581 // Skip non-readable maps.
582 if (flags_start[0] == 'r' &&
583 ReadFromOffsetExact(mem_fd, &ehdr, sizeof(ElfW(Ehdr)), start_address) &&
584 memcmp(ehdr.e_ident, ELFMAG, SELFMAG) == 0) {
585 switch (ehdr.e_type) {
586 case ET_EXEC:
587 base_address = 0;
588 break;
589 case ET_DYN:
590 // Find the segment containing file offset 0. This will correspond
591 // to the ELF header that we just read. Normally this will have
592 // virtual address 0, but this is not guaranteed. We must subtract
593 // the virtual address from the address where the ELF header was
594 // mapped to get the base address.
595 //
596 // If we fail to find a segment for file offset 0, use the address
597 // of the ELF header as the base address.
598 base_address = start_address;
599 for (unsigned i = 0; i != ehdr.e_phnum; ++i) {
600 ElfW(Phdr) phdr;
601 if (ReadFromOffsetExact(
602 mem_fd, &phdr, sizeof(phdr),
603 start_address + ehdr.e_phoff + i * sizeof(phdr)) &&
604 phdr.p_type == PT_LOAD && phdr.p_offset == 0) {
605 base_address = start_address - phdr.p_vaddr;
606 break;
607 }
608 }
609 break;
610 default:
611 // ET_REL or ET_CORE. These aren't directly executable, so they don't
612 // affect the base address.
613 break;
614 }
615 }
616
617 // Check start and end addresses.
618 if (!(start_address <= pc && pc < end_address)) {
619 continue; // We skip this map. PC isn't in this map.
620 }
621
622 // Check flags. We are only interested in "r*x" maps.
623 if (flags_start[0] != 'r' || flags_start[2] != 'x') {
624 continue; // We skip this map.
625 }
626 ++cursor; // Skip ' '.
627
628 // Read file offset.
629 uint64_t file_offset;
630 cursor = GetHex(cursor, eol, &file_offset);
631 if (cursor == eol || *cursor != ' ') {
632 return -1; // Malformed line.
633 }
634 ++cursor; // Skip ' '.
635
636 // Skip to file name. "cursor" now points to dev. We need to
637 // skip at least two spaces for dev and inode.
638 int num_spaces = 0;
639 while (cursor < eol) {
640 if (*cursor == ' ') {
641 ++num_spaces;
642 } else if (num_spaces >= 2) {
643 // The first non-space character after skipping two spaces
644 // is the beginning of the file name.
645 break;
646 }
647 ++cursor;
648 }
649 if (cursor == eol) {
650 return -1; // Malformed line.
651 }
652
653 // Finally, "cursor" now points to file name of our interest.
654 NO_INTR(object_fd = open(cursor, O_RDONLY));
655 if (object_fd < 0) {
656 // Failed to open object file. Copy the object file name to
657 // |out_file_name|.
658 strncpy(out_file_name, cursor, out_file_name_size);
659 // Making sure |out_file_name| is always null-terminated.
660 out_file_name[out_file_name_size - 1] = '\0';
661 return -1;
662 }
663 return object_fd;
664 }
665}
666
// POSIX doesn't define any async-signal safe function for converting
// an integer to ASCII.  We'll have to define our own version.
//
// itoa_r() writes the unsigned value "i" in the given "base" (2 to 16,
// lower-case digits) to "buf" as a NUL-terminated string, left-padded with
// '0' to at least "padding" digits.  It never writes more than "sz" bytes.
// Returns "buf" on success.  Returns NULL when "sz" is 0, when "base" is out
// of range, or when the digits plus the terminating NUL do not fit in "sz"
// bytes; in the last two cases buf[0] is set to NUL.
// NOTE: code from sandbox/linux/seccomp-bpf/demo.cc.
static char *itoa_r(uintptr_t i, char *buf, size_t sz, unsigned base, size_t padding) {
  // Make sure we can write at least one NUL byte.
  if (sz < 1) {
    return NULL;
  }

  if (base < 2 || base > 16) {
    buf[0] = '\000';
    return NULL;
  }

  // Emit digits least-significant first.  "used" counts the bytes needed so
  // far, starting at 1 for the terminating NUL.  At least one digit
  // (i.e. '0') is always produced.
  size_t used = 1;
  char *end = buf;
  for (;;) {
    // Make sure there is still enough space left in our output buffer.
    if (++used > sz) {
      buf[0] = '\000';
      return NULL;
    }

    *end++ = "0123456789abcdef"[i % base];
    i /= base;

    if (padding > 0) {
      --padding;
    }
    if (i == 0 && padding == 0) {
      break;
    }
  }

  // Terminate the output with a NUL character.
  *end = '\000';

  // The digits were generated in reverse order, because the number of
  // characters needed is not known until the conversion is done.  Reverse
  // them in place.
  for (char *lo = buf, *hi = end - 1; lo < hi; ++lo, --hi) {
    const char tmp = *hi;
    *hi = *lo;
    *lo = tmp;
  }
  return buf;
}
721
722// Safely appends string |source| to string |dest|. Never writes past the
723// buffer size |dest_size| and guarantees that |dest| is null-terminated.
James Kuszmaulba0ac1a2022-08-12 16:29:30 -0700724static void SafeAppendString(const char* source, char* dest, size_t dest_size) {
725 size_t dest_string_length = strlen(dest);
Austin Schuh906616c2019-01-21 20:25:11 -0800726 SAFE_ASSERT(dest_string_length < dest_size);
727 dest += dest_string_length;
728 dest_size -= dest_string_length;
729 strncpy(dest, source, dest_size);
730 // Making sure |dest| is always null-terminated.
731 dest[dest_size - 1] = '\0';
732}
733
734// Converts a 64-bit value into a hex string, and safely appends it to |dest|.
735// Never writes past the buffer size |dest_size| and guarantees that |dest| is
736// null-terminated.
James Kuszmaulba0ac1a2022-08-12 16:29:30 -0700737static void SafeAppendHexNumber(uint64_t value, char* dest, size_t dest_size) {
Austin Schuh906616c2019-01-21 20:25:11 -0800738 // 64-bit numbers in hex can have up to 16 digits.
739 char buf[17] = {'\0'};
740 SafeAppendString(itoa_r(value, buf, sizeof(buf), 16, 0), dest, dest_size);
741}
742
743// The implementation of our symbolization routine. If it
744// successfully finds the symbol containing "pc" and obtains the
745// symbol name, returns true and write the symbol name to "out".
746// Otherwise, returns false. If Callback function is installed via
747// InstallSymbolizeCallback(), the function is also called in this function,
748// and "out" is used as its output.
749// To keep stack consumption low, we would like this function to not
750// get inlined.
751static ATTRIBUTE_NOINLINE bool SymbolizeAndDemangle(void *pc, char *out,
James Kuszmaulba0ac1a2022-08-12 16:29:30 -0700752 size_t out_size) {
Austin Schuh906616c2019-01-21 20:25:11 -0800753 uint64_t pc0 = reinterpret_cast<uintptr_t>(pc);
754 uint64_t start_address = 0;
755 uint64_t base_address = 0;
756 int object_fd = -1;
757
758 if (out_size < 1) {
759 return false;
760 }
761 out[0] = '\0';
762 SafeAppendString("(", out, out_size);
763
764 if (g_symbolize_open_object_file_callback) {
765 object_fd = g_symbolize_open_object_file_callback(pc0, start_address,
766 base_address, out + 1,
767 out_size - 1);
768 } else {
769 object_fd = OpenObjectFileContainingPcAndGetStartAddress(pc0, start_address,
770 base_address,
771 out + 1,
772 out_size - 1);
773 }
774
James Kuszmaulba0ac1a2022-08-12 16:29:30 -0700775 FileDescriptor wrapped_object_fd(object_fd);
776
Austin Schuh906616c2019-01-21 20:25:11 -0800777#if defined(PRINT_UNSYMBOLIZED_STACK_TRACES)
778 {
Austin Schuh906616c2019-01-21 20:25:11 -0800779#else
780 // Check whether a file name was returned.
781 if (object_fd < 0) {
782#endif
783 if (out[1]) {
784 // The object file containing PC was determined successfully however the
785 // object file was not opened successfully. This is still considered
786 // success because the object file name and offset are known and tools
787 // like asan_symbolize.py can be used for the symbolization.
788 out[out_size - 1] = '\0'; // Making sure |out| is always null-terminated.
789 SafeAppendString("+0x", out, out_size);
790 SafeAppendHexNumber(pc0 - base_address, out, out_size);
791 SafeAppendString(")", out, out_size);
792 return true;
793 }
794 // Failed to determine the object file containing PC. Bail out.
795 return false;
796 }
Austin Schuh906616c2019-01-21 20:25:11 -0800797 int elf_type = FileGetElfType(wrapped_object_fd.get());
798 if (elf_type == -1) {
799 return false;
800 }
801 if (g_symbolize_callback) {
802 // Run the call back if it's installed.
803 // Note: relocation (and much of the rest of this code) will be
804 // wrong for prelinked shared libraries and PIE executables.
805 uint64_t relocation = (elf_type == ET_DYN) ? start_address : 0;
806 int num_bytes_written = g_symbolize_callback(wrapped_object_fd.get(),
807 pc, out, out_size,
808 relocation);
809 if (num_bytes_written > 0) {
James Kuszmaulba0ac1a2022-08-12 16:29:30 -0700810 out += static_cast<size_t>(num_bytes_written);
811 out_size -= static_cast<size_t>(num_bytes_written);
Austin Schuh906616c2019-01-21 20:25:11 -0800812 }
813 }
814 if (!GetSymbolFromObjectFile(wrapped_object_fd.get(), pc0,
815 out, out_size, base_address)) {
James Kuszmaulba0ac1a2022-08-12 16:29:30 -0700816 if (out[1] && !g_symbolize_callback) {
817 // The object file containing PC was opened successfully however the
818 // symbol was not found. The object may have been stripped. This is still
819 // considered success because the object file name and offset are known
820 // and tools like asan_symbolize.py can be used for the symbolization.
821 out[out_size - 1] = '\0'; // Making sure |out| is always null-terminated.
822 SafeAppendString("+0x", out, out_size);
823 SafeAppendHexNumber(pc0 - base_address, out, out_size);
824 SafeAppendString(")", out, out_size);
825 return true;
826 }
Austin Schuh906616c2019-01-21 20:25:11 -0800827 return false;
828 }
829
830 // Symbolization succeeded. Now we try to demangle the symbol.
831 DemangleInplace(out, out_size);
832 return true;
833}
834
835_END_GOOGLE_NAMESPACE_
836
James Kuszmaulba0ac1a2022-08-12 16:29:30 -0700837#elif (defined(GLOG_OS_MACOSX) || defined(GLOG_OS_EMSCRIPTEN)) && defined(HAVE_DLADDR)
Austin Schuh906616c2019-01-21 20:25:11 -0800838
839#include <dlfcn.h>
James Kuszmaulba0ac1a2022-08-12 16:29:30 -0700840#include <cstring>
Austin Schuh906616c2019-01-21 20:25:11 -0800841
842_START_GOOGLE_NAMESPACE_
843
844static ATTRIBUTE_NOINLINE bool SymbolizeAndDemangle(void *pc, char *out,
James Kuszmaulba0ac1a2022-08-12 16:29:30 -0700845 size_t out_size) {
Austin Schuh906616c2019-01-21 20:25:11 -0800846 Dl_info info;
847 if (dladdr(pc, &info)) {
James Kuszmaulba0ac1a2022-08-12 16:29:30 -0700848 if (info.dli_sname) {
849 if (strlen(info.dli_sname) < out_size) {
850 strcpy(out, info.dli_sname);
851 // Symbolization succeeded. Now we try to demangle the symbol.
852 DemangleInplace(out, out_size);
853 return true;
854 }
Austin Schuh906616c2019-01-21 20:25:11 -0800855 }
856 }
857 return false;
858}
859
860_END_GOOGLE_NAMESPACE_
861
James Kuszmaulba0ac1a2022-08-12 16:29:30 -0700862#elif defined(GLOG_OS_WINDOWS) || defined(GLOG_OS_CYGWIN)
Austin Schuh906616c2019-01-21 20:25:11 -0800863
864#include <windows.h>
865#include <dbghelp.h>
866
867#ifdef _MSC_VER
868#pragma comment(lib, "dbghelp")
869#endif
870
871_START_GOOGLE_NAMESPACE_
872
873class SymInitializer {
874public:
875 HANDLE process;
876 bool ready;
877 SymInitializer() : process(NULL), ready(false) {
878 // Initialize the symbol handler.
879 // https://msdn.microsoft.com/en-us/library/windows/desktop/ms680344(v=vs.85).aspx
880 process = GetCurrentProcess();
881 // Defer symbol loading.
882 // We do not request undecorated symbols with SYMOPT_UNDNAME
883 // because the mangling library calls UnDecorateSymbolName.
884 SymSetOptions(SYMOPT_DEFERRED_LOADS);
885 if (SymInitialize(process, NULL, true)) {
886 ready = true;
887 }
888 }
889 ~SymInitializer() {
890 SymCleanup(process);
891 // We do not need to close `HANDLE process` because it's a "pseudo handle."
892 }
893private:
894 SymInitializer(const SymInitializer&);
895 SymInitializer& operator=(const SymInitializer&);
896};
897
898static ATTRIBUTE_NOINLINE bool SymbolizeAndDemangle(void *pc, char *out,
James Kuszmaulba0ac1a2022-08-12 16:29:30 -0700899 size_t out_size) {
Austin Schuh906616c2019-01-21 20:25:11 -0800900 const static SymInitializer symInitializer;
901 if (!symInitializer.ready) {
902 return false;
903 }
904 // Resolve symbol information from address.
905 // https://msdn.microsoft.com/en-us/library/windows/desktop/ms680578(v=vs.85).aspx
906 char buf[sizeof(SYMBOL_INFO) + MAX_SYM_NAME];
907 SYMBOL_INFO *symbol = reinterpret_cast<SYMBOL_INFO *>(buf);
908 symbol->SizeOfStruct = sizeof(SYMBOL_INFO);
909 symbol->MaxNameLen = MAX_SYM_NAME;
910 // We use the ANSI version to ensure the string type is always `char *`.
911 // This could break if a symbol has Unicode in it.
912 BOOL ret = SymFromAddr(symInitializer.process,
913 reinterpret_cast<DWORD64>(pc), 0, symbol);
James Kuszmaulba0ac1a2022-08-12 16:29:30 -0700914 if (ret == 1 && static_cast<ssize_t>(symbol->NameLen) < out_size) {
Austin Schuh906616c2019-01-21 20:25:11 -0800915 // `NameLen` does not include the null terminating character.
916 strncpy(out, symbol->Name, static_cast<size_t>(symbol->NameLen) + 1);
917 out[static_cast<size_t>(symbol->NameLen)] = '\0';
918 // Symbolization succeeded. Now we try to demangle the symbol.
919 DemangleInplace(out, out_size);
920 return true;
921 }
922 return false;
923}
924
925_END_GOOGLE_NAMESPACE_
926
927#else
928# error BUG: HAVE_SYMBOLIZE was wrongly set
929#endif
930
931_START_GOOGLE_NAMESPACE_
932
James Kuszmaulba0ac1a2022-08-12 16:29:30 -0700933bool Symbolize(void *pc, char *out, size_t out_size) {
Austin Schuh906616c2019-01-21 20:25:11 -0800934 return SymbolizeAndDemangle(pc, out, out_size);
935}
936
937_END_GOOGLE_NAMESPACE_
938
939#else /* HAVE_SYMBOLIZE */
940
James Kuszmaulba0ac1a2022-08-12 16:29:30 -0700941#include <cassert>
Austin Schuh906616c2019-01-21 20:25:11 -0800942
943#include "config.h"
944
945_START_GOOGLE_NAMESPACE_
946
// Fallback used when HAVE_SYMBOLIZE is not set: symbolization is not
// available here, so the call trips an assertion and, if assertions are
// compiled out, reports failure.  None of the arguments are used.
// TODO: Support other environments.
bool Symbolize(void* pc, char* out, size_t out_size) {
  static_cast<void>(pc);
  static_cast<void>(out);
  static_cast<void>(out_size);
  assert(0);
  return false;
}
952
953_END_GOOGLE_NAMESPACE_
954
955#endif