// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
// Copyright (c) 2009, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30
// ---
// Author: Craig Silverstein
//
// This forks out to pprof to do the actual symbolizing.  We might
// be better off writing our own in C++.
36
37#include "config.h"
38#include "symbolize.h"
39#include <stdlib.h>
40#ifdef HAVE_UNISTD_H
41#include <unistd.h> // for write()
42#endif
43#ifdef HAVE_SYS_SOCKET_H
44#include <sys/socket.h> // for socketpair() -- needed by Symbolize
45#endif
46#ifdef HAVE_SYS_WAIT_H
47#include <sys/wait.h> // for wait() -- needed by Symbolize
48#endif
49#ifdef HAVE_POLL_H
50#include <poll.h>
51#endif
52#ifdef __MACH__
53#include <mach-o/dyld.h> // for GetProgramInvocationName()
54#include <limits.h> // for PATH_MAX
55#endif
56#if defined(__CYGWIN__) || defined(__CYGWIN32__)
57#include <io.h> // for get_osfhandle()
58#endif
59#include <string>
60#include "base/commandlineflags.h"
61#include "base/logging.h"
62#include "base/sysinfo.h"
Brian Silverman20350ac2021-11-17 18:19:55 -080063#if defined(__FreeBSD__)
64#include <sys/sysctl.h>
65#endif
Austin Schuh745610d2015-09-06 18:19:50 -070066
67using std::string;
68using tcmalloc::DumpProcSelfMaps; // from sysinfo.h
69
Brian Silverman20350ac2021-11-17 18:19:55 -080070// pprof may be used after destructors are
Austin Schuh745610d2015-09-06 18:19:50 -070071// called (since that's when leak-checking is done), so we make
72// a more-permanent copy that won't ever get destroyed.
Brian Silverman20350ac2021-11-17 18:19:55 -080073static char* get_pprof_path() {
74 static char* result = ([] () {
75 string pprof_string = EnvToString("PPROF_PATH", "pprof-symbolize");
76 return strdup(pprof_string.c_str());
77 })();
78
79 return result;
80}
Austin Schuh745610d2015-09-06 18:19:50 -070081
// Looks up the name (path) of the running program.  Yields NULL on an OS
// where we have no way to get the invocation name, or when the platform
// query fails.  The static buffers below are ok because we're not called
// from a thread.
static const char* GetProgramInvocationName() {
#if defined(HAVE_PROGRAM_INVOCATION_NAME)
# ifdef __UCLIBC__
  extern const char* program_invocation_name;  // uclibc provides this
# else
  extern char* program_invocation_name;  // gcc provides this
# endif
  return program_invocation_name;
#elif defined(__MACH__)
  // Deliberately a fixed static buffer rather than allocated memory: we may
  // be calculating this when memory is corrupted.
  static char exe_path[PATH_MAX];
  if (exe_path[0] != '\0') return exe_path;  // filled in by an earlier call
  uint32_t capacity = sizeof(exe_path);
  if (_NSGetExecutablePath(exe_path, &capacity) != 0) return NULL;
  return exe_path;
#elif defined(__FreeBSD__)
  // Queried through sysctl on every call; the result lands in exe_path.
  static const int mib[4] = { CTL_KERN, KERN_PROC, KERN_PROC_PATHNAME, -1 };
  static char exe_path[PATH_MAX];
  size_t capacity = sizeof(exe_path);
  const int rc = sysctl(mib, 4, exe_path, &capacity, NULL, 0);
  return rc == 0 ? exe_path : NULL;
#else
  return NULL;  // figure out a way to get argv[0]
#endif
}
113
114// Prints an error message when you can't run Symbolize().
115static void PrintError(const char* reason) {
116 RAW_LOG(ERROR,
117 "*** WARNING: Cannot convert addresses to symbols in output below.\n"
118 "*** Reason: %s\n"
119 "*** If you cannot fix this, try running pprof directly.\n",
120 reason);
121}
122
123void SymbolTable::Add(const void* addr) {
124 symbolization_table_[addr] = "";
125}
126
127const char* SymbolTable::GetSymbol(const void* addr) {
128 return symbolization_table_[addr];
129}
130
131// Updates symbolization_table with the pointers to symbol names corresponding
132// to its keys. The symbol names are stored in out, which is allocated and
133// freed by the caller of this routine.
134// Note that the forking/etc is not thread-safe or re-entrant. That's
135// ok for the purpose we need -- reporting leaks detected by heap-checker
136// -- but be careful if you decide to use this routine for other purposes.
137// Returns number of symbols read on error. If can't symbolize, returns 0
138// and emits an error message about why.
139int SymbolTable::Symbolize() {
140#if !defined(HAVE_UNISTD_H) || !defined(HAVE_SYS_SOCKET_H) || !defined(HAVE_SYS_WAIT_H)
141 PrintError("Perftools does not know how to call a sub-process on this O/S");
142 return 0;
143#else
144 const char* argv0 = GetProgramInvocationName();
145 if (argv0 == NULL) { // can't call symbolize if we can't figure out our name
146 PrintError("Cannot figure out the name of this executable (argv0)");
147 return 0;
148 }
Brian Silverman20350ac2021-11-17 18:19:55 -0800149 if (access(get_pprof_path(), R_OK) != 0) {
Austin Schuh745610d2015-09-06 18:19:50 -0700150 PrintError("Cannot find 'pprof' (is PPROF_PATH set correctly?)");
151 return 0;
152 }
153
154 // All this work is to do two-way communication. ugh.
155 int *child_in = NULL; // file descriptors
156 int *child_out = NULL; // for now, we don't worry about child_err
157 int child_fds[5][2]; // socketpair may be called up to five times below
158
159 // The client program may close its stdin and/or stdout and/or stderr
160 // thus allowing socketpair to reuse file descriptors 0, 1 or 2.
161 // In this case the communication between the forked processes may be broken
162 // if either the parent or the child tries to close or duplicate these
163 // descriptors. The loop below produces two pairs of file descriptors, each
164 // greater than 2 (stderr).
165 for (int i = 0; i < 5; i++) {
166 if (socketpair(AF_UNIX, SOCK_STREAM, 0, child_fds[i]) == -1) {
167 for (int j = 0; j < i; j++) {
168 close(child_fds[j][0]);
169 close(child_fds[j][1]);
170 PrintError("Cannot create a socket pair");
171 }
172 return 0;
173 } else {
174 if ((child_fds[i][0] > 2) && (child_fds[i][1] > 2)) {
175 if (child_in == NULL) {
176 child_in = child_fds[i];
177 } else {
178 child_out = child_fds[i];
179 for (int j = 0; j < i; j++) {
180 if (child_fds[j] == child_in) continue;
181 close(child_fds[j][0]);
182 close(child_fds[j][1]);
183 }
184 break;
185 }
186 }
187 }
188 }
189
190 switch (fork()) {
191 case -1: { // error
192 close(child_in[0]);
193 close(child_in[1]);
194 close(child_out[0]);
195 close(child_out[1]);
196 PrintError("Unknown error calling fork()");
197 return 0;
198 }
199 case 0: { // child
200 close(child_in[1]); // child uses the 0's, parent uses the 1's
201 close(child_out[1]); // child uses the 0's, parent uses the 1's
202 close(0);
203 close(1);
204 if (dup2(child_in[0], 0) == -1) _exit(1);
205 if (dup2(child_out[0], 1) == -1) _exit(2);
206 // Unset vars that might cause trouble when we fork
207 unsetenv("CPUPROFILE");
208 unsetenv("HEAPPROFILE");
209 unsetenv("HEAPCHECK");
210 unsetenv("PERFTOOLS_VERBOSE");
Brian Silverman20350ac2021-11-17 18:19:55 -0800211 execlp(get_pprof_path(), get_pprof_path(),
Austin Schuh745610d2015-09-06 18:19:50 -0700212 "--symbols", argv0, NULL);
213 _exit(3); // if execvp fails, it's bad news for us
214 }
215 default: { // parent
216 close(child_in[0]); // child uses the 0's, parent uses the 1's
217 close(child_out[0]); // child uses the 0's, parent uses the 1's
218#ifdef HAVE_POLL_H
219 // Waiting for 1ms seems to give the OS time to notice any errors.
220 poll(0, 0, 1);
221 // For maximum safety, we check to make sure the execlp
222 // succeeded before trying to write. (Otherwise we'll get a
223 // SIGPIPE.) For systems without poll.h, we'll just skip this
224 // check, and trust that the user set PPROF_PATH correctly!
225 struct pollfd pfd = { child_in[1], POLLOUT, 0 };
226 if (!poll(&pfd, 1, 0) || !(pfd.revents & POLLOUT) ||
227 (pfd.revents & (POLLHUP|POLLERR))) {
228 PrintError("Cannot run 'pprof' (is PPROF_PATH set correctly?)");
229 return 0;
230 }
231#endif
232#if defined(__CYGWIN__) || defined(__CYGWIN32__)
233 // On cygwin, DumpProcSelfMaps() takes a HANDLE, not an fd. Convert.
234 const HANDLE symbols_handle = (HANDLE) get_osfhandle(child_in[1]);
235 DumpProcSelfMaps(symbols_handle);
236#else
237 DumpProcSelfMaps(child_in[1]); // what pprof expects on stdin
238#endif
239
240 // Allocate 24 bytes = ("0x" + 8 bytes + "\n" + overhead) for each
241 // address to feed to pprof.
242 const int kOutBufSize = 24 * symbolization_table_.size();
243 char *pprof_buffer = new char[kOutBufSize];
244 int written = 0;
245 for (SymbolMap::const_iterator iter = symbolization_table_.begin();
246 iter != symbolization_table_.end(); ++iter) {
247 written += snprintf(pprof_buffer + written, kOutBufSize - written,
248 // pprof expects format to be 0xXXXXXX
249 "0x%" PRIxPTR "\n", reinterpret_cast<uintptr_t>(iter->first));
250 }
251 write(child_in[1], pprof_buffer, strlen(pprof_buffer));
252 close(child_in[1]); // that's all we need to write
Brian Silverman20350ac2021-11-17 18:19:55 -0800253 delete[] pprof_buffer;
Austin Schuh745610d2015-09-06 18:19:50 -0700254
255 const int kSymbolBufferSize = kSymbolSize * symbolization_table_.size();
256 int total_bytes_read = 0;
257 delete[] symbol_buffer_;
258 symbol_buffer_ = new char[kSymbolBufferSize];
259 memset(symbol_buffer_, '\0', kSymbolBufferSize);
260 while (1) {
261 int bytes_read = read(child_out[1], symbol_buffer_ + total_bytes_read,
262 kSymbolBufferSize - total_bytes_read);
263 if (bytes_read < 0) {
264 close(child_out[1]);
265 PrintError("Cannot read data from pprof");
266 return 0;
267 } else if (bytes_read == 0) {
268 close(child_out[1]);
269 wait(NULL);
270 break;
271 } else {
272 total_bytes_read += bytes_read;
273 }
274 }
275 // We have successfully read the output of pprof into out. Make sure
276 // the last symbol is full (we can tell because it ends with a \n).
277 if (total_bytes_read == 0 || symbol_buffer_[total_bytes_read - 1] != '\n')
278 return 0;
279 // make the symbolization_table_ values point to the output vector
280 SymbolMap::iterator fill = symbolization_table_.begin();
281 int num_symbols = 0;
282 const char *current_name = symbol_buffer_;
283 for (int i = 0; i < total_bytes_read; i++) {
284 if (symbol_buffer_[i] == '\n') {
285 fill->second = current_name;
286 symbol_buffer_[i] = '\0';
287 current_name = symbol_buffer_ + i + 1;
288 fill++;
289 num_symbols++;
290 }
291 }
292 return num_symbols;
293 }
294 }
295 PrintError("Unkown error (should never occur!)");
296 return 0; // shouldn't be reachable
297#endif
298}