Austin Schuh | 745610d | 2015-09-06 18:19:50 -0700 | [diff] [blame] | 1 | // -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- |
| 2 | // Copyright (c) 2009, Google Inc. |
| 3 | // All rights reserved. |
Brian Silverman | 20350ac | 2021-11-17 18:19:55 -0800 | [diff] [blame] | 4 | // |
Austin Schuh | 745610d | 2015-09-06 18:19:50 -0700 | [diff] [blame] | 5 | // Redistribution and use in source and binary forms, with or without |
| 6 | // modification, are permitted provided that the following conditions are |
| 7 | // met: |
Brian Silverman | 20350ac | 2021-11-17 18:19:55 -0800 | [diff] [blame] | 8 | // |
Austin Schuh | 745610d | 2015-09-06 18:19:50 -0700 | [diff] [blame] | 9 | // * Redistributions of source code must retain the above copyright |
| 10 | // notice, this list of conditions and the following disclaimer. |
| 11 | // * Redistributions in binary form must reproduce the above |
| 12 | // copyright notice, this list of conditions and the following disclaimer |
| 13 | // in the documentation and/or other materials provided with the |
| 14 | // distribution. |
| 15 | // * Neither the name of Google Inc. nor the names of its |
| 16 | // contributors may be used to endorse or promote products derived from |
| 17 | // this software without specific prior written permission. |
Brian Silverman | 20350ac | 2021-11-17 18:19:55 -0800 | [diff] [blame] | 18 | // |
Austin Schuh | 745610d | 2015-09-06 18:19:50 -0700 | [diff] [blame] | 19 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
| 20 | // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
| 21 | // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
| 22 | // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
| 23 | // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
| 24 | // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
| 25 | // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
| 26 | // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
| 27 | // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| 28 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
| 29 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| 30 | |
| 31 | // --- |
| 32 | // Author: Craig Silverstein |
| 33 | // |
| 34 | // This forks out to pprof to do the actual symbolizing. We might |
| 35 | // be better off writing our own in C++. |
| 36 | |
| 37 | #include "config.h" |
| 38 | #include "symbolize.h" |
| 39 | #include <stdlib.h> |
| 40 | #ifdef HAVE_UNISTD_H |
| 41 | #include <unistd.h> // for write() |
| 42 | #endif |
| 43 | #ifdef HAVE_SYS_SOCKET_H |
| 44 | #include <sys/socket.h> // for socketpair() -- needed by Symbolize |
| 45 | #endif |
| 46 | #ifdef HAVE_SYS_WAIT_H |
| 47 | #include <sys/wait.h> // for wait() -- needed by Symbolize |
| 48 | #endif |
| 49 | #ifdef HAVE_POLL_H |
| 50 | #include <poll.h> |
| 51 | #endif |
| 52 | #ifdef __MACH__ |
| 53 | #include <mach-o/dyld.h> // for GetProgramInvocationName() |
| 54 | #include <limits.h> // for PATH_MAX |
| 55 | #endif |
| 56 | #if defined(__CYGWIN__) || defined(__CYGWIN32__) |
| 57 | #include <io.h> // for get_osfhandle() |
| 58 | #endif |
| 59 | #include <string> |
| 60 | #include "base/commandlineflags.h" |
| 61 | #include "base/logging.h" |
| 62 | #include "base/sysinfo.h" |
Brian Silverman | 20350ac | 2021-11-17 18:19:55 -0800 | [diff] [blame] | 63 | #if defined(__FreeBSD__) |
| 64 | #include <sys/sysctl.h> |
| 65 | #endif |
Austin Schuh | 745610d | 2015-09-06 18:19:50 -0700 | [diff] [blame] | 66 | |
| 67 | using std::string; |
| 68 | using tcmalloc::DumpProcSelfMaps; // from sysinfo.h |
| 69 | |
Brian Silverman | 20350ac | 2021-11-17 18:19:55 -0800 | [diff] [blame] | 70 | // pprof may be used after destructors are |
Austin Schuh | 745610d | 2015-09-06 18:19:50 -0700 | [diff] [blame] | 71 | // called (since that's when leak-checking is done), so we make |
| 72 | // a more-permanent copy that won't ever get destroyed. |
Brian Silverman | 20350ac | 2021-11-17 18:19:55 -0800 | [diff] [blame] | 73 | static char* get_pprof_path() { |
| 74 | static char* result = ([] () { |
| 75 | string pprof_string = EnvToString("PPROF_PATH", "pprof-symbolize"); |
| 76 | return strdup(pprof_string.c_str()); |
| 77 | })(); |
| 78 | |
| 79 | return result; |
| 80 | } |
Austin Schuh | 745610d | 2015-09-06 18:19:50 -0700 | [diff] [blame] | 81 | |
// Looks up the name/path this program was started with.
// Yields NULL on platforms where we have no way of finding it out, or when
// the platform-specific query fails.
// The function-local static buffers are acceptable because this is not
// expected to be called from multiple threads.
static const char* GetProgramInvocationName() {
#if defined(HAVE_PROGRAM_INVOCATION_NAME)
  // The C library already tracks the name for us.
#ifdef __UCLIBC__
  extern const char* program_invocation_name;  // uclibc provides this
#else
  extern char* program_invocation_name;  // gcc provides this
#endif
  return program_invocation_name;
#elif defined(__MACH__)
  // A fixed static buffer is used instead of heap memory, since this may
  // run while the heap is corrupted.
  static char exe_path[PATH_MAX];
  if (exe_path[0] != '\0') {
    return exe_path;  // already computed by an earlier call
  }
  uint32_t capacity = sizeof(exe_path);
  if (_NSGetExecutablePath(exe_path, &capacity) != 0) {
    return NULL;
  }
  return exe_path;
#elif defined(__FreeBSD__)
  static char exe_path[PATH_MAX];
  static const int mib[4] = { CTL_KERN, KERN_PROC, KERN_PROC_PATHNAME, -1 };
  size_t exe_path_size = sizeof(exe_path);
  if (sysctl(mib, 4, exe_path, &exe_path_size, NULL, 0) != 0) {
    return NULL;
  }
  return exe_path;
#else
  return NULL;  // figure out a way to get argv[0]
#endif
}
| 113 | |
| 114 | // Prints an error message when you can't run Symbolize(). |
| 115 | static void PrintError(const char* reason) { |
| 116 | RAW_LOG(ERROR, |
| 117 | "*** WARNING: Cannot convert addresses to symbols in output below.\n" |
| 118 | "*** Reason: %s\n" |
| 119 | "*** If you cannot fix this, try running pprof directly.\n", |
| 120 | reason); |
| 121 | } |
| 122 | |
| 123 | void SymbolTable::Add(const void* addr) { |
| 124 | symbolization_table_[addr] = ""; |
| 125 | } |
| 126 | |
| 127 | const char* SymbolTable::GetSymbol(const void* addr) { |
| 128 | return symbolization_table_[addr]; |
| 129 | } |
| 130 | |
| 131 | // Updates symbolization_table with the pointers to symbol names corresponding |
| 132 | // to its keys. The symbol names are stored in out, which is allocated and |
| 133 | // freed by the caller of this routine. |
| 134 | // Note that the forking/etc is not thread-safe or re-entrant. That's |
| 135 | // ok for the purpose we need -- reporting leaks detected by heap-checker |
| 136 | // -- but be careful if you decide to use this routine for other purposes. |
| 137 | // Returns number of symbols read on error. If can't symbolize, returns 0 |
| 138 | // and emits an error message about why. |
| 139 | int SymbolTable::Symbolize() { |
| 140 | #if !defined(HAVE_UNISTD_H) || !defined(HAVE_SYS_SOCKET_H) || !defined(HAVE_SYS_WAIT_H) |
| 141 | PrintError("Perftools does not know how to call a sub-process on this O/S"); |
| 142 | return 0; |
| 143 | #else |
| 144 | const char* argv0 = GetProgramInvocationName(); |
| 145 | if (argv0 == NULL) { // can't call symbolize if we can't figure out our name |
| 146 | PrintError("Cannot figure out the name of this executable (argv0)"); |
| 147 | return 0; |
| 148 | } |
Brian Silverman | 20350ac | 2021-11-17 18:19:55 -0800 | [diff] [blame] | 149 | if (access(get_pprof_path(), R_OK) != 0) { |
Austin Schuh | 745610d | 2015-09-06 18:19:50 -0700 | [diff] [blame] | 150 | PrintError("Cannot find 'pprof' (is PPROF_PATH set correctly?)"); |
| 151 | return 0; |
| 152 | } |
| 153 | |
| 154 | // All this work is to do two-way communication. ugh. |
| 155 | int *child_in = NULL; // file descriptors |
| 156 | int *child_out = NULL; // for now, we don't worry about child_err |
| 157 | int child_fds[5][2]; // socketpair may be called up to five times below |
| 158 | |
| 159 | // The client program may close its stdin and/or stdout and/or stderr |
| 160 | // thus allowing socketpair to reuse file descriptors 0, 1 or 2. |
| 161 | // In this case the communication between the forked processes may be broken |
| 162 | // if either the parent or the child tries to close or duplicate these |
| 163 | // descriptors. The loop below produces two pairs of file descriptors, each |
| 164 | // greater than 2 (stderr). |
| 165 | for (int i = 0; i < 5; i++) { |
| 166 | if (socketpair(AF_UNIX, SOCK_STREAM, 0, child_fds[i]) == -1) { |
| 167 | for (int j = 0; j < i; j++) { |
| 168 | close(child_fds[j][0]); |
| 169 | close(child_fds[j][1]); |
| 170 | PrintError("Cannot create a socket pair"); |
| 171 | } |
| 172 | return 0; |
| 173 | } else { |
| 174 | if ((child_fds[i][0] > 2) && (child_fds[i][1] > 2)) { |
| 175 | if (child_in == NULL) { |
| 176 | child_in = child_fds[i]; |
| 177 | } else { |
| 178 | child_out = child_fds[i]; |
| 179 | for (int j = 0; j < i; j++) { |
| 180 | if (child_fds[j] == child_in) continue; |
| 181 | close(child_fds[j][0]); |
| 182 | close(child_fds[j][1]); |
| 183 | } |
| 184 | break; |
| 185 | } |
| 186 | } |
| 187 | } |
| 188 | } |
| 189 | |
| 190 | switch (fork()) { |
| 191 | case -1: { // error |
| 192 | close(child_in[0]); |
| 193 | close(child_in[1]); |
| 194 | close(child_out[0]); |
| 195 | close(child_out[1]); |
| 196 | PrintError("Unknown error calling fork()"); |
| 197 | return 0; |
| 198 | } |
| 199 | case 0: { // child |
| 200 | close(child_in[1]); // child uses the 0's, parent uses the 1's |
| 201 | close(child_out[1]); // child uses the 0's, parent uses the 1's |
| 202 | close(0); |
| 203 | close(1); |
| 204 | if (dup2(child_in[0], 0) == -1) _exit(1); |
| 205 | if (dup2(child_out[0], 1) == -1) _exit(2); |
| 206 | // Unset vars that might cause trouble when we fork |
| 207 | unsetenv("CPUPROFILE"); |
| 208 | unsetenv("HEAPPROFILE"); |
| 209 | unsetenv("HEAPCHECK"); |
| 210 | unsetenv("PERFTOOLS_VERBOSE"); |
Brian Silverman | 20350ac | 2021-11-17 18:19:55 -0800 | [diff] [blame] | 211 | execlp(get_pprof_path(), get_pprof_path(), |
Austin Schuh | 745610d | 2015-09-06 18:19:50 -0700 | [diff] [blame] | 212 | "--symbols", argv0, NULL); |
| 213 | _exit(3); // if execvp fails, it's bad news for us |
| 214 | } |
| 215 | default: { // parent |
| 216 | close(child_in[0]); // child uses the 0's, parent uses the 1's |
| 217 | close(child_out[0]); // child uses the 0's, parent uses the 1's |
| 218 | #ifdef HAVE_POLL_H |
| 219 | // Waiting for 1ms seems to give the OS time to notice any errors. |
| 220 | poll(0, 0, 1); |
| 221 | // For maximum safety, we check to make sure the execlp |
| 222 | // succeeded before trying to write. (Otherwise we'll get a |
| 223 | // SIGPIPE.) For systems without poll.h, we'll just skip this |
| 224 | // check, and trust that the user set PPROF_PATH correctly! |
| 225 | struct pollfd pfd = { child_in[1], POLLOUT, 0 }; |
| 226 | if (!poll(&pfd, 1, 0) || !(pfd.revents & POLLOUT) || |
| 227 | (pfd.revents & (POLLHUP|POLLERR))) { |
| 228 | PrintError("Cannot run 'pprof' (is PPROF_PATH set correctly?)"); |
| 229 | return 0; |
| 230 | } |
| 231 | #endif |
| 232 | #if defined(__CYGWIN__) || defined(__CYGWIN32__) |
| 233 | // On cygwin, DumpProcSelfMaps() takes a HANDLE, not an fd. Convert. |
| 234 | const HANDLE symbols_handle = (HANDLE) get_osfhandle(child_in[1]); |
| 235 | DumpProcSelfMaps(symbols_handle); |
| 236 | #else |
| 237 | DumpProcSelfMaps(child_in[1]); // what pprof expects on stdin |
| 238 | #endif |
| 239 | |
| 240 | // Allocate 24 bytes = ("0x" + 8 bytes + "\n" + overhead) for each |
| 241 | // address to feed to pprof. |
| 242 | const int kOutBufSize = 24 * symbolization_table_.size(); |
| 243 | char *pprof_buffer = new char[kOutBufSize]; |
| 244 | int written = 0; |
| 245 | for (SymbolMap::const_iterator iter = symbolization_table_.begin(); |
| 246 | iter != symbolization_table_.end(); ++iter) { |
| 247 | written += snprintf(pprof_buffer + written, kOutBufSize - written, |
| 248 | // pprof expects format to be 0xXXXXXX |
| 249 | "0x%" PRIxPTR "\n", reinterpret_cast<uintptr_t>(iter->first)); |
| 250 | } |
| 251 | write(child_in[1], pprof_buffer, strlen(pprof_buffer)); |
| 252 | close(child_in[1]); // that's all we need to write |
Brian Silverman | 20350ac | 2021-11-17 18:19:55 -0800 | [diff] [blame] | 253 | delete[] pprof_buffer; |
Austin Schuh | 745610d | 2015-09-06 18:19:50 -0700 | [diff] [blame] | 254 | |
| 255 | const int kSymbolBufferSize = kSymbolSize * symbolization_table_.size(); |
| 256 | int total_bytes_read = 0; |
| 257 | delete[] symbol_buffer_; |
| 258 | symbol_buffer_ = new char[kSymbolBufferSize]; |
| 259 | memset(symbol_buffer_, '\0', kSymbolBufferSize); |
| 260 | while (1) { |
| 261 | int bytes_read = read(child_out[1], symbol_buffer_ + total_bytes_read, |
| 262 | kSymbolBufferSize - total_bytes_read); |
| 263 | if (bytes_read < 0) { |
| 264 | close(child_out[1]); |
| 265 | PrintError("Cannot read data from pprof"); |
| 266 | return 0; |
| 267 | } else if (bytes_read == 0) { |
| 268 | close(child_out[1]); |
| 269 | wait(NULL); |
| 270 | break; |
| 271 | } else { |
| 272 | total_bytes_read += bytes_read; |
| 273 | } |
| 274 | } |
| 275 | // We have successfully read the output of pprof into out. Make sure |
| 276 | // the last symbol is full (we can tell because it ends with a \n). |
| 277 | if (total_bytes_read == 0 || symbol_buffer_[total_bytes_read - 1] != '\n') |
| 278 | return 0; |
| 279 | // make the symbolization_table_ values point to the output vector |
| 280 | SymbolMap::iterator fill = symbolization_table_.begin(); |
| 281 | int num_symbols = 0; |
| 282 | const char *current_name = symbol_buffer_; |
| 283 | for (int i = 0; i < total_bytes_read; i++) { |
| 284 | if (symbol_buffer_[i] == '\n') { |
| 285 | fill->second = current_name; |
| 286 | symbol_buffer_[i] = '\0'; |
| 287 | current_name = symbol_buffer_ + i + 1; |
| 288 | fill++; |
| 289 | num_symbols++; |
| 290 | } |
| 291 | } |
| 292 | return num_symbols; |
| 293 | } |
| 294 | } |
| 295 | PrintError("Unkown error (should never occur!)"); |
| 296 | return 0; // shouldn't be reachable |
| 297 | #endif |
| 298 | } |