// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
/* Copyright (c) 2005-2007, Google Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met:
 *
 *     * Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above
 * copyright notice, this list of conditions and the following disclaimer
 * in the documentation and/or other materials provided with the
 * distribution.
 *     * Neither the name of Google Inc. nor the names of its
 * contributors may be used to endorse or promote products derived from
 * this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * ---
 * Author: Markus Gutschke
 */

#include "base/linuxthreads.h"

#ifdef THREADS
#ifdef __cplusplus
extern "C" {
#endif

#include <sched.h>
#include <signal.h>
#include <stdlib.h>
#include <string.h>
#include <fcntl.h>
#include <sys/socket.h>
#include <sys/wait.h>
#include <sys/prctl.h>
#include <semaphore.h>

#include "base/linux_syscall_support.h"
#include "base/thread_lister.h"

#ifndef CLONE_UNTRACED
#define CLONE_UNTRACED 0x00800000
#endif


/* Synchronous signals that should not be blocked while in the lister thread.
 */
static const int sync_signals[] = { SIGABRT, SIGILL, SIGFPE, SIGSEGV, SIGBUS,
                                    SIGXCPU, SIGXFSZ };

/* itoa() is not a standard function, and we cannot safely call printf()
 * after suspending threads. So, we just implement our own copy. A
 * recursive approach is the easiest here.
 */
static char *local_itoa(char *buf, int i) {
  if (i < 0) {
    *buf++ = '-';
    return local_itoa(buf, -i);
  } else {
    if (i >= 10)
      buf = local_itoa(buf, i/10);
    *buf++ = (i%10) + '0';
    *buf = '\000';
    return buf;
  }
}


/* Wrapper around clone() that runs "fn" on the same stack as the
 * caller! Unlike fork(), the cloned thread shares the same address space.
 * The caller must be careful to use only minimal amounts of stack until
 * the cloned thread has returned.
 * There is a good chance that the cloned thread and the caller will share
 * the same copy of errno!
 */
#ifdef __GNUC__
#if __GNUC__ == 3 && __GNUC_MINOR__ >= 1 || __GNUC__ > 3
/* Try to force this function into a separate stack frame, and make sure
 * that arguments are passed on the stack.
 */
static int local_clone (int (*fn)(void *), void *arg, ...)
  __attribute__ ((noinline));
#endif
#endif

/* To avoid the stack gap crossing page boundaries, increase by the large
 * page size that most PowerPC systems use. */
#ifdef __PPC64__
#define CLONE_STACK_SIZE 65536
#else
#define CLONE_STACK_SIZE 4096
#endif

static int local_clone (int (*fn)(void *), void *arg, ...) {
  /* Leave a gap of CLONE_STACK_SIZE bytes between the caller's stack and
   * the new clone. This should be more than sufficient for the caller to
   * call waitpid() until the cloned thread terminates.
   *
   * It is important that we set the CLONE_UNTRACED flag, because newer
   * versions of "gdb" otherwise attempt to attach to our thread, and will
   * attempt to reap its status codes. This subsequently results in the
   * caller hanging indefinitely in waitpid(), waiting for a change in
   * status that will never happen. By setting the CLONE_UNTRACED flag, we
   * prevent "gdb" from stealing events, but we still expect the thread
   * lister to fail, because it cannot PTRACE_ATTACH to the process that
   * is being debugged. This is OK and the error code will be reported
   * correctly.
   */
  return sys_clone(fn, (char *)&arg - CLONE_STACK_SIZE,
                   CLONE_VM|CLONE_FS|CLONE_FILES|CLONE_UNTRACED, arg, 0, 0, 0);
}
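
/* A minimal call-pattern sketch (illustrative only; "Worker" is a
 * hypothetical function, not part of this file). The caller must touch as
 * little stack as possible until the clone has exited, and should reap it
 * with waitpid(..., __WALL), as TCMalloc_ListAllProcessThreads does below:
 *
 *   static int Worker(void *arg) { return 0; }
 *
 *   pid_t pid = local_clone(Worker, &my_arg);
 *   if (pid >= 0) {
 *     int status;
 *     while (sys_waitpid(pid, &status, __WALL) < 0 && errno == EINTR) {
 *     }
 *   }
 */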


/* Local substitute for the atoi() function, which is not necessarily safe
 * to call once threads are suspended (depending on whether libc looks up
 * locale information when executing atoi()).
 */
static int local_atoi(const char *s) {
  int n = 0;
  int neg = *s == '-';
  if (neg)
    s++;
  while (*s >= '0' && *s <= '9')
    n = 10*n + (*s++ - '0');
  return neg ? -n : n;
}


/* Re-runs fn until it doesn't cause EINTR
 */
#define NO_INTR(fn) do {} while ((fn) < 0 && errno == EINTR)
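
/* For example, NO_INTR(rc = sys_read(fd, buf, len)) retries the read for
 * as long as it fails with EINTR. Note that the "fn" expression is
 * re-evaluated on every iteration, so any side effects in it repeat.
 */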


/* Wrap a class around system calls, in order to give us access to
 * a private copy of errno. This only works in C++, but it has the
 * advantage of not needing nested functions, which are a non-standard
 * language extension.
 */
#ifdef __cplusplus
namespace {
  class SysCalls {
   public:
    #define SYS_CPLUSPLUS
    #define SYS_ERRNO     my_errno
    #define SYS_INLINE    inline
    #define SYS_PREFIX    -1
    #undef  SYS_LINUX_SYSCALL_SUPPORT_H
    #include "linux_syscall_support.h"
    SysCalls() : my_errno(0) { }
    int my_errno;
  };
}
#define ERRNO sys.my_errno
#else
#define ERRNO my_errno
#endif
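
/* With these definitions in place, code that must not touch the globally
 * shared errno can instantiate SysCalls and read ERRNO instead. A sketch,
 * mirroring the waitpid loop in TCMalloc_ListAllProcessThreads below:
 *
 *   SysCalls sys;
 *   while (sys.waitpid(pid, &status, __WALL) < 0 && ERRNO == EINTR) {
 *   }
 */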


/* Wrapper for open() which is guaranteed to never return EINTR.
 */
static int c_open(const char *fname, int flags, int mode) {
  ssize_t rc;
  NO_INTR(rc = sys_open(fname, flags, mode));
  return rc;
}


/* abort() is not safely reentrant, and changes its behavior each time
 * it is called. This means, if the main application ever called abort()
 * we cannot safely call it again. This would happen if we were called
 * from a SIGABRT signal handler in the main application. So, we document
 * that raising SIGABRT in the thread lister makes it not signal safe
 * (and vice versa).
 * Also, since we share address space with the main application, we
 * cannot call abort() from the callback and expect the main application
 * to behave correctly afterwards. In fact, the only thing we can do is
 * terminate the main application with extreme prejudice (aka
 * PTRACE_KILL).
 * We set up our own SIGABRT handler to do this.
 * In order to find the main application from the signal handler, we
 * need to store information about it in global variables. This is
 * safe, because the main application should be suspended at this
 * time. If the callback ever called TCMalloc_ResumeAllProcessThreads(), then
 * we are running a higher risk, though. So, try to avoid calling
 * abort() after calling TCMalloc_ResumeAllProcessThreads.
 */
static volatile int *sig_pids, sig_num_threads, sig_proc, sig_marker;


/* Signal handler to help us recover from dying while we are attached to
 * other threads.
 */
static void SignalHandler(int signum, siginfo_t *si, void *data) {
  if (sig_pids != NULL) {
    if (signum == SIGABRT) {
      while (sig_num_threads-- > 0) {
        /* Not sure if sched_yield is really necessary here, but it does not */
        /* hurt, and it might be necessary for the same reasons that we have */
        /* to do so in sys_ptrace_detach().                                  */
        sys_sched_yield();
        sys_ptrace(PTRACE_KILL, sig_pids[sig_num_threads], 0, 0);
      }
    } else if (sig_num_threads > 0) {
      TCMalloc_ResumeAllProcessThreads(sig_num_threads, (int *)sig_pids);
    }
  }
  sig_pids = NULL;
  if (sig_marker >= 0)
    NO_INTR(sys_close(sig_marker));
  sig_marker = -1;
  if (sig_proc >= 0)
    NO_INTR(sys_close(sig_proc));
  sig_proc = -1;

  sys__exit(signum == SIGABRT ? 1 : 2);
}

/* Try to dirty the stack, and hope that the compiler is not smart enough
 * to optimize this function away. Or worse, the compiler could inline the
 * function and permanently allocate the data on the stack.
 */
static void DirtyStack(size_t amount) {
  char buf[amount];
  memset(buf, 0, amount);
  sys_read(-1, buf, amount);
}
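
/* The sys_read() above deliberately uses an invalid file descriptor: the
 * call itself fails, but because "buf" escapes into a system call the
 * compiler cannot prove the writes to it are dead and elide them.
 */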


/* Data structure for passing arguments to the lister thread.
 */
#define ALT_STACKSIZE (MINSIGSTKSZ + 4096)

struct ListerParams {
  int result, err;
  char *altstack_mem;
  ListAllProcessThreadsCallBack callback;
  void *parameter;
  va_list ap;
  sem_t *lock;
};


static void ListerThread(struct ListerParams *args) {
  int found_parent = 0;
  pid_t clone_pid = sys_gettid(), ppid = sys_getppid();
  char proc_self_task[80], marker_name[48], *marker_path;
  const char *proc_paths[3];
  const char *const *proc_path = proc_paths;
  int proc = -1, marker = -1, num_threads = 0;
  int max_threads = 0, sig;
  struct kernel_stat marker_sb, proc_sb;
  stack_t altstack;

  /* Wait for parent thread to set appropriate permissions
   * to allow ptrace activity
   */
  if (sem_wait(args->lock) < 0) {
    goto failure;
  }

  /* Create "marker" that we can use to detect threads sharing the same
   * address space and the same file handles. By setting the FD_CLOEXEC flag
   * we minimize the risk of misidentifying child processes as threads;
   * and since there is still a race condition, we will filter those out
   * later, anyway.
   */
  if ((marker = sys_socket(PF_LOCAL, SOCK_DGRAM, 0)) < 0 ||
      sys_fcntl(marker, F_SETFD, FD_CLOEXEC) < 0) {
  failure:
    args->result = -1;
    args->err    = errno;
    if (marker >= 0)
      NO_INTR(sys_close(marker));
    sig_marker = marker = -1;
    if (proc >= 0)
      NO_INTR(sys_close(proc));
    sig_proc = proc = -1;
    sys__exit(1);
  }

  /* Compute search paths for finding thread directories in /proc */
  local_itoa(strrchr(strcpy(proc_self_task, "/proc/"), '\000'), ppid);
  strcpy(marker_name, proc_self_task);
  marker_path = marker_name + strlen(marker_name);
  strcat(proc_self_task, "/task/");
  proc_paths[0] = proc_self_task; /* /proc/$$/task/ */
  proc_paths[1] = "/proc/";       /* /proc/         */
  proc_paths[2] = NULL;

  /* Compute path for marker socket in /proc, i.e. "/proc/<ppid>/fd/<marker>" */
  local_itoa(strcpy(marker_path, "/fd/") + 4, marker);
  if (sys_stat(marker_name, &marker_sb) < 0) {
    goto failure;
  }

  /* Catch signals on an alternate pre-allocated stack. This way, we can
   * safely execute the signal handler even if we ran out of memory.
   */
  memset(&altstack, 0, sizeof(altstack));
  altstack.ss_sp    = args->altstack_mem;
  altstack.ss_flags = 0;
  altstack.ss_size  = ALT_STACKSIZE;
  sys_sigaltstack(&altstack, (const stack_t *)NULL);

  /* Some kernels forget to wake up traced processes, when the
   * tracer dies. So, intercept synchronous signals and make sure
   * that we wake up our tracees before dying. It is the caller's
   * responsibility to ensure that asynchronous signals do not
   * interfere with this function.
   */
  sig_marker = marker;
  sig_proc   = -1;
  for (sig = 0; sig < sizeof(sync_signals)/sizeof(*sync_signals); sig++) {
    struct kernel_sigaction sa;
    memset(&sa, 0, sizeof(sa));
    sa.sa_sigaction_ = SignalHandler;
    sys_sigfillset(&sa.sa_mask);
    sa.sa_flags = SA_ONSTACK|SA_SIGINFO|SA_RESETHAND;
    sys_sigaction(sync_signals[sig], &sa, (struct kernel_sigaction *)NULL);
  }

  /* Read process directories in /proc/... */
  for (;;) {
    /* Some kernels know about threads, and hide them in "/proc"
     * (although they are still there, if you know the process
     * id). Threads are moved into a separate "task" directory. We
     * check there first, and then fall back on the older naming
     * convention if necessary.
     */
    if ((sig_proc = proc = c_open(*proc_path, O_RDONLY|O_DIRECTORY, 0)) < 0) {
      if (*++proc_path != NULL)
        continue;
      goto failure;
    }
    if (sys_fstat(proc, &proc_sb) < 0)
      goto failure;

    /* Since we are suspending threads, we cannot call any libc
     * functions that might acquire locks. Most notably, we cannot
     * call malloc(). So, we have to allocate memory on the stack,
     * instead. Since we do not know how much memory we need, we
     * make a best guess. And if we guessed incorrectly we retry on
     * a second iteration (by jumping to "detach_threads").
     *
     * Unless the number of threads is increasing very rapidly, we
     * should never need to do so, though, as our guesstimate is very
     * conservative.
     */
    if (max_threads < proc_sb.st_nlink + 100)
      max_threads = proc_sb.st_nlink + 100;

    /* scope */ {
      pid_t pids[max_threads];
      int added_entries = 0;
      sig_num_threads = num_threads;
      sig_pids        = pids;
      for (;;) {
        struct KERNEL_DIRENT *entry;
        char buf[4096];
        ssize_t nbytes = GETDENTS(proc, (struct KERNEL_DIRENT *)buf,
                                  sizeof(buf));
        if (nbytes < 0)
          goto failure;
        else if (nbytes == 0) {
          if (added_entries) {
            /* Need to keep iterating over "/proc" in multiple
             * passes until we no longer find any more threads. This
             * algorithm eventually completes, when all threads have
             * been suspended.
             */
            added_entries = 0;
            sys_lseek(proc, 0, SEEK_SET);
            continue;
          }
          break;
        }
        for (entry = (struct KERNEL_DIRENT *)buf;
             entry < (struct KERNEL_DIRENT *)&buf[nbytes];
             entry = (struct KERNEL_DIRENT *)((char *)entry+entry->d_reclen)) {
          if (entry->d_ino != 0) {
            const char *ptr = entry->d_name;
            pid_t pid;

            /* Some kernels hide threads by preceding the pid with a '.' */
            if (*ptr == '.')
              ptr++;

            /* If the directory is not numeric, it cannot be a
             * process/thread
             */
            if (*ptr < '0' || *ptr > '9')
              continue;
            pid = local_atoi(ptr);

            /* Attach (and suspend) all threads */
            if (pid && pid != clone_pid) {
              struct kernel_stat tmp_sb;
              char fname[entry->d_reclen + 48];
              strcat(strcat(strcpy(fname, "/proc/"),
                            entry->d_name), marker_path);

              /* Check if the marker is identical to the one we created */
              if (sys_stat(fname, &tmp_sb) >= 0 &&
                  marker_sb.st_ino == tmp_sb.st_ino) {
                long i, j;

                /* Found one of our threads, make sure it is no duplicate */
                for (i = 0; i < num_threads; i++) {
                  /* Linear search is slow, but should not matter much for
                   * the typically small number of threads.
                   */
                  if (pids[i] == pid) {
                    /* Found a duplicate; most likely on second pass */
                    goto next_entry;
                  }
                }

                /* Check whether data structure needs growing */
                if (num_threads >= max_threads) {
                  /* Back to square one, this time with more memory */
                  NO_INTR(sys_close(proc));
                  goto detach_threads;
                }

                /* Attaching to thread suspends it */
                pids[num_threads++] = pid;
                sig_num_threads = num_threads;
                if (sys_ptrace(PTRACE_ATTACH, pid, (void *)0,
                               (void *)0) < 0) {
                  /* If operation failed, ignore thread. Maybe it
                   * just died? There might also be a race
                   * condition with a concurrent core dumper or
                   * with a debugger. In that case, we will just
                   * make a best effort, rather than failing
                   * entirely.
                   */
                  num_threads--;
                  sig_num_threads = num_threads;
                  goto next_entry;
                }
                while (sys_waitpid(pid, (int *)0, __WALL) < 0) {
                  if (errno != EINTR) {
                    sys_ptrace_detach(pid);
                    num_threads--;
                    sig_num_threads = num_threads;
                    goto next_entry;
                  }
                }

                if (sys_ptrace(PTRACE_PEEKDATA, pid, &i, &j) || i++ != j ||
                    sys_ptrace(PTRACE_PEEKDATA, pid, &i, &j) || i   != j) {
                  /* Address spaces are distinct, even though both
                   * processes show the "marker". This is probably
                   * a forked child process rather than a thread.
                   */
                  sys_ptrace_detach(pid);
                  num_threads--;
                  sig_num_threads = num_threads;
                } else {
                  found_parent |= pid == ppid;
                  added_entries++;
                }
              }
            }
          }
        next_entry:;
        }
      }
      NO_INTR(sys_close(proc));
      sig_proc = proc = -1;

      /* If we failed to find any threads, try looking somewhere else in
       * /proc. Maybe, threads are reported differently on this system.
       */
      if (num_threads > 1 || !*++proc_path) {
        NO_INTR(sys_close(marker));
        sig_marker = marker = -1;

        /* If we never found the parent process, something is very wrong.
         * Most likely, we are running in a debugger. Any attempt to operate
         * on the threads would be very incomplete. Let's just report an
         * error to the caller.
         */
        if (!found_parent) {
          TCMalloc_ResumeAllProcessThreads(num_threads, pids);
          sys__exit(3);
        }

        /* Now we are ready to call the callback,
         * which takes care of resuming the threads for us.
         */
        args->result = args->callback(args->parameter, num_threads,
                                      pids, args->ap);
        args->err = errno;

        /* Callback should have resumed threads, but better safe than sorry */
        if (TCMalloc_ResumeAllProcessThreads(num_threads, pids)) {
          /* Callback forgot to resume at least one thread, report error */
          args->err    = EINVAL;
          args->result = -1;
        }

        sys__exit(0);
      }
    detach_threads:
      /* Resume all threads prior to retrying the operation */
      TCMalloc_ResumeAllProcessThreads(num_threads, pids);
      sig_pids = NULL;
      num_threads = 0;
      sig_num_threads = num_threads;
      max_threads += 100;
    }
  }
}


/* This function gets the list of all Linux threads of the current process
 * and passes them to the 'callback' along with the 'parameter' pointer; at
 * the time the callback is invoked, all the threads have been paused via
 * PTRACE_ATTACH.
 * The callback is executed from a separate thread which shares only the
 * address space, the filesystem, and the filehandles with the caller. Most
 * notably, it does not share the same pid and ppid; and if it terminates,
 * the rest of the application is still there. 'callback' is supposed to
 * call (or arrange for a call to) TCMalloc_ResumeAllProcessThreads. This
 * happens automatically, if the thread raises a synchronous signal (e.g.
 * SIGSEGV); asynchronous signals are blocked. If the 'callback' decides to
 * unblock them, it must ensure that they cannot terminate the application,
 * or that TCMalloc_ResumeAllProcessThreads will get called.
 * It is an error for the 'callback' to make any library calls that could
 * acquire locks. Most notably, this means that most system calls have to
 * avoid going through libc. Also, this means that it is not legal to call
 * exit() or abort().
 * We return -1 on error and the return value of 'callback' on success.
 */
int TCMalloc_ListAllProcessThreads(void *parameter,
                                   ListAllProcessThreadsCallBack callback, ...) {
  char altstack_mem[ALT_STACKSIZE];
  struct ListerParams args;
  pid_t clone_pid;
  int dumpable = 1, sig;
  struct kernel_sigset_t sig_blocked, sig_old;
  sem_t lock;

  va_start(args.ap, callback);

  /* If we are short on virtual memory, initializing the alternate stack
   * might trigger a SIGSEGV. Let's do this early, before it could get us
   * into more trouble (i.e. before signal handlers try to use the alternate
   * stack, and before we attach to other threads).
   */
  memset(altstack_mem, 0, sizeof(altstack_mem));

  /* Some of our cleanup functions could conceivably use more stack space.
   * Try to touch the stack right now. This could be defeated by the compiler
   * being too smart for its own good, so try really hard.
   */
  DirtyStack(32768);

  /* Make this process "dumpable". This is necessary in order to ptrace()
   * after having called setuid().
   */
  dumpable = sys_prctl(PR_GET_DUMPABLE, 0);
  if (!dumpable)
    sys_prctl(PR_SET_DUMPABLE, 1);

  /* Fill in argument block for dumper thread */
  args.result       = -1;
  args.err          = 0;
  args.altstack_mem = altstack_mem;
  args.parameter    = parameter;
  args.callback     = callback;
  args.lock         = &lock;

  /* Before cloning the thread lister, block all asynchronous signals, as we */
  /* are not prepared to handle them.                                        */
  sys_sigfillset(&sig_blocked);
  for (sig = 0; sig < sizeof(sync_signals)/sizeof(*sync_signals); sig++) {
    sys_sigdelset(&sig_blocked, sync_signals[sig]);
  }
  if (sys_sigprocmask(SIG_BLOCK, &sig_blocked, &sig_old)) {
    args.err = errno;
    args.result = -1;
    goto failed;
  }

  /* scope */ {
    /* After cloning, both the parent and the child share the same instance
     * of errno. We must make sure that at least one of these processes
     * (in our case, the parent) uses modified syscall macros that update
     * a local copy of errno, instead.
     */
#ifdef __cplusplus
    #define sys0_sigprocmask sys.sigprocmask
    #define sys0_waitpid     sys.waitpid
    SysCalls sys;
#else
    int my_errno;
    #define SYS_ERRNO        my_errno
    #define SYS_INLINE       inline
    #define SYS_PREFIX       0
    #undef  SYS_LINUX_SYSCALL_SUPPORT_H
    #include "linux_syscall_support.h"
#endif

    /* Lock before clone so that parent can set
     * ptrace permissions (if necessary) prior
     * to ListerThread actually executing
     */
    if (sem_init(&lock, 0, 0) == 0) {

      int clone_errno;
      clone_pid = local_clone((int (*)(void *))ListerThread, &args);
      clone_errno = errno;

      sys_sigprocmask(SIG_SETMASK, &sig_old, &sig_old);

      if (clone_pid >= 0) {
#ifdef PR_SET_PTRACER
        /* On kernels with the Yama security module, permission must
         * explicitly be given to allow for ptrace.
         */
        prctl(PR_SET_PTRACER, clone_pid, 0, 0, 0);
#endif
        /* Releasing the lock here allows the
         * ListerThread to execute and ptrace us.
         */
        sem_post(&lock);
        int status, rc;
        while ((rc = sys0_waitpid(clone_pid, &status, __WALL)) < 0 &&
               ERRNO == EINTR) {
          /* Keep waiting */
        }
        if (rc < 0) {
          args.err = ERRNO;
          args.result = -1;
        } else if (WIFEXITED(status)) {
          switch (WEXITSTATUS(status)) {
          case 0: break;              /* Normal process termination         */
          case 2: args.err = EFAULT;  /* Some fault (e.g. SIGSEGV) detected */
                  args.result = -1;
                  break;
          case 3: args.err = EPERM;   /* Process is already being traced    */
                  args.result = -1;
                  break;
          default: args.err = ECHILD; /* Child died unexpectedly            */
                  args.result = -1;
                  break;
          }
        } else {
          /* WIFEXITED(status) is false, so the lister was terminated
           * by an unhandled signal.
           */
          args.err = EFAULT;
          args.result = -1;
        }
        sem_destroy(&lock);
      } else {
        args.result = -1;
        args.err    = clone_errno;
      }
    } else {
      args.result = -1;
      args.err    = errno;
    }
  }

  /* Restore the "dumpable" state of the process */
failed:
  if (!dumpable)
    sys_prctl(PR_SET_DUMPABLE, dumpable);

  va_end(args.ap);

  errno = args.err;
  return args.result;
}
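
/* A minimal usage sketch (hypothetical; "CountThreads" and its use of the
 * 'parameter' pointer are illustrative only, not part of this file). The
 * callback runs while every other thread is suspended, so it must restrict
 * itself to lock-free calls, and it is responsible for resuming the
 * threads:
 *
 *   static int CountThreads(void *param, int num_threads,
 *                           pid_t *thread_pids, va_list ap) {
 *     *(int *)param = num_threads;
 *     return TCMalloc_ResumeAllProcessThreads(num_threads, thread_pids);
 *   }
 *
 *   int n = 0;
 *   if (TCMalloc_ListAllProcessThreads(&n, CountThreads) < 0) {
 *     ... handle error; errno was propagated from the lister thread ...
 *   }
 */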

/* This function resumes the list of all Linux threads that
 * TCMalloc_ListAllProcessThreads pauses before giving to its callback.
 * The function returns non-zero if at least one thread was
 * suspended and has now been resumed.
 */
int TCMalloc_ResumeAllProcessThreads(int num_threads, pid_t *thread_pids) {
  int detached_at_least_one = 0;
  while (num_threads-- > 0) {
    detached_at_least_one |= sys_ptrace_detach(thread_pids[num_threads]) >= 0;
  }
  return detached_at_least_one;
}

#ifdef __cplusplus
}
#endif
#endif