blob: 768d72f3318d27797a1e41b653aa6b09d4cc1e5c [file] [log] [blame]
Austin Schuh906616c2019-01-21 20:25:11 -08001// Copyright (c) 2008, Google Inc.
2// All rights reserved.
3//
4// Redistribution and use in source and binary forms, with or without
5// modification, are permitted provided that the following conditions are
6// met:
7//
8// * Redistributions of source code must retain the above copyright
9// notice, this list of conditions and the following disclaimer.
10// * Redistributions in binary form must reproduce the above
11// copyright notice, this list of conditions and the following disclaimer
12// in the documentation and/or other materials provided with the
13// distribution.
14// * Neither the name of Google Inc. nor the names of its
15// contributors may be used to endorse or promote products derived from
16// this software without specific prior written permission.
17//
18// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29//
30// Author: Satoru Takabayashi
31//
32// Implementation of InstallFailureSignalHandler().
33
34#include "utilities.h"
35#include "stacktrace.h"
36#include "symbolize.h"
37#include "glog/logging.h"
38
39#include <signal.h>
40#include <time.h>
41#ifdef HAVE_UCONTEXT_H
42# include <ucontext.h>
43#endif
44#ifdef HAVE_SYS_UCONTEXT_H
45# include <sys/ucontext.h>
46#endif
47#include <algorithm>
48
namespace aos {
// Weak declaration: no definition is required at link time.  When nothing
// provides one, the symbol's address is null, which MaybeUnsetRealtime()
// checks before calling it.
// NOTE(review): judging by the name this drops realtime scheduling priority;
// the definition is not in this file -- confirm.
__attribute__((weak)) void FatalUnsetRealtimePriority();
}  // namespace aos
52
Austin Schuh906616c2019-01-21 20:25:11 -080053_START_GOOGLE_NAMESPACE_
54
55namespace {
56
Austin Schuh77f3f222022-06-10 16:49:21 -070057void MaybeUnsetRealtime() {
58 if (&aos::FatalUnsetRealtimePriority != nullptr) {
59 aos::FatalUnsetRealtimePriority();
60 }
61}
62
// Signals for which the failure signal handler gets installed, paired with
// the name printed for each.  strsignal() could supply the names, but it is
// avoided so as not to introduce yet another #ifdef complication.
//
// The list should be synced with the comment in signalhandler.h.
struct FailureSignalEntry {
  int number;        // Signal number as defined by <signal.h>.
  const char *name;  // Text printed for this signal in the crash report.
};

const FailureSignalEntry kFailureSignals[] = {
  { SIGSEGV, "SIGSEGV" },
  { SIGILL,  "SIGILL"  },
  { SIGFPE,  "SIGFPE"  },
  { SIGABRT, "SIGABRT" },
#if !defined(OS_WINDOWS)
  { SIGBUS,  "SIGBUS"  },
#endif
  { SIGTERM, "SIGTERM" },
};
81
// Set to true by InstallFailureSignalHandler() once the handlers have been
// registered; read by IsFailureSignalHandlerInstalled() on OS_WINDOWS.
bool kFailureSignalHandlerInstalled = false;
83
// Extracts the program counter from the signal context handed to a
// SA_SIGINFO-style handler.
//
// "ucontext_in_void" is the handler's third argument (a ucontext_t* passed
// as void*).  Returns NULL when the pointer is NULL or when this build has
// no way to read the PC out of a ucontext (PC_FROM_UCONTEXT undefined or no
// ucontext header available).
void* GetPC(void* ucontext_in_void) {
#if defined(PC_FROM_UCONTEXT) && (defined(HAVE_UCONTEXT_H) || defined(HAVE_SYS_UCONTEXT_H))
  if (ucontext_in_void == NULL) {
    return NULL;
  }
  ucontext_t *signal_context = static_cast<ucontext_t *>(ucontext_in_void);
  // C-style cast on purpose: the register field named by PC_FROM_UCONTEXT is
  // platform specific.
  return (void*)signal_context->PC_FROM_UCONTEXT;
#else
  (void)ucontext_in_void;  // Unused in this configuration.
  return NULL;
#endif
}
96
97// The class is used for formatting error messages. We don't use printf()
98// as it's not async signal safe.
99class MinimalFormatter {
100 public:
101 MinimalFormatter(char *buffer, int size)
102 : buffer_(buffer),
103 cursor_(buffer),
104 end_(buffer + size) {
105 }
106
107 // Returns the number of bytes written in the buffer.
108 int num_bytes_written() const { return (int) (cursor_ - buffer_); }
109
110 // Appends string from "str" and updates the internal cursor.
111 void AppendString(const char* str) {
112 int i = 0;
113 while (str[i] != '\0' && cursor_ + i < end_) {
114 cursor_[i] = str[i];
115 ++i;
116 }
117 cursor_ += i;
118 }
119
120 // Formats "number" in "radix" and updates the internal cursor.
121 // Lowercase letters are used for 'a' - 'z'.
122 void AppendUint64(uint64 number, int radix) {
123 int i = 0;
124 while (cursor_ + i < end_) {
125 const int tmp = number % radix;
126 number /= radix;
127 cursor_[i] = (tmp < 10 ? '0' + tmp : 'a' + tmp - 10);
128 ++i;
129 if (number == 0) {
130 break;
131 }
132 }
133 // Reverse the bytes written.
134 std::reverse(cursor_, cursor_ + i);
135 cursor_ += i;
136 }
137
138 // Formats "number" as hexadecimal number, and updates the internal
139 // cursor. Padding will be added in front if needed.
140 void AppendHexWithPadding(uint64 number, int width) {
141 char* start = cursor_;
142 AppendString("0x");
143 AppendUint64(number, 16);
144 // Move to right and add padding in front if needed.
145 if (cursor_ < start + width) {
146 const int64 delta = start + width - cursor_;
147 std::copy(start, cursor_, start + delta);
148 std::fill(start, start + delta, ' ');
149 cursor_ = start + width;
150 }
151 }
152
153 private:
154 char *buffer_;
155 char *cursor_;
156 const char * const end_;
157};
158
// Default failure writer: sends "size" bytes starting at "data" to the
// standard error file descriptor with a single write() call.  A failed
// write is deliberately ignored.
void WriteToStderr(const char* data, int size) {
  // The result is captured and then discarded rather than dropped outright
  // (presumably to keep unused-result warnings quiet).
  const bool write_failed = write(STDERR_FILENO, data, size) < 0;
  (void)write_failed;  // Ignore errors.
}
165
166// The writer function can be changed by InstallFailureWriter().
167void (*g_failure_writer)(const char* data, int size) = WriteToStderr;
168
169// Dumps time information. We don't dump human-readable time information
170// as localtime() is not guaranteed to be async signal safe.
171void DumpTimeInfo() {
172 time_t time_in_sec = time(NULL);
173 char buf[256]; // Big enough for time info.
174 MinimalFormatter formatter(buf, sizeof(buf));
175 formatter.AppendString("*** Aborted at ");
176 formatter.AppendUint64(time_in_sec, 10);
177 formatter.AppendString(" (unix time)");
178 formatter.AppendString(" try \"date -d @");
179 formatter.AppendUint64(time_in_sec, 10);
180 formatter.AppendString("\" if you are using GNU date ***\n");
181 g_failure_writer(buf, formatter.num_bytes_written());
182}
183
// TODO(hamaji): Use signal instead of sigaction?
185#ifdef HAVE_SIGACTION
186
187// Dumps information about the signal to STDERR.
188void DumpSignalInfo(int signal_number, siginfo_t *siginfo) {
189 // Get the signal name.
190 const char* signal_name = NULL;
191 for (size_t i = 0; i < ARRAYSIZE(kFailureSignals); ++i) {
192 if (signal_number == kFailureSignals[i].number) {
193 signal_name = kFailureSignals[i].name;
194 }
195 }
196
197 char buf[256]; // Big enough for signal info.
198 MinimalFormatter formatter(buf, sizeof(buf));
199
200 formatter.AppendString("*** ");
201 if (signal_name) {
202 formatter.AppendString(signal_name);
203 } else {
204 // Use the signal number if the name is unknown. The signal name
205 // should be known, but just in case.
206 formatter.AppendString("Signal ");
207 formatter.AppendUint64(signal_number, 10);
208 }
209 formatter.AppendString(" (@0x");
210 formatter.AppendUint64(reinterpret_cast<uintptr_t>(siginfo->si_addr), 16);
211 formatter.AppendString(")");
212 formatter.AppendString(" received by PID ");
213 formatter.AppendUint64(getpid(), 10);
214 formatter.AppendString(" (TID 0x");
215 // We assume pthread_t is an integral number or a pointer, rather
216 // than a complex struct. In some environments, pthread_self()
217 // returns an uint64 but in some other environments pthread_self()
218 // returns a pointer. Hence we use C-style cast here, rather than
219 // reinterpret/static_cast, to support both types of environments.
220 formatter.AppendUint64((uintptr_t)pthread_self(), 16);
221 formatter.AppendString(") ");
222 // Only linux has the PID of the signal sender in si_pid.
223#ifdef OS_LINUX
224 formatter.AppendString("from PID ");
225 formatter.AppendUint64(siginfo->si_pid, 10);
226 formatter.AppendString("; ");
227#endif
228 formatter.AppendString("stack trace: ***\n");
229 g_failure_writer(buf, formatter.num_bytes_written());
230}
231
232#endif // HAVE_SIGACTION
233
234// Dumps information about the stack frame to STDERR.
235void DumpStackFrameInfo(const char* prefix, void* pc) {
236 // Get the symbol name.
237 const char *symbol = "(unknown)";
238 char symbolized[1024]; // Big enough for a sane symbol.
239 // Symbolizes the previous address of pc because pc may be in the
240 // next function.
241 if (Symbolize(reinterpret_cast<char *>(pc) - 1,
242 symbolized, sizeof(symbolized))) {
243 symbol = symbolized;
244 }
245
246 char buf[1024]; // Big enough for stack frame info.
247 MinimalFormatter formatter(buf, sizeof(buf));
248
249 formatter.AppendString(prefix);
250 formatter.AppendString("@ ");
251 const int width = 2 * sizeof(void*) + 2; // + 2 for "0x".
252 formatter.AppendHexWithPadding(reinterpret_cast<uintptr_t>(pc), width);
253 formatter.AppendString(" ");
254 formatter.AppendString(symbol);
255 formatter.AppendString("\n");
256 g_failure_writer(buf, formatter.num_bytes_written());
257}
258
// Restores the default disposition (SIG_DFL) for "signal_number" and then
// sends that signal again: to the whole process via kill() when sigaction
// is available, via raise() on OS_WINDOWS.  Does nothing when neither
// configuration applies.
void InvokeDefaultSignalHandler(int signal_number) {
#if defined(HAVE_SIGACTION)
  struct sigaction default_action;
  memset(&default_action, 0, sizeof(default_action));
  default_action.sa_handler = SIG_DFL;
  sigemptyset(&default_action.sa_mask);
  sigaction(signal_number, &default_action, NULL);

  kill(getpid(), signal_number);
#elif defined(OS_WINDOWS)
  signal(signal_number, SIG_DFL);
  raise(signal_number);
#endif
}
273
// Guards FailureSignalHandler() against dumping from several threads at
// once.  It is NULL until the first thread enters the handler and from then
// on points at that thread's id; the policy is to let the first thread dump
// and make every other thread wait.
// See also comments in FailureSignalHandler().
pthread_t* g_entered_thread_id_pointer = NULL;
279
280// Dumps signal and stack frame information, and invokes the default
281// signal handler once our job is done.
282#if defined(OS_WINDOWS)
283void FailureSignalHandler(int signal_number)
284#else
285void FailureSignalHandler(int signal_number,
286 siginfo_t *signal_info,
287 void *ucontext)
288#endif
289{
290 // First check if we've already entered the function. We use an atomic
291 // compare and swap operation for platforms that support it. For other
292 // platforms, we use a naive method that could lead to a subtle race.
293
294 // We assume pthread_self() is async signal safe, though it's not
295 // officially guaranteed.
296 pthread_t my_thread_id = pthread_self();
297 // NOTE: We could simply use pthread_t rather than pthread_t* for this,
298 // if pthread_self() is guaranteed to return non-zero value for thread
299 // ids, but there is no such guarantee. We need to distinguish if the
300 // old value (value returned from __sync_val_compare_and_swap) is
301 // different from the original value (in this case NULL).
302 pthread_t* old_thread_id_pointer =
303 glog_internal_namespace_::sync_val_compare_and_swap(
304 &g_entered_thread_id_pointer,
305 static_cast<pthread_t*>(NULL),
306 &my_thread_id);
307 if (old_thread_id_pointer != NULL) {
308 // We've already entered the signal handler. What should we do?
309 if (pthread_equal(my_thread_id, *g_entered_thread_id_pointer)) {
310 // It looks the current thread is reentering the signal handler.
311 // Something must be going wrong (maybe we are reentering by another
312 // type of signal?). Kill ourself by the default signal handler.
313 InvokeDefaultSignalHandler(signal_number);
314 }
315 // Another thread is dumping stuff. Let's wait until that thread
316 // finishes the job and kills the process.
317 while (true) {
318 sleep(1);
319 }
320 }
Austin Schuh77f3f222022-06-10 16:49:21 -0700321 MaybeUnsetRealtime();
Austin Schuha8faf282020-03-08 14:49:53 -0700322
Austin Schuh906616c2019-01-21 20:25:11 -0800323 // This is the first time we enter the signal handler. We are going to
324 // do some interesting stuff from here.
325 // TODO(satorux): We might want to set timeout here using alarm(), but
326 // mixing alarm() and sleep() can be a bad idea.
327
328 // First dump time info.
329 DumpTimeInfo();
330
331#if !defined(OS_WINDOWS)
332 // Get the program counter from ucontext.
333 void *pc = GetPC(ucontext);
334 DumpStackFrameInfo("PC: ", pc);
Austin Schuh10358f22019-01-21 20:25:11 -0800335#else
336 (void)ucontext;
Austin Schuh906616c2019-01-21 20:25:11 -0800337#endif
338
339#ifdef HAVE_STACKTRACE
340 // Get the stack traces.
341 void *stack[32];
342 // +1 to exclude this function.
343 const int depth = GetStackTrace(stack, ARRAYSIZE(stack), 1);
344# ifdef HAVE_SIGACTION
345 DumpSignalInfo(signal_number, signal_info);
Austin Schuh10358f22019-01-21 20:25:11 -0800346# else
347 (void)signal_info;
Austin Schuh906616c2019-01-21 20:25:11 -0800348# endif
349 // Dump the stack traces.
350 for (int i = 0; i < depth; ++i) {
351 DumpStackFrameInfo(" ", stack[i]);
352 }
Austin Schuh10358f22019-01-21 20:25:11 -0800353#else
354 (void)signal_info;
Austin Schuh906616c2019-01-21 20:25:11 -0800355#endif
356
357 // *** TRANSITION ***
358 //
359 // BEFORE this point, all code must be async-termination-safe!
360 // (See WARNING above.)
361 //
362 // AFTER this point, we do unsafe things, like using LOG()!
363 // The process could be terminated or hung at any time. We try to
364 // do more useful things first and riskier things later.
365
366 // Flush the logs before we do anything in case 'anything'
367 // causes problems.
368 FlushLogFilesUnsafe(0);
369
370 // Kill ourself by the default signal handler.
371 InvokeDefaultSignalHandler(signal_number);
372}
373
374} // namespace
375
376namespace glog_internal_namespace_ {
377
// Reports whether FailureSignalHandler is currently installed.
//
// With sigaction available, this queries the live disposition of SIGABRT
// and compares it against FailureSignalHandler.  On OS_WINDOWS it returns
// the flag recorded by InstallFailureSignalHandler().  In any other
// configuration it returns false.
bool IsFailureSignalHandlerInstalled() {
#if defined(HAVE_SIGACTION)
  // TODO(andschwa): Return kFailureSignalHandlerInstalled?
  struct sigaction current_action;
  memset(&current_action, 0, sizeof(current_action));
  sigemptyset(&current_action.sa_mask);
  // A NULL new action makes this a pure query of the current disposition.
  sigaction(SIGABRT, NULL, &current_action);
  return current_action.sa_sigaction == &FailureSignalHandler;
#elif defined(OS_WINDOWS)
  return kFailureSignalHandlerInstalled;
#else
  return false;
#endif
}
392
393} // namespace glog_internal_namespace_
394
// Registers FailureSignalHandler for every signal listed in kFailureSignals
// and records that fact in kFailureSignalHandlerInstalled.  Uses sigaction()
// with SA_SIGINFO when available and signal() on OS_WINDOWS; does nothing in
// any other configuration.  Each registration is checked with CHECK_ERR /
// CHECK_NE.
//
// NOTE(review): the CHECK macros presumably embed the text of their
// arguments in the failure message, so those expressions (including the
// names "i" and "sig_action") are kept verbatim -- confirm before renaming.
void InstallFailureSignalHandler() {
#if defined(HAVE_SIGACTION)
  // Build the sigaction struct: SA_SIGINFO-style handler, empty signal mask.
  struct sigaction sig_action;
  memset(&sig_action, 0, sizeof(sig_action));
  sig_action.sa_sigaction = &FailureSignalHandler;
  sig_action.sa_flags |= SA_SIGINFO;
  sigemptyset(&sig_action.sa_mask);

  size_t i = 0;
  while (i < ARRAYSIZE(kFailureSignals)) {
    CHECK_ERR(sigaction(kFailureSignals[i].number, &sig_action, NULL));
    ++i;
  }
  kFailureSignalHandlerInstalled = true;
#elif defined(OS_WINDOWS)
  size_t i = 0;
  while (i < ARRAYSIZE(kFailureSignals)) {
    CHECK_NE(signal(kFailureSignals[i].number, &FailureSignalHandler),
             SIG_ERR);
    ++i;
  }
  kFailureSignalHandlerInstalled = true;
#endif  // HAVE_SIGACTION / OS_WINDOWS
}
416
// Replaces the function that receives the crash report text (initially
// WriteToStderr).  "writer" is called with a pointer to the bytes and their
// count; the data is not NUL-terminated.  Has no effect in configurations
// with neither sigaction nor OS_WINDOWS.
void InstallFailureWriter(void (*writer)(const char* data, int size)) {
#if defined(OS_WINDOWS) || defined(HAVE_SIGACTION)
  g_failure_writer = writer;
#endif  // OS_WINDOWS || HAVE_SIGACTION
}
422
423_END_GOOGLE_NAMESPACE_