Make lockless_queue_death_test work on aarch64
Implement it with ptrace so it's less platform-dependent. It still
doesn't work on armv7 though, because the kernel doesn't implement the
ptrace operation we need there.
Change-Id: I45c27408fcc1d4cff7f2d0743a762aa2f506024d
Signed-off-by: Brian Silverman <brian.silverman@bluerivertech.com>
diff --git a/aos/ipc_lib/lockless_queue_death_test.cc b/aos/ipc_lib/lockless_queue_death_test.cc
index 35ca5ca..2217811 100644
--- a/aos/ipc_lib/lockless_queue_death_test.cc
+++ b/aos/ipc_lib/lockless_queue_death_test.cc
@@ -1,7 +1,11 @@
#include <dlfcn.h>
+#include <elf.h>
#include <linux/futex.h>
#include <sys/mman.h>
+#include <sys/procfs.h>
+#include <sys/ptrace.h>
#include <sys/syscall.h>
+#include <sys/uio.h>
#include <unistd.h>
#include <wait.h>
@@ -125,14 +129,6 @@
};
::std::atomic<GlobalState *> global_state;
-#ifndef __ARM_EABI__
-#ifndef __x86_64__
-#error This code only works on amd64.
-#endif
-
-// The "trap bit" which enables single-stepping for x86.
-const greg_t kTrapFlag = 1 << 8;
-
// Returns true if the address is in the queue memory chunk.
bool IsInLocklessQueueMemory(void *address) {
GlobalState *my_global_state = global_state.load(::std::memory_order_relaxed);
@@ -200,7 +196,6 @@
const int saved_errno = errno;
SIMPLE_ASSERT(signal == SIGSEGV, "wrong signal for SIGSEGV handler");
- ucontext_t *const context = static_cast<ucontext_t *>(context_void);
// Only process memory addresses in our shared memory block.
if (!IsInLocklessQueueMemory(siginfo->si_addr)) {
if (CallChainedAction(old_segv_handler, signal, siginfo, context_void)) {
@@ -216,9 +211,16 @@
HandleWrite(siginfo->si_addr);
ShmProtectOrDie(PROT_READ | PROT_WRITE);
- context->uc_mcontext.gregs[REG_EFL] |= kTrapFlag;
my_global_state->state = DieAtState::kWriting;
errno = saved_errno;
+
+#if defined(__x86_64__)
+ __asm__ __volatile__("int $3" ::: "memory", "cc");
+#elif defined(__aarch64__)
+ __asm__ __volatile__("brk #0" ::: "memory", "cc");
+#else
+#error Unhandled architecture
+#endif
}
// A mutex lock is about to happen. Mark the memory rw, and check to see if we
@@ -235,14 +237,12 @@
// The SEGV handler has set a breakpoint 1 instruction in the future. This
// clears it, marks memory readonly, and continues.
-void trap_handler(int signal, siginfo_t *, void *context_void) {
+void trap_handler(int signal, siginfo_t *, void * /*context*/) {
GlobalState *my_global_state = global_state.load(::std::memory_order_relaxed);
const int saved_errno = errno;
SIMPLE_ASSERT(signal == SIGTRAP, "wrong signal for SIGTRAP handler");
- ucontext_t *const context = static_cast<ucontext_t *>(context_void);
-
- context->uc_mcontext.gregs[REG_EFL] &= ~kTrapFlag;
+ my_global_state->state = DieAtState::kWriting;
SIMPLE_ASSERT(my_global_state->state == DieAtState::kWriting,
"bad state for SIGTRAP");
ShmProtectOrDie(PROT_READ);
@@ -268,7 +268,9 @@
struct sigaction action;
memset(&action, 0, sizeof(action));
action.sa_sigaction = handler;
- action.sa_flags = SA_RESTART | SA_SIGINFO;
+ // We don't do a full normal signal handler exit with ptrace, so SA_NODEFER is
+ // necessary to keep our signal handler active.
+ action.sa_flags = SA_RESTART | SA_SIGINFO | SA_NODEFER;
#ifdef AOS_SANITIZER_thread
// Tsan messes with signal handlers to check for race conditions, and it
// causes problems, so we have to work around it for SIGTRAP.
@@ -287,8 +289,6 @@
PCHECK(sigaction(signal, &action, old_action) == 0);
}
-#endif // ifndef __ARM_EABI__
-
// gtest only allows creating fatal failures in functions returning void...
// status is from wait(2).
void DetectFatalFailures(int status) {
@@ -299,7 +299,7 @@
FAIL() << " child exited because of signal "
<< aos_strsignal(WTERMSIG(status));
} else {
- FAIL() << "child exited with status " << ::std::hex << status;
+ FAIL() << " child exited with status " << ::std::hex << status;
}
}
@@ -335,6 +335,7 @@
CHECK_EQ(old_trap_handler.sa_handler, SIG_DFL);
linux_code::ipc_lib::SetShmAccessorObservers(futex_before, futex_after);
+ PCHECK(ptrace(PTRACE_TRACEME, 0, 0, 0) == 0);
ShmProtectOrDie(PROT_READ);
my_global_state->state = DieAtState::kRunning;
@@ -343,19 +344,84 @@
ShmProtectOrDie(PROT_READ | PROT_WRITE);
_exit(0);
} else {
+ // Annoying wrapper type because elf_gregset_t is an array, which C++
+ // handles poorly.
+ struct RestoreState {
+ RestoreState(elf_gregset_t regs_in) {
+ memcpy(regs, regs_in, sizeof(regs));
+ }
+ elf_gregset_t regs;
+ };
+ std::optional<RestoreState> restore_regs;
+ bool pass_trap = false;
// Wait until the child process dies.
while (true) {
int status;
pid_t waited_on = waitpid(pid, &status, 0);
if (waited_on == -1) {
if (errno == EINTR) continue;
- PCHECK(false) << ": waitpid(" << static_cast<intmax_t>(pid) << ", "
- << &status << ", 0) failed";
+ PCHECK(false) << ": waitpid(" << pid << ", " << &status
+ << ", 0) failed";
}
- if (waited_on != pid) {
- PCHECK(false) << ": waitpid got child "
- << static_cast<intmax_t>(waited_on) << " instead of "
- << static_cast<intmax_t>(pid);
+ CHECK_EQ(waited_on, pid)
+ << ": waitpid got child " << waited_on << " instead of " << pid;
+ if (WIFSTOPPED(status)) {
+ // The child was stopped via ptrace.
+ const int stop_signal = WSTOPSIG(status);
+ elf_gregset_t regs;
+ {
+ struct iovec iov;
+ iov.iov_base = ®s;
+ iov.iov_len = sizeof(regs);
+ PCHECK(ptrace(PTRACE_GETREGSET, pid, NT_PRSTATUS, &iov) == 0);
+ CHECK_EQ(iov.iov_len, sizeof(regs))
+ << ": ptrace regset is the wrong size";
+ }
+ if (stop_signal == SIGSEGV) {
+ // It's a SEGV, hopefully due to writing to the shared memory which is
+ // marked read-only. We record the instruction that faulted so we can
+ // look for it while single-stepping, then deliver the signal so the
+ // child can mark it read-write and then poke us to single-step that
+ // instruction.
+
+ CHECK(!restore_regs)
+ << ": Traced child got a SEGV while single-stepping";
+ // Save all the registers to resume execution at the current location
+ // in the child.
+ restore_regs = RestoreState(regs);
+ PCHECK(ptrace(PTRACE_CONT, pid, nullptr, SIGSEGV) == 0);
+ continue;
+ }
+ if (stop_signal == SIGTRAP) {
+ if (pass_trap) {
+ // This is the new SIGTRAP we generated, which we just want to pass
+ // through so the child's signal handler can restore the memory to
+ // read-only
+ PCHECK(ptrace(PTRACE_CONT, pid, nullptr, SIGTRAP) == 0);
+ pass_trap = false;
+ continue;
+ }
+ if (restore_regs) {
+ // Restore the state we saved before delivering the SEGV, and then
+ // single-step that one instruction.
+ struct iovec iov;
+ iov.iov_base = &restore_regs->regs;
+ iov.iov_len = sizeof(restore_regs->regs);
+ PCHECK(ptrace(PTRACE_SETREGSET, pid, NT_PRSTATUS, &iov) == 0);
+ restore_regs = std::nullopt;
+ PCHECK(ptrace(PTRACE_SINGLESTEP, pid, nullptr, nullptr) == 0);
+ continue;
+ }
+ // We executed the single instruction that originally faulted, so
+ // now deliver a SIGTRAP to the child so it can mark the memory
+ // read-only again.
+ pass_trap = true;
+ PCHECK(kill(pid, SIGTRAP) == 0);
+ PCHECK(ptrace(PTRACE_CONT, pid, nullptr, nullptr) == 0);
+ continue;
+ }
+ LOG(FATAL) << "Traced child was stopped with unexpected signal: "
+ << static_cast<int>(WSTOPSIG(status));
}
if (WIFEXITED(status)) {
if (WEXITSTATUS(status) == 0) return true;
@@ -466,6 +532,7 @@
if (RunFunctionDieAtAndCheck(config, prepare, function, check, &test_failed,
die_at, prepare_in_child, expected_writes,
nullptr)) {
+ LOG(INFO) << "Tested " << die_at << " death points";
return;
}
if (test_failed) {
@@ -567,7 +634,7 @@
}
if (print) {
- printf("Bad version:\n");
+ LOG(INFO) << "Bad version:";
PrintLocklessQueueMemory(memory);
}
@@ -575,7 +642,7 @@
LocklessQueueSender::Make(queue).value();
if (print) {
- printf("Cleaned up version:\n");
+ LOG(INFO) << "Cleaned up version:";
PrintLocklessQueueMemory(memory);
}