Validate that RobustOwnershipTracker catches all the deaths right
This adds tests for our new robust death tracker.
Change-Id: I66e1d03df13ad873bcaeb878afe118a78d95a3a1
Co-authored-by: Austin Schuh <austin.schuh@bluerivertech.com>
Signed-off-by: James Kuszmaul <james.kuszmaul@bluerivertech.com>
diff --git a/aos/ipc_lib/BUILD b/aos/ipc_lib/BUILD
index d51ee25..75bbd4d 100644
--- a/aos/ipc_lib/BUILD
+++ b/aos/ipc_lib/BUILD
@@ -253,6 +253,16 @@
)
cc_test(
+ name = "robust_ownership_tracker_test",
+ srcs = ["robust_ownership_tracker_test.cc"],
+ target_compatible_with = ["@platforms//os:linux"],
+ deps = [
+ ":lockless_queue",
+ "//aos/testing:googletest",
+ ],
+)
+
+cc_test(
name = "lockless_queue_test",
timeout = "eternal",
srcs = ["lockless_queue_test.cc"],
diff --git a/aos/ipc_lib/lockless_queue_death_test.cc b/aos/ipc_lib/lockless_queue_death_test.cc
index 2f4e08a..92ad8a8 100644
--- a/aos/ipc_lib/lockless_queue_death_test.cc
+++ b/aos/ipc_lib/lockless_queue_death_test.cc
@@ -102,14 +102,13 @@
bool print = false;
// TestShmRobustness doesn't handle robust futexes. It is happy to just
- // not crash with them. We know what they are, and what the tid of the
- // holder is. So go pretend to be the kernel and fix it for it.
+ // not crash with them. We know what the futexes are, and what the tid
+ // of the corresponding holder is. So go pretend to be the kernel and
+ // fix the futex.
PretendThatOwnerIsDeadForTesting(&memory->queue_setup_lock, tid.Get());
for (size_t i = 0; i < config.num_senders; ++i) {
- if (memory->GetSender(i)
- ->ownership_tracker.PretendThatOwnerIsDeadForTesting(
- tid.Get())) {
+ if (memory->GetSender(i)->ownership_tracker.IsHeldBy(tid.Get())) {
// Print out before and after results if a sender died. That is the
// more fun case.
print = true;
diff --git a/aos/ipc_lib/robust_ownership_tracker.cc b/aos/ipc_lib/robust_ownership_tracker.cc
index 3b6d32a..f106113 100644
--- a/aos/ipc_lib/robust_ownership_tracker.cc
+++ b/aos/ipc_lib/robust_ownership_tracker.cc
@@ -5,10 +5,6 @@
namespace aos {
namespace ipc_lib {
-bool RobustOwnershipTracker::PretendThatOwnerIsDeadForTesting(pid_t died_tid) {
- return ipc_lib::PretendThatOwnerIsDeadForTesting(&mutex_, died_tid);
-}
-
::std::string RobustOwnershipTracker::DebugString() const {
::std::stringstream s;
s << "{.tid=aos_mutex(" << ::std::hex << mutex_.futex;
diff --git a/aos/ipc_lib/robust_ownership_tracker.h b/aos/ipc_lib/robust_ownership_tracker.h
index 6f0d222..a8bbca3 100644
--- a/aos/ipc_lib/robust_ownership_tracker.h
+++ b/aos/ipc_lib/robust_ownership_tracker.h
@@ -10,6 +10,9 @@
#include "aos/util/top.h"
namespace aos::ipc_lib {
+namespace testing {
+class RobustOwnershipTrackerTest;
+} // namespace testing
// Results of atomically loading the ownership state via RobustOwnershipTracker
// below. This allows the state to be compared and queried later.
@@ -122,6 +125,11 @@
// Returns true if this thread holds ownership.
bool IsHeldBySelf() { return death_notification_is_held(&mutex_); }
+ // Returns true if the mutex is held by the provided tid. This is primarily
+ // intended for testing. There should be no need to call this in production
+ // code.
+ bool IsHeldBy(pid_t tid) { return LoadRelaxed().tid() == tid; }
+
// Acquires ownership. Other threads will know that this thread holds the
// ownership or be notified if this thread dies.
void Acquire() {
@@ -143,18 +151,12 @@
start_time_ticks_ = kNoStartTimeTicks;
}
- // Marks the owner as dead if the specified tid is the current owner. In other
- // words, after this call, a call to `LoadAcquire().OwnerIsDead()` may start
- // returning true.
- //
- // The motivation here is for use in testing. DO NOT USE in production code.
- // The logic here is only good enough for testing.
- bool PretendThatOwnerIsDeadForTesting(pid_t tid);
-
// Returns a string representing this object.
std::string DebugString() const;
private:
+ friend class testing::RobustOwnershipTrackerTest;
+
// Robust futex to track ownership the normal way. The futex is inside the
// mutex here. We use the wrapper mutex because the death_notification_*
// functions operate on that instead of the futex directly.
diff --git a/aos/ipc_lib/robust_ownership_tracker_test.cc b/aos/ipc_lib/robust_ownership_tracker_test.cc
new file mode 100644
index 0000000..b204886
--- /dev/null
+++ b/aos/ipc_lib/robust_ownership_tracker_test.cc
@@ -0,0 +1,155 @@
+#include "aos/ipc_lib/robust_ownership_tracker.h"
+
+#include <sys/mman.h>
+#include <sys/wait.h>
+
+#include "gtest/gtest.h"
+
+namespace aos::ipc_lib::testing {
+
+// Capture RobustOwnershipTracker in shared memory so it is shared across a
+// fork.
+class SharedRobustOwnershipTracker {
+ public:
+ SharedRobustOwnershipTracker() {
+ tracker_ = static_cast<RobustOwnershipTracker *>(
+ mmap(nullptr, sizeof(RobustOwnershipTracker), PROT_READ | PROT_WRITE,
+ MAP_SHARED | MAP_ANONYMOUS, -1, 0));
+ PCHECK(MAP_FAILED != tracker_);
+ };
+ ~SharedRobustOwnershipTracker() {
+ PCHECK(munmap(tracker_, sizeof(RobustOwnershipTracker)) != -1);
+ }
+
+ // Captures the tid.
+ RobustOwnershipTracker &tracker() const { return *tracker_; }
+
+ private:
+ RobustOwnershipTracker *tracker_;
+};
+
+class RobustOwnershipTrackerTest : public ::testing::Test {
+ public:
+ // Runs a function in a child process, and then exits afterwards. Waits for
+ // the child to finish before resuming.
+ template <typename T>
+ void RunInChildAndBlockUntilComplete(T fn) {
+ pid_t pid = fork();
+ if (pid == 0) {
+ fn();
+ LOG(INFO) << "Child exiting normally.";
+ exit(0);
+ return;
+ }
+
+ LOG(INFO) << "Child has pid " << pid;
+
+ while (true) {
+ LOG(INFO) << "Waiting for child.";
+ int status;
+ const pid_t waited_on = waitpid(pid, &status, 0);
+ // Check for failure.
+ if (waited_on == -1) {
+ if (errno == EINTR) continue;
+ PLOG(FATAL) << ": waitpid(" << pid << ", " << &status << ", 0) failed";
+ }
+ CHECK_EQ(waited_on, pid)
+ << ": waitpid() got child " << waited_on << " instead of " << pid;
+ CHECK(WIFEXITED(status));
+ LOG(INFO) << "Status " << WEXITSTATUS(status);
+ CHECK(WEXITSTATUS(status) == 0);
+ return;
+ }
+ }
+
+ // Returns the robust mutex.
+ aos_mutex &GetMutex(RobustOwnershipTracker &tracker) {
+ return tracker.mutex_;
+ }
+
+ // Returns the current start time in ticks.
+ uint64_t GetStartTimeTicks(RobustOwnershipTracker &tracker) {
+ return tracker.start_time_ticks_.load();
+ }
+
+ // Sets the current start time in ticks.
+ void SetStartTimeTicks(RobustOwnershipTracker &tracker, uint64_t start_time) {
+ tracker.start_time_ticks_ = start_time;
+ }
+};
+
+// Tests that acquiring the futex doesn't erroneously report the owner (i.e.
+// "us") as dead.
+TEST_F(RobustOwnershipTrackerTest, AcquireWorks) {
+ SharedRobustOwnershipTracker shared_tracker;
+
+ EXPECT_FALSE(shared_tracker.tracker().OwnerIsDefinitelyAbsolutelyDead());
+
+ // Run acquire in the this process, and expect it should not be dead until
+ // after the test finishes.
+ shared_tracker.tracker().Acquire();
+
+ // We have ownership. Since we are alive, the owner should not be marked as
+ // dead. We can use relaxed ordering since we are the only ones touching the
+ // data here.
+ EXPECT_FALSE(shared_tracker.tracker().LoadRelaxed().OwnerIsDead());
+ EXPECT_FALSE(shared_tracker.tracker().OwnerIsDefinitelyAbsolutelyDead());
+}
+
+// Tests that child death without unlocking results in the futex being marked as
+// dead, and the owner being very dead.
+TEST_F(RobustOwnershipTrackerTest, FutexRecovers) {
+ SharedRobustOwnershipTracker shared_tracker;
+
+ RunInChildAndBlockUntilComplete(
+ [&]() { shared_tracker.tracker().Acquire(); });
+
+ // Since the child that took ownership died, we expect that death to be
+ // reported.
+ EXPECT_TRUE(shared_tracker.tracker().LoadRelaxed().OwnerIsDead());
+ EXPECT_TRUE(shared_tracker.tracker().OwnerIsDefinitelyAbsolutelyDead());
+}
+
+// Tests that a PID which doesn't exist results in the process being noticed as
+// dead when we inspect /proc.
+TEST_F(RobustOwnershipTrackerTest, NoMatchingPID) {
+ SharedRobustOwnershipTracker shared_tracker;
+
+ shared_tracker.tracker().Acquire();
+ EXPECT_FALSE(shared_tracker.tracker().LoadRelaxed().OwnerIsDead());
+ EXPECT_FALSE(shared_tracker.tracker().OwnerIsDefinitelyAbsolutelyDead());
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wignored-attributes"
+ GetMutex(shared_tracker.tracker()).futex =
+ std::numeric_limits<aos_futex>::max() & FUTEX_TID_MASK;
+#pragma GCC diagnostic pop
+
+ // Since we're only pretending that the owner died (by changing the TID in the
+ // futex), we only notice that the owner is dead when spending the time
+ // walking through /proc.
+ EXPECT_FALSE(shared_tracker.tracker().LoadRelaxed().OwnerIsDead());
+ EXPECT_TRUE(shared_tracker.tracker().OwnerIsDefinitelyAbsolutelyDead());
+}
+
+// Tests that a mismatched start time results in the process being marked as
+// dead.
+TEST_F(RobustOwnershipTrackerTest, NoMatchingStartTime) {
+ SharedRobustOwnershipTracker shared_tracker;
+
+ shared_tracker.tracker().Acquire();
+ EXPECT_FALSE(shared_tracker.tracker().LoadRelaxed().OwnerIsDead());
+ EXPECT_FALSE(shared_tracker.tracker().OwnerIsDefinitelyAbsolutelyDead());
+
+ EXPECT_NE(GetStartTimeTicks(shared_tracker.tracker()), 0);
+ EXPECT_NE(GetStartTimeTicks(shared_tracker.tracker()),
+ RobustOwnershipTracker::kNoStartTimeTicks);
+ SetStartTimeTicks(shared_tracker.tracker(), 1);
+
+ // Since we're only pretending that the owner died (by changing the tracked
+ // start time ticks in the tracker), we only notice that the owner is dead
+ // when spending the time walking through /proc.
+ EXPECT_FALSE(shared_tracker.tracker().LoadRelaxed().OwnerIsDead());
+ EXPECT_TRUE(shared_tracker.tracker().OwnerIsDefinitelyAbsolutelyDead());
+}
+
+} // namespace aos::ipc_lib::testing