blob: bc2db172dfed5c2be007f378248bd22d3fd1b8a1 [file] [log] [blame]
Philipp Schraderab2f8432023-09-17 18:58:06 -07001#ifndef AOS_IPC_LIB_ROBUST_OWNERSHIP_TRACKER_H_
2#define AOS_IPC_LIB_ROBUST_OWNERSHIP_TRACKER_H_
3
Stephan Pleines682928d2024-05-31 20:43:48 -07004#include <assert.h>
Philipp Schraderab2f8432023-09-17 18:58:06 -07005#include <linux/futex.h>
Stephan Pleines682928d2024-05-31 20:43:48 -07006#include <stdint.h>
Philipp Schrader81fa3fb2023-09-17 18:58:35 -07007#include <sys/syscall.h>
Stephan Pleines682928d2024-05-31 20:43:48 -07008#include <unistd.h>
Philipp Schraderab2f8432023-09-17 18:58:06 -07009
Stephan Pleines682928d2024-05-31 20:43:48 -070010#include <atomic>
11#include <limits>
12#include <optional>
13#include <ostream>
Philipp Schraderab2f8432023-09-17 18:58:06 -070014#include <string>
15
Austin Schuh99f7c6a2024-06-25 22:07:44 -070016#include "absl/log/check.h"
17#include "absl/log/log.h"
Stephan Pleines682928d2024-05-31 20:43:48 -070018
Philipp Schraderab2f8432023-09-17 18:58:06 -070019#include "aos/ipc_lib/aos_sync.h"
Philipp Schrader81fa3fb2023-09-17 18:58:35 -070020#include "aos/util/top.h"
Philipp Schraderab2f8432023-09-17 18:58:06 -070021
22namespace aos::ipc_lib {
// Forward declaration of the test fixture so that it can be befriended by
// RobustOwnershipTracker below without pulling in any test headers.
namespace testing {
class RobustOwnershipTrackerTest;
}  // namespace testing
Philipp Schraderab2f8432023-09-17 18:58:06 -070026
27// Results of atomically loading the ownership state via RobustOwnershipTracker
28// below. This allows the state to be compared and queried later.
29class ThreadOwnerStatusSnapshot {
30 public:
31 ThreadOwnerStatusSnapshot() : futex_(0) {}
32 ThreadOwnerStatusSnapshot(aos_futex futex) : futex_(futex) {}
33 ThreadOwnerStatusSnapshot(const ThreadOwnerStatusSnapshot &) = default;
34 ThreadOwnerStatusSnapshot &operator=(const ThreadOwnerStatusSnapshot &) =
35 default;
36 ThreadOwnerStatusSnapshot(ThreadOwnerStatusSnapshot &&) = default;
37 ThreadOwnerStatusSnapshot &operator=(ThreadOwnerStatusSnapshot &&) = default;
38
39 // Returns if the owner died as noticed by the robust futex using Acquire
40 // memory ordering.
41 bool OwnerIsDead() const { return (futex_ & FUTEX_OWNER_DIED) != 0; }
42
43 // Returns true if no one has claimed ownership.
44 bool IsUnclaimed() const { return futex_ == 0; }
45
Philipp Schraderab2f8432023-09-17 18:58:06 -070046 // Returns the thread ID (a.k.a. "tid") of the owning thread. Use this when
47 // trying to access the /proc entry that corresponds to the owning thread for
48 // example. Do not use the futex value directly.
49 pid_t tid() const { return futex_ & FUTEX_TID_MASK; }
50
51 bool operator==(const ThreadOwnerStatusSnapshot &other) const {
52 return other.futex_ == futex_;
53 }
54
55 private:
56 aos_futex futex_;
57};
58
59// This object reliably tracks a thread owning a resource. A single thread may
60// possess multiple resources like senders and receivers. Each resource can have
61// its own instance of this class. These instances are responsible for
62// monitoring the thread that owns them. Each resource can use its instance of
63// this class to reliably check whether the owning thread is no longer alive.
64//
65// All methods other than Load* must be accessed under a mutex.
66class RobustOwnershipTracker {
67 public:
Philipp Schrader81fa3fb2023-09-17 18:58:35 -070068 static constexpr uint64_t kNoStartTimeTicks =
69 std::numeric_limits<uint64_t>::max();
70
71 static uint64_t ReadStartTimeTicks(pid_t tid) {
72 if (tid == 0) {
73 return kNoStartTimeTicks;
74 }
75 std::optional<aos::util::ProcStat> proc_stat = util::ReadProcStat(tid);
76 if (!proc_stat.has_value()) {
77 return kNoStartTimeTicks;
78 }
79 return proc_stat->start_time_ticks;
80 }
81
82 // Loads the realtime-compatible contents of the ownership tracker with
83 // Acquire memory ordering.
Philipp Schraderab2f8432023-09-17 18:58:06 -070084 ThreadOwnerStatusSnapshot LoadAcquire() const {
85 return ThreadOwnerStatusSnapshot(
86 __atomic_load_n(&(mutex_.futex), __ATOMIC_ACQUIRE));
87 }
88
Philipp Schrader81fa3fb2023-09-17 18:58:35 -070089 // Loads all the realtime-compatible contents of the ownership tracker with
90 // Relaxed memory order.
Philipp Schraderab2f8432023-09-17 18:58:06 -070091 ThreadOwnerStatusSnapshot LoadRelaxed() const {
92 return ThreadOwnerStatusSnapshot(
93 __atomic_load_n(&(mutex_.futex), __ATOMIC_RELAXED));
94 }
95
Philipp Schrader81fa3fb2023-09-17 18:58:35 -070096 // Checks both the robust futex and dredges through /proc to see if the thread
97 // is alive. As per the class description, this must only be called under a
98 // mutex. This must not be called in a realtime context and it is slow.
99 bool OwnerIsDefinitelyAbsolutelyDead() const {
100 auto loaded = LoadAcquire();
101 if (loaded.OwnerIsDead()) {
102 return true;
103 }
104 if (loaded.IsUnclaimed()) {
105 return false;
106 }
107 const uint64_t proc_start_time_ticks = ReadStartTimeTicks(loaded.tid());
108 if (proc_start_time_ticks == kNoStartTimeTicks) {
109 LOG(ERROR) << "Detected that PID " << loaded.tid() << " died.";
110 return true;
111 }
112
113 if (proc_start_time_ticks != start_time_ticks_) {
114 LOG(ERROR) << "Detected that PID " << loaded.tid()
115 << " died from a starttime missmatch.";
116 return true;
117 }
118 return false;
119 }
120
Philipp Schraderab2f8432023-09-17 18:58:06 -0700121 // Clears all ownership state.
122 //
123 // This should only really be called if you are 100% certain that the owner is
124 // dead. Use `LoadAquire().OwnerIsDead()` to determine this.
125 void ForceClear() {
126 // Must be opposite order of Acquire.
127 // We only deal with the futex here because we don't want to change anything
128 // about the linked list. We just want to release ownership here. We still
129 // want the kernel to know about this element via the linked list the next
130 // time someone takes ownership.
131 __atomic_store_n(&(mutex_.futex), 0, __ATOMIC_RELEASE);
Philipp Schrader81fa3fb2023-09-17 18:58:35 -0700132 start_time_ticks_ = kNoStartTimeTicks;
Philipp Schraderab2f8432023-09-17 18:58:06 -0700133 }
134
135 // Returns true if this thread holds ownership.
136 bool IsHeldBySelf() { return death_notification_is_held(&mutex_); }
137
Philipp Schrader23eabd62023-09-19 14:59:49 -0700138 // Returns true if the mutex is held by the provided tid. This is primarily
139 // intended for testing. There should be no need to call this in production
140 // code.
141 bool IsHeldBy(pid_t tid) { return LoadRelaxed().tid() == tid; }
142
Philipp Schraderab2f8432023-09-17 18:58:06 -0700143 // Acquires ownership. Other threads will know that this thread holds the
144 // ownership or be notified if this thread dies.
Philipp Schrader81fa3fb2023-09-17 18:58:35 -0700145 void Acquire() {
146 pid_t tid = syscall(SYS_gettid);
147 assert(tid > 0);
148 const uint64_t proc_start_time_ticks = ReadStartTimeTicks(tid);
149 CHECK_NE(proc_start_time_ticks, kNoStartTimeTicks);
150
151 start_time_ticks_ = proc_start_time_ticks;
152 death_notification_init(&mutex_);
153 }
Philipp Schraderab2f8432023-09-17 18:58:06 -0700154
155 // Releases ownership.
156 //
157 // This should only be called from the owning thread.
158 void Release() {
159 // Must be opposite order of Acquire.
160 death_notification_release(&mutex_);
Philipp Schrader81fa3fb2023-09-17 18:58:35 -0700161 start_time_ticks_ = kNoStartTimeTicks;
Philipp Schraderab2f8432023-09-17 18:58:06 -0700162 }
163
Philipp Schraderab2f8432023-09-17 18:58:06 -0700164 // Returns a string representing this object.
165 std::string DebugString() const;
166
167 private:
Philipp Schrader23eabd62023-09-19 14:59:49 -0700168 friend class testing::RobustOwnershipTrackerTest;
169
Philipp Schraderab2f8432023-09-17 18:58:06 -0700170 // Robust futex to track ownership the normal way. The futex is inside the
171 // mutex here. We use the wrapper mutex because the death_notification_*
172 // functions operate on that instead of the futex directly.
173 //
174 // We use a futex here because:
175 // - futexes are fast.
176 // - The kernel can atomically clean up a dead thread and mark the futex
177 // appropriately.
178 // - Owners can clean up after dead threads.
179 aos_mutex mutex_;
Philipp Schrader81fa3fb2023-09-17 18:58:35 -0700180
181 // Thread's start time ticks.
182 std::atomic<uint64_t> start_time_ticks_;
Philipp Schraderab2f8432023-09-17 18:58:06 -0700183};
184
185} // namespace aos::ipc_lib
186
187#endif // AOS_IPC_LIB_ROBUST_OWNERSHIP_TRACKER_H_