blob: 6f0d2221fcc231fc1ccb0211f87a3d315853e75f [file] [log] [blame]
Philipp Schraderab2f8432023-09-17 18:58:06 -07001#ifndef AOS_IPC_LIB_ROBUST_OWNERSHIP_TRACKER_H_
2#define AOS_IPC_LIB_ROBUST_OWNERSHIP_TRACKER_H_
3
4#include <linux/futex.h>
Philipp Schrader81fa3fb2023-09-17 18:58:35 -07005#include <sys/syscall.h>
Philipp Schraderab2f8432023-09-17 18:58:06 -07006
7#include <string>
8
9#include "aos/ipc_lib/aos_sync.h"
Philipp Schrader81fa3fb2023-09-17 18:58:35 -070010#include "aos/util/top.h"
Philipp Schraderab2f8432023-09-17 18:58:06 -070011
12namespace aos::ipc_lib {
13
14// Results of atomically loading the ownership state via RobustOwnershipTracker
15// below. This allows the state to be compared and queried later.
16class ThreadOwnerStatusSnapshot {
17 public:
18 ThreadOwnerStatusSnapshot() : futex_(0) {}
19 ThreadOwnerStatusSnapshot(aos_futex futex) : futex_(futex) {}
20 ThreadOwnerStatusSnapshot(const ThreadOwnerStatusSnapshot &) = default;
21 ThreadOwnerStatusSnapshot &operator=(const ThreadOwnerStatusSnapshot &) =
22 default;
23 ThreadOwnerStatusSnapshot(ThreadOwnerStatusSnapshot &&) = default;
24 ThreadOwnerStatusSnapshot &operator=(ThreadOwnerStatusSnapshot &&) = default;
25
26 // Returns if the owner died as noticed by the robust futex using Acquire
27 // memory ordering.
28 bool OwnerIsDead() const { return (futex_ & FUTEX_OWNER_DIED) != 0; }
29
30 // Returns true if no one has claimed ownership.
31 bool IsUnclaimed() const { return futex_ == 0; }
32
Philipp Schraderab2f8432023-09-17 18:58:06 -070033 // Returns the thread ID (a.k.a. "tid") of the owning thread. Use this when
34 // trying to access the /proc entry that corresponds to the owning thread for
35 // example. Do not use the futex value directly.
36 pid_t tid() const { return futex_ & FUTEX_TID_MASK; }
37
38 bool operator==(const ThreadOwnerStatusSnapshot &other) const {
39 return other.futex_ == futex_;
40 }
41
42 private:
43 aos_futex futex_;
44};
45
46// This object reliably tracks a thread owning a resource. A single thread may
47// possess multiple resources like senders and receivers. Each resource can have
48// its own instance of this class. These instances are responsible for
49// monitoring the thread that owns them. Each resource can use its instance of
50// this class to reliably check whether the owning thread is no longer alive.
51//
52// All methods other than Load* must be accessed under a mutex.
53class RobustOwnershipTracker {
54 public:
Philipp Schrader81fa3fb2023-09-17 18:58:35 -070055 static constexpr uint64_t kNoStartTimeTicks =
56 std::numeric_limits<uint64_t>::max();
57
58 static uint64_t ReadStartTimeTicks(pid_t tid) {
59 if (tid == 0) {
60 return kNoStartTimeTicks;
61 }
62 std::optional<aos::util::ProcStat> proc_stat = util::ReadProcStat(tid);
63 if (!proc_stat.has_value()) {
64 return kNoStartTimeTicks;
65 }
66 return proc_stat->start_time_ticks;
67 }
68
69 // Loads the realtime-compatible contents of the ownership tracker with
70 // Acquire memory ordering.
Philipp Schraderab2f8432023-09-17 18:58:06 -070071 ThreadOwnerStatusSnapshot LoadAcquire() const {
72 return ThreadOwnerStatusSnapshot(
73 __atomic_load_n(&(mutex_.futex), __ATOMIC_ACQUIRE));
74 }
75
Philipp Schrader81fa3fb2023-09-17 18:58:35 -070076 // Loads all the realtime-compatible contents of the ownership tracker with
77 // Relaxed memory order.
Philipp Schraderab2f8432023-09-17 18:58:06 -070078 ThreadOwnerStatusSnapshot LoadRelaxed() const {
79 return ThreadOwnerStatusSnapshot(
80 __atomic_load_n(&(mutex_.futex), __ATOMIC_RELAXED));
81 }
82
Philipp Schrader81fa3fb2023-09-17 18:58:35 -070083 // Checks both the robust futex and dredges through /proc to see if the thread
84 // is alive. As per the class description, this must only be called under a
85 // mutex. This must not be called in a realtime context and it is slow.
86 bool OwnerIsDefinitelyAbsolutelyDead() const {
87 auto loaded = LoadAcquire();
88 if (loaded.OwnerIsDead()) {
89 return true;
90 }
91 if (loaded.IsUnclaimed()) {
92 return false;
93 }
94 const uint64_t proc_start_time_ticks = ReadStartTimeTicks(loaded.tid());
95 if (proc_start_time_ticks == kNoStartTimeTicks) {
96 LOG(ERROR) << "Detected that PID " << loaded.tid() << " died.";
97 return true;
98 }
99
100 if (proc_start_time_ticks != start_time_ticks_) {
101 LOG(ERROR) << "Detected that PID " << loaded.tid()
102 << " died from a starttime missmatch.";
103 return true;
104 }
105 return false;
106 }
107
Philipp Schraderab2f8432023-09-17 18:58:06 -0700108 // Clears all ownership state.
109 //
110 // This should only really be called if you are 100% certain that the owner is
111 // dead. Use `LoadAquire().OwnerIsDead()` to determine this.
112 void ForceClear() {
113 // Must be opposite order of Acquire.
114 // We only deal with the futex here because we don't want to change anything
115 // about the linked list. We just want to release ownership here. We still
116 // want the kernel to know about this element via the linked list the next
117 // time someone takes ownership.
118 __atomic_store_n(&(mutex_.futex), 0, __ATOMIC_RELEASE);
Philipp Schrader81fa3fb2023-09-17 18:58:35 -0700119 start_time_ticks_ = kNoStartTimeTicks;
Philipp Schraderab2f8432023-09-17 18:58:06 -0700120 }
121
122 // Returns true if this thread holds ownership.
123 bool IsHeldBySelf() { return death_notification_is_held(&mutex_); }
124
125 // Acquires ownership. Other threads will know that this thread holds the
126 // ownership or be notified if this thread dies.
Philipp Schrader81fa3fb2023-09-17 18:58:35 -0700127 void Acquire() {
128 pid_t tid = syscall(SYS_gettid);
129 assert(tid > 0);
130 const uint64_t proc_start_time_ticks = ReadStartTimeTicks(tid);
131 CHECK_NE(proc_start_time_ticks, kNoStartTimeTicks);
132
133 start_time_ticks_ = proc_start_time_ticks;
134 death_notification_init(&mutex_);
135 }
Philipp Schraderab2f8432023-09-17 18:58:06 -0700136
137 // Releases ownership.
138 //
139 // This should only be called from the owning thread.
140 void Release() {
141 // Must be opposite order of Acquire.
142 death_notification_release(&mutex_);
Philipp Schrader81fa3fb2023-09-17 18:58:35 -0700143 start_time_ticks_ = kNoStartTimeTicks;
Philipp Schraderab2f8432023-09-17 18:58:06 -0700144 }
145
146 // Marks the owner as dead if the specified tid is the current owner. In other
147 // words, after this call, a call to `LoadAcquire().OwnerIsDead()` may start
148 // returning true.
149 //
150 // The motivation here is for use in testing. DO NOT USE in production code.
151 // The logic here is only good enough for testing.
152 bool PretendThatOwnerIsDeadForTesting(pid_t tid);
153
154 // Returns a string representing this object.
155 std::string DebugString() const;
156
157 private:
158 // Robust futex to track ownership the normal way. The futex is inside the
159 // mutex here. We use the wrapper mutex because the death_notification_*
160 // functions operate on that instead of the futex directly.
161 //
162 // We use a futex here because:
163 // - futexes are fast.
164 // - The kernel can atomically clean up a dead thread and mark the futex
165 // appropriately.
166 // - Owners can clean up after dead threads.
167 aos_mutex mutex_;
Philipp Schrader81fa3fb2023-09-17 18:58:35 -0700168
169 // Thread's start time ticks.
170 std::atomic<uint64_t> start_time_ticks_;
Philipp Schraderab2f8432023-09-17 18:58:06 -0700171};
172
173} // namespace aos::ipc_lib
174
175#endif // AOS_IPC_LIB_ROBUST_OWNERSHIP_TRACKER_H_