blob: c24d56cf79f00b23ef57ae9f757042338beb574f [file] [log] [blame]
James Kuszmaul418fd062022-03-22 15:22:27 -07001#ifndef AOS_UTIL_TOP_H_
2#define AOS_UTIL_TOP_H_
3
4#include <map>
5#include <string>
6
7#include "aos/containers/ring_buffer.h"
8#include "aos/events/event_loop.h"
9#include "aos/util/process_info_generated.h"
10
11namespace aos::util {
12
// ProcStat is a struct to hold all the fields available in /proc/[pid]/stat.
// Currently we only use a small subset of the fields. See man 5 proc for
// details on what the fields are--these are in the same order as they appear in
// the stat file.
//
// Things are signed or unsigned based on whether they are listed
// as signed/unsigned in man 5 proc. We just make everything 64 bits wide
// because otherwise we have to write out way too many casts everywhere (pid is
// the one exception and is stored as a plain int).
//
// Every scalar field carries a default member initializer so that a
// default-constructed or partially brace-initialized ProcStat never exposes
// indeterminate values. The struct remains an aggregate and the field order is
// unchanged, so positional and designated initialization keep working.
struct ProcStat {
  int pid = 0;
  std::string name;
  char state = '\0';
  int64_t parent_pid = 0;
  int64_t group_id = 0;
  int64_t session_id = 0;
  int64_t tty = 0;
  int64_t tpgid = 0;
  uint64_t kernel_flags = 0;
  uint64_t minor_faults = 0;
  uint64_t children_minor_faults = 0;
  uint64_t major_faults = 0;
  uint64_t children_major_faults = 0;
  uint64_t user_mode_ticks = 0;
  uint64_t kernel_mode_ticks = 0;
  int64_t children_user_mode_ticks = 0;
  int64_t children_kernel_mode_ticks = 0;
  int64_t priority = 0;
  int64_t nice = 0;
  int64_t num_threads = 0;
  int64_t itrealvalue = 0;  // always zero.
  uint64_t start_time_ticks = 0;
  uint64_t virtual_memory_size = 0;
  // Number of pages in real memory.
  int64_t resident_set_size = 0;
  uint64_t rss_soft_limit = 0;
  uint64_t start_code_address = 0;
  uint64_t end_code_address = 0;
  uint64_t start_stack_address = 0;
  uint64_t stack_pointer = 0;
  uint64_t instruction_pointer = 0;
  uint64_t signal_bitmask = 0;
  uint64_t blocked_signals = 0;
  uint64_t ignored_signals = 0;
  uint64_t caught_signals = 0;
  uint64_t wchan = 0;
  // swap_pages fields are not maintained.
  uint64_t swap_pages = 0;
  uint64_t children_swap_pages = 0;
  int64_t exit_signal = 0;
  // CPU number last executed on.
  int64_t processor = 0;
  // Zero for non-realtime processes.
  uint64_t rt_priority = 0;
  uint64_t scheduling_policy = 0;
  // Aggregated block I/O delay.
  uint64_t block_io_delay_ticks = 0;
  uint64_t guest_ticks = 0;
  uint64_t children_guest_ticks = 0;
  uint64_t start_data_address = 0;
  uint64_t end_data_address = 0;
  uint64_t start_brk_address = 0;
  uint64_t start_arg_address = 0;
  uint64_t end_arg_address = 0;
  uint64_t start_env_address = 0;
  uint64_t end_env_address = 0;
  int64_t exit_code = 0;
};
80
// Retrieves the statistics for a particular process or thread.
//
// pid: The process whose statistics should be read. If only a pid is provided,
//   the process's stat file at /proc/[pid]/stat is read.
// tid: Optional thread id. If both pid and tid are provided, the thread's stat
//   file at /proc/[pid]/task/[tid]/stat is read instead.
//
// Returns the parsed ProcStat, or nullopt if unable to read or parse the file.
std::optional<ProcStat> ReadProcStat(pid_t pid,
                                     std::optional<pid_t> tid = std::nullopt);
James Kuszmaul418fd062022-03-22 15:22:27 -070088
89// This class provides a basic utility for retrieving general performance
90// information on running processes (named after the top utility). It can either
91// be used to directly get information on individual processes (via
92// set_track_pids()) or used to track a list of the top N processes with the
93// highest CPU usage.
94// Note that this currently relies on sampling processes in /proc every second
95// and using the differences between the two readings to calculate CPU usage.
96// For crash-looping processees or other situations with highly variable or
97// extremely short-lived loads, this may do a poor job of capturing information.
98class Top {
99 public:
Maxwell Gumleyb27245f2024-04-11 15:46:22 -0600100 // Set the ring buffer size to 2 so we can keep track of a current reading and
101 // previous reading.
102 static constexpr int kRingBufferSize = 2;
103
Austin Schuh979d4772022-12-30 14:50:41 -0800104 // A snapshot of the resource usage of a process.
105 struct Reading {
106 aos::monotonic_clock::time_point reading_time;
107 std::chrono::nanoseconds total_run_time;
108 // Memory usage in bytes.
109 uint64_t memory_usage;
110 };
111
Maxwell Gumleyb27245f2024-04-11 15:46:22 -0600112 struct ThreadReading {
113 aos::monotonic_clock::time_point reading_time;
114 std::chrono::nanoseconds total_run_time;
115 };
116
117 struct ThreadReadings {
118 aos::RingBuffer<ThreadReading, kRingBufferSize> readings;
119 double cpu_percent;
120 std::string name; // Name of the thread
121 aos::monotonic_clock::time_point start_time;
122 ThreadState state;
123 };
124
Austin Schuh979d4772022-12-30 14:50:41 -0800125 // All the information we have about a process.
126 struct ProcessReadings {
127 std::string name;
128 aos::monotonic_clock::time_point start_time;
129 // CPU usage is based on the past two readings.
130 double cpu_percent;
131 // True if this is a kernel thread, false if this is a userspace thread.
132 bool kthread;
133 // Last 2 readings
Maxwell Gumleyb27245f2024-04-11 15:46:22 -0600134 aos::RingBuffer<Reading, kRingBufferSize> readings;
135 std::map<pid_t, ThreadReadings> thread_readings;
Austin Schuh979d4772022-12-30 14:50:41 -0800136 };
137
Maxwell Gumleyb27245f2024-04-11 15:46:22 -0600138 // An enum for track_threads with enabled and disabled
139 enum class TrackThreadsMode {
140 kDisabled,
141 kEnabled // Track the thread ids for each process.
142 };
143
144 // An enum for track_per_thread_info with enabled and disabled
145 enum class TrackPerThreadInfoMode {
146 kDisabled,
147 kEnabled // Track statistics for each thread.
148 };
149
150 // Constructs a new Top object.
151 // event_loop: The event loop object to be used.
152 // track_threads: Set to true to track the thread IDs for each process.
153 // track_per_thread_info: Set to true to track statistics for each thread.
154 Top(aos::EventLoop *event_loop, TrackThreadsMode track_threads,
155 TrackPerThreadInfoMode track_per_thread_info_mode);
James Kuszmaul418fd062022-03-22 15:22:27 -0700156
157 // Set whether to track all the top processes (this will result in us having
158 // to track every single process on the system, so that we can sort them).
159 void set_track_top_processes(bool track_all) { track_all_ = track_all; }
160
Austin Schuh608514f2022-12-30 15:51:30 -0800161 void set_on_reading_update(std::function<void()> fn) {
162 on_reading_update_ = std::move(fn);
163 }
164
James Kuszmaul418fd062022-03-22 15:22:27 -0700165 // Specify a set of individual processes to track statistics for.
166 // This can be changed at run-time, although it may take up to kSamplePeriod
167 // to have full statistics on all the relevant processes, since we need at
168 // least two samples to estimate CPU usage.
169 void set_track_pids(const std::set<pid_t> &pids) { pids_to_track_ = pids; }
170
171 // Retrieve statistics for the specified process. Will return the null offset
172 // of no such pid is being tracked.
173 flatbuffers::Offset<ProcessInfo> InfoForProcess(
174 flatbuffers::FlatBufferBuilder *fbb, pid_t pid);
175
176 // Returns information on up to n processes, sorted by CPU usage.
177 flatbuffers::Offset<TopProcessesFbs> TopProcesses(
178 flatbuffers::FlatBufferBuilder *fbb, int n);
179
Austin Schuh979d4772022-12-30 14:50:41 -0800180 const std::map<pid_t, ProcessReadings> &readings() const { return readings_; }
181
James Kuszmaul418fd062022-03-22 15:22:27 -0700182 private:
183 // Rate at which to sample /proc/[pid]/stat.
184 static constexpr std::chrono::seconds kSamplePeriod{1};
185
James Kuszmaul418fd062022-03-22 15:22:27 -0700186 std::chrono::nanoseconds TotalProcessTime(const ProcStat &proc_stat);
187 aos::monotonic_clock::time_point ProcessStartTime(const ProcStat &proc_stat);
188 uint64_t RealMemoryUsage(const ProcStat &proc_stat);
189 void UpdateReadings();
Maxwell Gumleyb27245f2024-04-11 15:46:22 -0600190 void UpdateThreadReadings(pid_t pid, ProcessReadings &process);
milind-ueb075d22023-02-24 14:57:43 -0800191 // Adds thread ids for the given pid to the pids set,
192 // if we are tracking threads.
193 void MaybeAddThreadIds(pid_t pid, std::set<pid_t> *pids);
James Kuszmaul418fd062022-03-22 15:22:27 -0700194
195 aos::EventLoop *event_loop_;
196
197 // Length of a clock tick (used to convert from raw numbers in /proc to actual
198 // times).
199 const std::chrono::nanoseconds clock_tick_;
200 // Page size, in bytes, on the current system.
201 const long page_size_;
202
203 std::set<pid_t> pids_to_track_;
204 bool track_all_ = false;
Maxwell Gumleyb27245f2024-04-11 15:46:22 -0600205 TrackThreadsMode track_threads_;
206
207 // Whether to include per-thread information in the top processes.
208 TrackPerThreadInfoMode track_per_thread_info_;
James Kuszmaul418fd062022-03-22 15:22:27 -0700209
210 std::map<pid_t, ProcessReadings> readings_;
Austin Schuh608514f2022-12-30 15:51:30 -0800211
212 std::function<void()> on_reading_update_;
James Kuszmaul418fd062022-03-22 15:22:27 -0700213};
214
215} // namespace aos::util
216#endif // AOS_UTIL_TOP_H_