James Kuszmaul | 418fd06 | 2022-03-22 15:22:27 -0700 | [diff] [blame] | 1 | #ifndef AOS_UTIL_TOP_H_ |
| 2 | #define AOS_UTIL_TOP_H_ |
| 3 | |
Stephan Pleines | b117767 | 2024-05-27 17:48:32 -0700 | [diff] [blame] | 4 | #include <stdint.h> |
| 5 | #include <sys/types.h> |
| 6 | |
| 7 | #include <chrono> |
| 8 | #include <functional> |
James Kuszmaul | 418fd06 | 2022-03-22 15:22:27 -0700 | [diff] [blame] | 9 | #include <map> |
Stephan Pleines | b117767 | 2024-05-27 17:48:32 -0700 | [diff] [blame] | 10 | #include <optional> |
| 11 | #include <set> |
James Kuszmaul | 418fd06 | 2022-03-22 15:22:27 -0700 | [diff] [blame] | 12 | #include <string> |
Stephan Pleines | b117767 | 2024-05-27 17:48:32 -0700 | [diff] [blame] | 13 | #include <utility> |
| 14 | |
| 15 | #include "flatbuffers/buffer.h" |
| 16 | #include "flatbuffers/flatbuffer_builder.h" |
James Kuszmaul | 418fd06 | 2022-03-22 15:22:27 -0700 | [diff] [blame] | 17 | |
| 18 | #include "aos/containers/ring_buffer.h" |
| 19 | #include "aos/events/event_loop.h" |
Stephan Pleines | b117767 | 2024-05-27 17:48:32 -0700 | [diff] [blame] | 20 | #include "aos/time/time.h" |
James Kuszmaul | 418fd06 | 2022-03-22 15:22:27 -0700 | [diff] [blame] | 21 | #include "aos/util/process_info_generated.h" |
| 22 | |
| 23 | namespace aos::util { |
| 24 | |
// ProcStat holds all the fields available in /proc/[pid]/stat, in the same
// order as they appear in the stat file. Currently we only use a small subset
// of the fields. See man 5 proc for details on what the fields are.
//
// Things are signed or unsigned based on whether they are listed
// as signed/unsigned in man 5 proc. Apart from pid and state, every numeric
// field is 64 bits wide because otherwise we have to write out way too many
// casts everywhere.
//
// Every scalar member carries a default initializer so that a
// default-constructed ProcStat never holds indeterminate values. The struct is
// still an aggregate, so brace/designated initialization keeps working.
struct ProcStat {
  int pid = 0;
  std::string name;
  char state = '\0';
  int64_t parent_pid = 0;
  int64_t group_id = 0;
  int64_t session_id = 0;
  int64_t tty = 0;
  int64_t tpgid = 0;
  uint64_t kernel_flags = 0;
  uint64_t minor_faults = 0;
  uint64_t children_minor_faults = 0;
  uint64_t major_faults = 0;
  uint64_t children_major_faults = 0;
  uint64_t user_mode_ticks = 0;
  uint64_t kernel_mode_ticks = 0;
  int64_t children_user_mode_ticks = 0;
  int64_t children_kernel_mode_ticks = 0;
  int64_t priority = 0;
  int64_t nice = 0;
  int64_t num_threads = 0;
  int64_t itrealvalue = 0;  // always zero.
  uint64_t start_time_ticks = 0;
  uint64_t virtual_memory_size = 0;
  // Number of pages in real memory.
  int64_t resident_set_size = 0;
  uint64_t rss_soft_limit = 0;
  uint64_t start_code_address = 0;
  uint64_t end_code_address = 0;
  uint64_t start_stack_address = 0;
  uint64_t stack_pointer = 0;
  uint64_t instruction_pointer = 0;
  uint64_t signal_bitmask = 0;
  uint64_t blocked_signals = 0;
  uint64_t ignored_signals = 0;
  uint64_t caught_signals = 0;
  uint64_t wchan = 0;
  // swap_pages fields are not maintained.
  uint64_t swap_pages = 0;
  uint64_t children_swap_pages = 0;
  int64_t exit_signal = 0;
  // CPU number last executed on.
  int64_t processor = 0;
  // Zero for non-realtime processes.
  uint64_t rt_priority = 0;
  uint64_t scheduling_policy = 0;
  // Aggregated block I/O delay.
  uint64_t block_io_delay_ticks = 0;
  uint64_t guest_ticks = 0;
  uint64_t children_guest_ticks = 0;
  uint64_t start_data_address = 0;
  uint64_t end_data_address = 0;
  uint64_t start_brk_address = 0;
  uint64_t start_arg_address = 0;
  uint64_t end_arg_address = 0;
  uint64_t start_env_address = 0;
  uint64_t end_env_address = 0;
  int64_t exit_code = 0;
};
| 92 | |
Maxwell Gumley | b27245f | 2024-04-11 15:46:22 -0600 | [diff] [blame] | 93 | // Retrieves the statistics for a particular process or thread. If only a pid is |
| 94 | // provided, it reads the process's stat file at /proc/[pid]/stat. If both pid |
| 95 | // and tid are provided, it reads the thread's stat file at |
| 96 | // /proc/[pid]/task/[tid]/stat. Returns nullopt if unable to read or parse the |
| 97 | // file. |
| 98 | std::optional<ProcStat> ReadProcStat(pid_t pid, |
| 99 | std::optional<pid_t> tid = std::nullopt); |
James Kuszmaul | 418fd06 | 2022-03-22 15:22:27 -0700 | [diff] [blame] | 100 | |
| 101 | // This class provides a basic utility for retrieving general performance |
| 102 | // information on running processes (named after the top utility). It can either |
| 103 | // be used to directly get information on individual processes (via |
| 104 | // set_track_pids()) or used to track a list of the top N processes with the |
| 105 | // highest CPU usage. |
| 106 | // Note that this currently relies on sampling processes in /proc every second |
| 107 | // and using the differences between the two readings to calculate CPU usage. |
| 108 | // For crash-looping processees or other situations with highly variable or |
| 109 | // extremely short-lived loads, this may do a poor job of capturing information. |
| 110 | class Top { |
| 111 | public: |
Maxwell Gumley | b27245f | 2024-04-11 15:46:22 -0600 | [diff] [blame] | 112 | // Set the ring buffer size to 2 so we can keep track of a current reading and |
| 113 | // previous reading. |
| 114 | static constexpr int kRingBufferSize = 2; |
| 115 | |
Austin Schuh | 979d477 | 2022-12-30 14:50:41 -0800 | [diff] [blame] | 116 | // A snapshot of the resource usage of a process. |
| 117 | struct Reading { |
| 118 | aos::monotonic_clock::time_point reading_time; |
| 119 | std::chrono::nanoseconds total_run_time; |
| 120 | // Memory usage in bytes. |
| 121 | uint64_t memory_usage; |
| 122 | }; |
| 123 | |
Maxwell Gumley | b27245f | 2024-04-11 15:46:22 -0600 | [diff] [blame] | 124 | struct ThreadReading { |
| 125 | aos::monotonic_clock::time_point reading_time; |
| 126 | std::chrono::nanoseconds total_run_time; |
| 127 | }; |
| 128 | |
| 129 | struct ThreadReadings { |
| 130 | aos::RingBuffer<ThreadReading, kRingBufferSize> readings; |
| 131 | double cpu_percent; |
| 132 | std::string name; // Name of the thread |
| 133 | aos::monotonic_clock::time_point start_time; |
| 134 | ThreadState state; |
| 135 | }; |
| 136 | |
Austin Schuh | 979d477 | 2022-12-30 14:50:41 -0800 | [diff] [blame] | 137 | // All the information we have about a process. |
| 138 | struct ProcessReadings { |
| 139 | std::string name; |
| 140 | aos::monotonic_clock::time_point start_time; |
| 141 | // CPU usage is based on the past two readings. |
| 142 | double cpu_percent; |
| 143 | // True if this is a kernel thread, false if this is a userspace thread. |
| 144 | bool kthread; |
| 145 | // Last 2 readings |
Maxwell Gumley | b27245f | 2024-04-11 15:46:22 -0600 | [diff] [blame] | 146 | aos::RingBuffer<Reading, kRingBufferSize> readings; |
| 147 | std::map<pid_t, ThreadReadings> thread_readings; |
Austin Schuh | 979d477 | 2022-12-30 14:50:41 -0800 | [diff] [blame] | 148 | }; |
| 149 | |
Maxwell Gumley | b27245f | 2024-04-11 15:46:22 -0600 | [diff] [blame] | 150 | // An enum for track_threads with enabled and disabled |
| 151 | enum class TrackThreadsMode { |
| 152 | kDisabled, |
| 153 | kEnabled // Track the thread ids for each process. |
| 154 | }; |
| 155 | |
| 156 | // An enum for track_per_thread_info with enabled and disabled |
| 157 | enum class TrackPerThreadInfoMode { |
| 158 | kDisabled, |
| 159 | kEnabled // Track statistics for each thread. |
| 160 | }; |
| 161 | |
| 162 | // Constructs a new Top object. |
| 163 | // event_loop: The event loop object to be used. |
| 164 | // track_threads: Set to true to track the thread IDs for each process. |
| 165 | // track_per_thread_info: Set to true to track statistics for each thread. |
| 166 | Top(aos::EventLoop *event_loop, TrackThreadsMode track_threads, |
| 167 | TrackPerThreadInfoMode track_per_thread_info_mode); |
James Kuszmaul | 418fd06 | 2022-03-22 15:22:27 -0700 | [diff] [blame] | 168 | |
| 169 | // Set whether to track all the top processes (this will result in us having |
| 170 | // to track every single process on the system, so that we can sort them). |
| 171 | void set_track_top_processes(bool track_all) { track_all_ = track_all; } |
| 172 | |
Austin Schuh | 608514f | 2022-12-30 15:51:30 -0800 | [diff] [blame] | 173 | void set_on_reading_update(std::function<void()> fn) { |
| 174 | on_reading_update_ = std::move(fn); |
| 175 | } |
| 176 | |
James Kuszmaul | 418fd06 | 2022-03-22 15:22:27 -0700 | [diff] [blame] | 177 | // Specify a set of individual processes to track statistics for. |
| 178 | // This can be changed at run-time, although it may take up to kSamplePeriod |
| 179 | // to have full statistics on all the relevant processes, since we need at |
| 180 | // least two samples to estimate CPU usage. |
| 181 | void set_track_pids(const std::set<pid_t> &pids) { pids_to_track_ = pids; } |
| 182 | |
| 183 | // Retrieve statistics for the specified process. Will return the null offset |
| 184 | // of no such pid is being tracked. |
| 185 | flatbuffers::Offset<ProcessInfo> InfoForProcess( |
| 186 | flatbuffers::FlatBufferBuilder *fbb, pid_t pid); |
| 187 | |
| 188 | // Returns information on up to n processes, sorted by CPU usage. |
| 189 | flatbuffers::Offset<TopProcessesFbs> TopProcesses( |
| 190 | flatbuffers::FlatBufferBuilder *fbb, int n); |
| 191 | |
Austin Schuh | 979d477 | 2022-12-30 14:50:41 -0800 | [diff] [blame] | 192 | const std::map<pid_t, ProcessReadings> &readings() const { return readings_; } |
| 193 | |
James Kuszmaul | 418fd06 | 2022-03-22 15:22:27 -0700 | [diff] [blame] | 194 | private: |
| 195 | // Rate at which to sample /proc/[pid]/stat. |
| 196 | static constexpr std::chrono::seconds kSamplePeriod{1}; |
| 197 | |
James Kuszmaul | 418fd06 | 2022-03-22 15:22:27 -0700 | [diff] [blame] | 198 | std::chrono::nanoseconds TotalProcessTime(const ProcStat &proc_stat); |
| 199 | aos::monotonic_clock::time_point ProcessStartTime(const ProcStat &proc_stat); |
| 200 | uint64_t RealMemoryUsage(const ProcStat &proc_stat); |
| 201 | void UpdateReadings(); |
Maxwell Gumley | b27245f | 2024-04-11 15:46:22 -0600 | [diff] [blame] | 202 | void UpdateThreadReadings(pid_t pid, ProcessReadings &process); |
milind-u | eb075d2 | 2023-02-24 14:57:43 -0800 | [diff] [blame] | 203 | // Adds thread ids for the given pid to the pids set, |
| 204 | // if we are tracking threads. |
| 205 | void MaybeAddThreadIds(pid_t pid, std::set<pid_t> *pids); |
James Kuszmaul | 418fd06 | 2022-03-22 15:22:27 -0700 | [diff] [blame] | 206 | |
| 207 | aos::EventLoop *event_loop_; |
| 208 | |
| 209 | // Length of a clock tick (used to convert from raw numbers in /proc to actual |
| 210 | // times). |
| 211 | const std::chrono::nanoseconds clock_tick_; |
| 212 | // Page size, in bytes, on the current system. |
| 213 | const long page_size_; |
| 214 | |
| 215 | std::set<pid_t> pids_to_track_; |
| 216 | bool track_all_ = false; |
Maxwell Gumley | b27245f | 2024-04-11 15:46:22 -0600 | [diff] [blame] | 217 | TrackThreadsMode track_threads_; |
| 218 | |
| 219 | // Whether to include per-thread information in the top processes. |
| 220 | TrackPerThreadInfoMode track_per_thread_info_; |
James Kuszmaul | 418fd06 | 2022-03-22 15:22:27 -0700 | [diff] [blame] | 221 | |
| 222 | std::map<pid_t, ProcessReadings> readings_; |
Austin Schuh | 608514f | 2022-12-30 15:51:30 -0800 | [diff] [blame] | 223 | |
| 224 | std::function<void()> on_reading_update_; |
James Kuszmaul | 418fd06 | 2022-03-22 15:22:27 -0700 | [diff] [blame] | 225 | }; |
| 226 | |
| 227 | } // namespace aos::util |
| 228 | #endif // AOS_UTIL_TOP_H_ |