James Kuszmaul | 418fd06 | 2022-03-22 15:22:27 -0700 | [diff] [blame] | 1 | #include "aos/util/top.h" |
| 2 | |
| 3 | #include <dirent.h> |
| 4 | #include <unistd.h> |
| 5 | |
| 6 | #include <queue> |
| 7 | #include <string> |
| 8 | |
| 9 | #include "absl/strings/numbers.h" |
| 10 | #include "absl/strings/str_format.h" |
| 11 | #include "absl/strings/str_split.h" |
| 12 | |
Austin Schuh | 979d477 | 2022-12-30 14:50:41 -0800 | [diff] [blame] | 13 | #define PF_KTHREAD 0x00200000 |
| 14 | |
James Kuszmaul | 418fd06 | 2022-03-22 15:22:27 -0700 | [diff] [blame] | 15 | namespace aos::util { |
| 16 | namespace { |
| 17 | std::optional<std::string> ReadShortFile(std::string_view file_name) { |
| 18 | // Open as input and seek to end immediately. |
| 19 | std::ifstream file(std::string(file_name), std::ios_base::in); |
| 20 | if (!file.good()) { |
| 21 | VLOG(1) << "Can't read " << file_name; |
| 22 | return std::nullopt; |
| 23 | } |
| 24 | const size_t kMaxLineLength = 4096; |
| 25 | char buffer[kMaxLineLength]; |
| 26 | file.read(buffer, kMaxLineLength); |
| 27 | if (!file.eof()) { |
| 28 | return std::nullopt; |
| 29 | } |
| 30 | return std::string(buffer, file.gcount()); |
| 31 | } |
| 32 | } // namespace |
| 33 | |
| 34 | std::optional<ProcStat> ReadProcStat(pid_t pid) { |
| 35 | std::optional<std::string> contents = |
| 36 | ReadShortFile(absl::StrFormat("/proc/%d/stat", pid)); |
| 37 | if (!contents.has_value()) { |
| 38 | return std::nullopt; |
| 39 | } |
| 40 | const size_t start_name = contents->find_first_of('('); |
| 41 | const size_t end_name = contents->find_last_of(')'); |
| 42 | if (start_name == std::string::npos || end_name == std::string::npos || |
| 43 | end_name < start_name) { |
| 44 | VLOG(1) << "No name found in stat line " << contents.value(); |
| 45 | return std::nullopt; |
| 46 | } |
| 47 | std::string_view name(contents->c_str() + start_name + 1, |
| 48 | end_name - start_name - 1); |
| 49 | |
| 50 | std::vector<std::string_view> fields = |
| 51 | absl::StrSplit(std::string_view(contents->c_str() + end_name + 1, |
| 52 | contents->size() - end_name - 1), |
| 53 | ' ', absl::SkipWhitespace()); |
| 54 | constexpr int kNumFieldsAfterName = 50; |
| 55 | if (fields.size() != kNumFieldsAfterName) { |
| 56 | VLOG(1) << "Incorrect number of fields " << fields.size(); |
| 57 | return std::nullopt; |
| 58 | } |
| 59 | // The first field is a character for the current process state; every single |
| 60 | // field after that should be an integer. |
| 61 | if (fields[0].size() != 1) { |
| 62 | VLOG(1) << "State field is too long: " << fields[0]; |
| 63 | return std::nullopt; |
| 64 | } |
| 65 | std::array<absl::int128, kNumFieldsAfterName - 1> numbers; |
| 66 | for (int ii = 1; ii < kNumFieldsAfterName; ++ii) { |
| 67 | if (!absl::SimpleAtoi(fields[ii], &numbers[ii - 1])) { |
| 68 | VLOG(1) << "Failed to parse field " << ii << " as number: " << fields[ii]; |
| 69 | return std::nullopt; |
| 70 | } |
| 71 | } |
| 72 | return ProcStat{ |
| 73 | .pid = pid, |
| 74 | .name = std::string(name), |
| 75 | .state = fields.at(0).at(0), |
| 76 | .parent_pid = static_cast<int64_t>(numbers.at(0)), |
| 77 | .group_id = static_cast<int64_t>(numbers.at(1)), |
| 78 | .session_id = static_cast<int64_t>(numbers.at(2)), |
| 79 | .tty = static_cast<int64_t>(numbers.at(3)), |
| 80 | .tpgid = static_cast<int64_t>(numbers.at(4)), |
| 81 | .kernel_flags = static_cast<uint64_t>(numbers.at(5)), |
| 82 | .minor_faults = static_cast<uint64_t>(numbers.at(6)), |
| 83 | .children_minor_faults = static_cast<uint64_t>(numbers.at(7)), |
| 84 | .major_faults = static_cast<uint64_t>(numbers.at(8)), |
| 85 | .children_major_faults = static_cast<uint64_t>(numbers.at(9)), |
| 86 | .user_mode_ticks = static_cast<uint64_t>(numbers.at(10)), |
| 87 | .kernel_mode_ticks = static_cast<uint64_t>(numbers.at(11)), |
| 88 | .children_user_mode_ticks = static_cast<int64_t>(numbers.at(12)), |
| 89 | .children_kernel_mode_ticks = static_cast<int64_t>(numbers.at(13)), |
| 90 | .priority = static_cast<int64_t>(numbers.at(14)), |
| 91 | .nice = static_cast<int64_t>(numbers.at(15)), |
| 92 | .num_threads = static_cast<int64_t>(numbers.at(16)), |
| 93 | .itrealvalue = static_cast<int64_t>(numbers.at(17)), |
| 94 | .start_time_ticks = static_cast<uint64_t>(numbers.at(18)), |
| 95 | .virtual_memory_size = static_cast<uint64_t>(numbers.at(19)), |
| 96 | .resident_set_size = static_cast<int64_t>(numbers.at(20)), |
| 97 | .rss_soft_limit = static_cast<uint64_t>(numbers.at(21)), |
| 98 | .start_code_address = static_cast<uint64_t>(numbers.at(22)), |
| 99 | .end_code_address = static_cast<uint64_t>(numbers.at(23)), |
| 100 | .start_stack_address = static_cast<uint64_t>(numbers.at(24)), |
| 101 | .stack_pointer = static_cast<uint64_t>(numbers.at(25)), |
| 102 | .instruction_pointer = static_cast<uint64_t>(numbers.at(26)), |
| 103 | .signal_bitmask = static_cast<uint64_t>(numbers.at(27)), |
| 104 | .blocked_signals = static_cast<uint64_t>(numbers.at(28)), |
| 105 | .ignored_signals = static_cast<uint64_t>(numbers.at(29)), |
| 106 | .caught_signals = static_cast<uint64_t>(numbers.at(30)), |
| 107 | .wchan = static_cast<uint64_t>(numbers.at(31)), |
| 108 | .swap_pages = static_cast<uint64_t>(numbers.at(32)), |
| 109 | .children_swap_pages = static_cast<uint64_t>(numbers.at(33)), |
| 110 | .exit_signal = static_cast<int64_t>(numbers.at(34)), |
| 111 | .processor = static_cast<int64_t>(numbers.at(35)), |
| 112 | .rt_priority = static_cast<uint64_t>(numbers.at(36)), |
| 113 | .scheduling_policy = static_cast<uint64_t>(numbers.at(37)), |
| 114 | .block_io_delay_ticks = static_cast<uint64_t>(numbers.at(38)), |
| 115 | .guest_ticks = static_cast<uint64_t>(numbers.at(39)), |
| 116 | .children_guest_ticks = static_cast<uint64_t>(numbers.at(40)), |
| 117 | .start_data_address = static_cast<uint64_t>(numbers.at(41)), |
| 118 | .end_data_address = static_cast<uint64_t>(numbers.at(42)), |
| 119 | .start_brk_address = static_cast<uint64_t>(numbers.at(43)), |
| 120 | .start_arg_address = static_cast<uint64_t>(numbers.at(44)), |
| 121 | .end_arg_address = static_cast<uint64_t>(numbers.at(45)), |
| 122 | .start_env_address = static_cast<uint64_t>(numbers.at(46)), |
| 123 | .end_env_address = static_cast<uint64_t>(numbers.at(47)), |
| 124 | .exit_code = static_cast<int64_t>(numbers.at(48))}; |
| 125 | } |
| 126 | |
milind-u | eb075d2 | 2023-02-24 14:57:43 -0800 | [diff] [blame^] | 127 | Top::Top(aos::EventLoop *event_loop, bool track_threads) |
James Kuszmaul | 418fd06 | 2022-03-22 15:22:27 -0700 | [diff] [blame] | 128 | : event_loop_(event_loop), |
| 129 | clock_tick_(std::chrono::nanoseconds(1000000000 / sysconf(_SC_CLK_TCK))), |
milind-u | eb075d2 | 2023-02-24 14:57:43 -0800 | [diff] [blame^] | 130 | page_size_(sysconf(_SC_PAGESIZE)), |
| 131 | track_threads_(track_threads) { |
James Kuszmaul | 418fd06 | 2022-03-22 15:22:27 -0700 | [diff] [blame] | 132 | TimerHandler *timer = event_loop_->AddTimer([this]() { UpdateReadings(); }); |
| 133 | event_loop_->OnRun([timer, this]() { |
| 134 | timer->Setup(event_loop_->monotonic_now(), kSamplePeriod); |
| 135 | }); |
| 136 | } |
| 137 | |
| 138 | std::chrono::nanoseconds Top::TotalProcessTime(const ProcStat &proc_stat) { |
| 139 | return (proc_stat.user_mode_ticks + proc_stat.kernel_mode_ticks) * |
| 140 | clock_tick_; |
| 141 | } |
| 142 | |
| 143 | aos::monotonic_clock::time_point Top::ProcessStartTime( |
| 144 | const ProcStat &proc_stat) { |
| 145 | return aos::monotonic_clock::time_point(proc_stat.start_time_ticks * |
| 146 | clock_tick_); |
| 147 | } |
| 148 | |
| 149 | uint64_t Top::RealMemoryUsage(const ProcStat &proc_stat) { |
| 150 | return proc_stat.resident_set_size * page_size_; |
| 151 | } |
| 152 | |
milind-u | eb075d2 | 2023-02-24 14:57:43 -0800 | [diff] [blame^] | 153 | void Top::MaybeAddThreadIds(pid_t pid, std::set<pid_t> *pids) { |
| 154 | if (!track_threads_) { |
| 155 | return; |
| 156 | } |
| 157 | |
| 158 | // Add all the threads in /proc/pid/task |
| 159 | std::string task_dir = absl::StrCat("/proc/", std::to_string(pid), "/task/"); |
| 160 | DIR *dir = opendir(task_dir.data()); |
| 161 | if (dir == nullptr) { |
| 162 | LOG(WARNING) << "Unable to open " << task_dir; |
| 163 | return; |
| 164 | } |
| 165 | |
| 166 | while (true) { |
| 167 | struct dirent *const dir_entry = readdir(dir); |
| 168 | if (dir_entry == nullptr) { |
| 169 | break; |
| 170 | } |
| 171 | pid_t tid; |
| 172 | if (absl::SimpleAtoi(dir_entry->d_name, &tid)) { |
| 173 | pids->emplace(tid); |
| 174 | } |
| 175 | } |
| 176 | closedir(dir); |
| 177 | } |
| 178 | |
James Kuszmaul | 418fd06 | 2022-03-22 15:22:27 -0700 | [diff] [blame] | 179 | void Top::UpdateReadings() { |
| 180 | aos::monotonic_clock::time_point now = event_loop_->monotonic_now(); |
| 181 | // Get all the processes that we *might* care about. |
| 182 | std::set<pid_t> pids = pids_to_track_; |
James Kuszmaul | 63a4548 | 2022-04-19 16:12:01 -0700 | [diff] [blame] | 183 | // Ensure that we check on the status of every process that we are already |
| 184 | // tracking. |
Austin Schuh | 60e7794 | 2022-05-16 17:48:24 -0700 | [diff] [blame] | 185 | for (const auto &reading : readings_) { |
James Kuszmaul | 63a4548 | 2022-04-19 16:12:01 -0700 | [diff] [blame] | 186 | pids.insert(reading.first); |
milind-u | eb075d2 | 2023-02-24 14:57:43 -0800 | [diff] [blame^] | 187 | MaybeAddThreadIds(reading.first, &pids); |
James Kuszmaul | 63a4548 | 2022-04-19 16:12:01 -0700 | [diff] [blame] | 188 | } |
James Kuszmaul | 418fd06 | 2022-03-22 15:22:27 -0700 | [diff] [blame] | 189 | if (track_all_) { |
| 190 | DIR *const dir = opendir("/proc"); |
| 191 | if (dir == nullptr) { |
| 192 | PLOG(FATAL) << "Failed to open /proc"; |
| 193 | } |
| 194 | while (true) { |
| 195 | struct dirent *const dir_entry = readdir(dir); |
| 196 | if (dir_entry == nullptr) { |
| 197 | break; |
| 198 | } |
| 199 | pid_t pid; |
| 200 | if (dir_entry->d_type == DT_DIR && |
| 201 | absl::SimpleAtoi(dir_entry->d_name, &pid)) { |
| 202 | pids.insert(pid); |
milind-u | eb075d2 | 2023-02-24 14:57:43 -0800 | [diff] [blame^] | 203 | MaybeAddThreadIds(pid, &pids); |
James Kuszmaul | 418fd06 | 2022-03-22 15:22:27 -0700 | [diff] [blame] | 204 | } |
| 205 | } |
James Kuszmaul | 28c9e39 | 2022-11-04 13:24:12 -0700 | [diff] [blame] | 206 | closedir(dir); |
James Kuszmaul | 418fd06 | 2022-03-22 15:22:27 -0700 | [diff] [blame] | 207 | } |
| 208 | |
| 209 | for (const pid_t pid : pids) { |
| 210 | std::optional<ProcStat> proc_stat = ReadProcStat(pid); |
| 211 | // Stop tracking processes that have died. |
| 212 | if (!proc_stat.has_value()) { |
| 213 | readings_.erase(pid); |
| 214 | continue; |
| 215 | } |
| 216 | const aos::monotonic_clock::time_point start_time = |
| 217 | ProcessStartTime(*proc_stat); |
| 218 | auto reading_iter = readings_.find(pid); |
| 219 | if (reading_iter == readings_.end()) { |
Austin Schuh | 979d477 | 2022-12-30 14:50:41 -0800 | [diff] [blame] | 220 | reading_iter = |
| 221 | readings_ |
| 222 | .insert(std::make_pair( |
| 223 | pid, ProcessReadings{.name = proc_stat->name, |
| 224 | .start_time = start_time, |
| 225 | .cpu_percent = 0.0, |
| 226 | .kthread = !!(proc_stat->kernel_flags & |
| 227 | PF_KTHREAD), |
| 228 | .readings = {}})) |
| 229 | .first; |
James Kuszmaul | 418fd06 | 2022-03-22 15:22:27 -0700 | [diff] [blame] | 230 | } |
| 231 | ProcessReadings &process = reading_iter->second; |
| 232 | // The process associated with the PID has changed; reset the state. |
| 233 | if (process.start_time != start_time) { |
| 234 | process.name = proc_stat->name; |
| 235 | process.start_time = start_time; |
| 236 | process.readings.Reset(); |
| 237 | } |
James Kuszmaul | 6b35e3a | 2022-04-06 15:00:39 -0700 | [diff] [blame] | 238 | // If the process name has changed (e.g., if our first reading for a process |
| 239 | // name occurred before execvp was called), then update it. |
| 240 | if (process.name != proc_stat->name) { |
| 241 | process.name = proc_stat->name; |
| 242 | } |
James Kuszmaul | 418fd06 | 2022-03-22 15:22:27 -0700 | [diff] [blame] | 243 | |
| 244 | process.readings.Push(Reading{now, TotalProcessTime(*proc_stat), |
| 245 | RealMemoryUsage(*proc_stat)}); |
| 246 | if (process.readings.size() == 2) { |
| 247 | process.cpu_percent = |
| 248 | aos::time::DurationInSeconds(process.readings[1].total_run_time - |
| 249 | process.readings[0].total_run_time) / |
| 250 | aos::time::DurationInSeconds(process.readings[1].reading_time - |
| 251 | process.readings[0].reading_time); |
| 252 | } else { |
| 253 | process.cpu_percent = 0.0; |
| 254 | } |
| 255 | } |
Austin Schuh | 608514f | 2022-12-30 15:51:30 -0800 | [diff] [blame] | 256 | |
| 257 | if (on_reading_update_) { |
| 258 | on_reading_update_(); |
| 259 | } |
James Kuszmaul | 418fd06 | 2022-03-22 15:22:27 -0700 | [diff] [blame] | 260 | } |
| 261 | |
| 262 | flatbuffers::Offset<ProcessInfo> Top::InfoForProcess( |
| 263 | flatbuffers::FlatBufferBuilder *fbb, pid_t pid) { |
| 264 | auto reading_iter = readings_.find(pid); |
| 265 | if (reading_iter == readings_.end()) { |
| 266 | return {}; |
| 267 | } |
| 268 | const ProcessReadings &reading = reading_iter->second; |
| 269 | const flatbuffers::Offset<flatbuffers::String> name = |
| 270 | fbb->CreateString(reading.name); |
| 271 | ProcessInfo::Builder builder(*fbb); |
| 272 | builder.add_pid(pid); |
| 273 | builder.add_name(name); |
| 274 | builder.add_cpu_usage(reading.cpu_percent); |
| 275 | builder.add_physical_memory( |
| 276 | reading.readings[reading.readings.size() - 1].memory_usage); |
| 277 | return builder.Finish(); |
| 278 | } |
| 279 | |
| 280 | flatbuffers::Offset<TopProcessesFbs> Top::TopProcesses( |
| 281 | flatbuffers::FlatBufferBuilder *fbb, int n) { |
| 282 | // Pair is {cpu_usage, pid}. |
| 283 | std::priority_queue<std::pair<double, pid_t>> cpu_usages; |
| 284 | for (const auto &pair : readings_) { |
| 285 | // Deliberately include 0.0 percent CPU things in the usage list so that if |
| 286 | // the user asks for an arbitrarily large number of processes they'll get |
| 287 | // everything. |
| 288 | cpu_usages.push(std::make_pair(pair.second.cpu_percent, pair.first)); |
| 289 | } |
| 290 | std::vector<flatbuffers::Offset<ProcessInfo>> offsets; |
| 291 | for (int ii = 0; ii < n && !cpu_usages.empty(); ++ii) { |
| 292 | offsets.push_back(InfoForProcess(fbb, cpu_usages.top().second)); |
| 293 | cpu_usages.pop(); |
| 294 | } |
| 295 | const flatbuffers::Offset< |
| 296 | flatbuffers::Vector<flatbuffers::Offset<ProcessInfo>>> |
| 297 | vector_offset = fbb->CreateVector(offsets); |
| 298 | TopProcessesFbs::Builder builder(*fbb); |
| 299 | builder.add_processes(vector_offset); |
| 300 | return builder.Finish(); |
| 301 | } |
| 302 | |
| 303 | } // namespace aos::util |