blob: 8a786092e6e8f33eeea87971cea97e89d8df39e4 [file] [log] [blame]
James Kuszmaul418fd062022-03-22 15:22:27 -07001#include "aos/util/top.h"
2
3#include <dirent.h>
Stephan Pleinesb1177672024-05-27 17:48:32 -07004#include <errno.h>
James Kuszmaul418fd062022-03-22 15:22:27 -07005#include <unistd.h>
6
Stephan Pleinesb1177672024-05-27 17:48:32 -07007#include <algorithm>
8#include <array>
9#include <atomic>
Maxwell Gumleyb27245f2024-04-11 15:46:22 -060010#include <cstring>
Stephan Pleinesb1177672024-05-27 17:48:32 -070011#include <istream>
James Kuszmaul418fd062022-03-22 15:22:27 -070012#include <queue>
Stephan Pleinesb1177672024-05-27 17:48:32 -070013#include <ratio>
James Kuszmaul418fd062022-03-22 15:22:27 -070014#include <string>
Stephan Pleinesb1177672024-05-27 17:48:32 -070015#include <string_view>
16#include <vector>
James Kuszmaul418fd062022-03-22 15:22:27 -070017
Stephan Pleinesb1177672024-05-27 17:48:32 -070018#include "absl/numeric/int128.h"
James Kuszmaul418fd062022-03-22 15:22:27 -070019#include "absl/strings/numbers.h"
Stephan Pleinesb1177672024-05-27 17:48:32 -070020#include "absl/strings/str_cat.h"
James Kuszmaul418fd062022-03-22 15:22:27 -070021#include "absl/strings/str_format.h"
22#include "absl/strings/str_split.h"
Stephan Pleinesb1177672024-05-27 17:48:32 -070023#include "flatbuffers/string.h"
24#include "flatbuffers/vector.h"
25#include "glog/logging.h"
James Kuszmaul418fd062022-03-22 15:22:27 -070026
Austin Schuh979d4772022-12-30 14:50:41 -080027#define PF_KTHREAD 0x00200000
28
James Kuszmaul418fd062022-03-22 15:22:27 -070029namespace aos::util {
30namespace {
31std::optional<std::string> ReadShortFile(std::string_view file_name) {
32 // Open as input and seek to end immediately.
33 std::ifstream file(std::string(file_name), std::ios_base::in);
34 if (!file.good()) {
35 VLOG(1) << "Can't read " << file_name;
36 return std::nullopt;
37 }
38 const size_t kMaxLineLength = 4096;
39 char buffer[kMaxLineLength];
40 file.read(buffer, kMaxLineLength);
41 if (!file.eof()) {
42 return std::nullopt;
43 }
44 return std::string(buffer, file.gcount());
45}
46} // namespace
47
Maxwell Gumleyb27245f2024-04-11 15:46:22 -060048std::optional<ProcStat> ReadProcStat(const pid_t pid,
49 const std::optional<pid_t> tid) {
50 const std::string path =
51 tid.has_value() ? absl::StrFormat("/proc/%d/task/%d/stat", pid, *tid)
52 : absl::StrFormat("/proc/%d/stat", pid);
53 const std::optional<std::string> contents = ReadShortFile(path);
James Kuszmaul418fd062022-03-22 15:22:27 -070054 if (!contents.has_value()) {
55 return std::nullopt;
56 }
57 const size_t start_name = contents->find_first_of('(');
58 const size_t end_name = contents->find_last_of(')');
59 if (start_name == std::string::npos || end_name == std::string::npos ||
60 end_name < start_name) {
61 VLOG(1) << "No name found in stat line " << contents.value();
62 return std::nullopt;
63 }
64 std::string_view name(contents->c_str() + start_name + 1,
65 end_name - start_name - 1);
66
67 std::vector<std::string_view> fields =
68 absl::StrSplit(std::string_view(contents->c_str() + end_name + 1,
69 contents->size() - end_name - 1),
70 ' ', absl::SkipWhitespace());
71 constexpr int kNumFieldsAfterName = 50;
72 if (fields.size() != kNumFieldsAfterName) {
73 VLOG(1) << "Incorrect number of fields " << fields.size();
74 return std::nullopt;
75 }
76 // The first field is a character for the current process state; every single
77 // field after that should be an integer.
78 if (fields[0].size() != 1) {
79 VLOG(1) << "State field is too long: " << fields[0];
80 return std::nullopt;
81 }
82 std::array<absl::int128, kNumFieldsAfterName - 1> numbers;
83 for (int ii = 1; ii < kNumFieldsAfterName; ++ii) {
84 if (!absl::SimpleAtoi(fields[ii], &numbers[ii - 1])) {
85 VLOG(1) << "Failed to parse field " << ii << " as number: " << fields[ii];
86 return std::nullopt;
87 }
88 }
89 return ProcStat{
90 .pid = pid,
91 .name = std::string(name),
92 .state = fields.at(0).at(0),
93 .parent_pid = static_cast<int64_t>(numbers.at(0)),
94 .group_id = static_cast<int64_t>(numbers.at(1)),
95 .session_id = static_cast<int64_t>(numbers.at(2)),
96 .tty = static_cast<int64_t>(numbers.at(3)),
97 .tpgid = static_cast<int64_t>(numbers.at(4)),
98 .kernel_flags = static_cast<uint64_t>(numbers.at(5)),
99 .minor_faults = static_cast<uint64_t>(numbers.at(6)),
100 .children_minor_faults = static_cast<uint64_t>(numbers.at(7)),
101 .major_faults = static_cast<uint64_t>(numbers.at(8)),
102 .children_major_faults = static_cast<uint64_t>(numbers.at(9)),
103 .user_mode_ticks = static_cast<uint64_t>(numbers.at(10)),
104 .kernel_mode_ticks = static_cast<uint64_t>(numbers.at(11)),
105 .children_user_mode_ticks = static_cast<int64_t>(numbers.at(12)),
106 .children_kernel_mode_ticks = static_cast<int64_t>(numbers.at(13)),
107 .priority = static_cast<int64_t>(numbers.at(14)),
108 .nice = static_cast<int64_t>(numbers.at(15)),
109 .num_threads = static_cast<int64_t>(numbers.at(16)),
110 .itrealvalue = static_cast<int64_t>(numbers.at(17)),
111 .start_time_ticks = static_cast<uint64_t>(numbers.at(18)),
112 .virtual_memory_size = static_cast<uint64_t>(numbers.at(19)),
113 .resident_set_size = static_cast<int64_t>(numbers.at(20)),
114 .rss_soft_limit = static_cast<uint64_t>(numbers.at(21)),
115 .start_code_address = static_cast<uint64_t>(numbers.at(22)),
116 .end_code_address = static_cast<uint64_t>(numbers.at(23)),
117 .start_stack_address = static_cast<uint64_t>(numbers.at(24)),
118 .stack_pointer = static_cast<uint64_t>(numbers.at(25)),
119 .instruction_pointer = static_cast<uint64_t>(numbers.at(26)),
120 .signal_bitmask = static_cast<uint64_t>(numbers.at(27)),
121 .blocked_signals = static_cast<uint64_t>(numbers.at(28)),
122 .ignored_signals = static_cast<uint64_t>(numbers.at(29)),
123 .caught_signals = static_cast<uint64_t>(numbers.at(30)),
124 .wchan = static_cast<uint64_t>(numbers.at(31)),
125 .swap_pages = static_cast<uint64_t>(numbers.at(32)),
126 .children_swap_pages = static_cast<uint64_t>(numbers.at(33)),
127 .exit_signal = static_cast<int64_t>(numbers.at(34)),
128 .processor = static_cast<int64_t>(numbers.at(35)),
129 .rt_priority = static_cast<uint64_t>(numbers.at(36)),
130 .scheduling_policy = static_cast<uint64_t>(numbers.at(37)),
131 .block_io_delay_ticks = static_cast<uint64_t>(numbers.at(38)),
132 .guest_ticks = static_cast<uint64_t>(numbers.at(39)),
133 .children_guest_ticks = static_cast<uint64_t>(numbers.at(40)),
134 .start_data_address = static_cast<uint64_t>(numbers.at(41)),
135 .end_data_address = static_cast<uint64_t>(numbers.at(42)),
136 .start_brk_address = static_cast<uint64_t>(numbers.at(43)),
137 .start_arg_address = static_cast<uint64_t>(numbers.at(44)),
138 .end_arg_address = static_cast<uint64_t>(numbers.at(45)),
139 .start_env_address = static_cast<uint64_t>(numbers.at(46)),
140 .end_env_address = static_cast<uint64_t>(numbers.at(47)),
141 .exit_code = static_cast<int64_t>(numbers.at(48))};
142}
143
Maxwell Gumleyb27245f2024-04-11 15:46:22 -0600144Top::Top(aos::EventLoop *event_loop, TrackThreadsMode track_threads,
145 TrackPerThreadInfoMode track_per_thread_info)
James Kuszmaul418fd062022-03-22 15:22:27 -0700146 : event_loop_(event_loop),
147 clock_tick_(std::chrono::nanoseconds(1000000000 / sysconf(_SC_CLK_TCK))),
milind-ueb075d22023-02-24 14:57:43 -0800148 page_size_(sysconf(_SC_PAGESIZE)),
Maxwell Gumleyb27245f2024-04-11 15:46:22 -0600149 track_threads_(track_threads),
150 track_per_thread_info_(track_per_thread_info) {
James Kuszmaul418fd062022-03-22 15:22:27 -0700151 TimerHandler *timer = event_loop_->AddTimer([this]() { UpdateReadings(); });
152 event_loop_->OnRun([timer, this]() {
Philipp Schradera6712522023-07-05 20:25:11 -0700153 timer->Schedule(event_loop_->monotonic_now(), kSamplePeriod);
James Kuszmaul418fd062022-03-22 15:22:27 -0700154 });
155}
156
157std::chrono::nanoseconds Top::TotalProcessTime(const ProcStat &proc_stat) {
158 return (proc_stat.user_mode_ticks + proc_stat.kernel_mode_ticks) *
159 clock_tick_;
160}
161
162aos::monotonic_clock::time_point Top::ProcessStartTime(
163 const ProcStat &proc_stat) {
164 return aos::monotonic_clock::time_point(proc_stat.start_time_ticks *
165 clock_tick_);
166}
167
168uint64_t Top::RealMemoryUsage(const ProcStat &proc_stat) {
169 return proc_stat.resident_set_size * page_size_;
170}
171
milind-ueb075d22023-02-24 14:57:43 -0800172void Top::MaybeAddThreadIds(pid_t pid, std::set<pid_t> *pids) {
Maxwell Gumleyb27245f2024-04-11 15:46:22 -0600173 if (track_threads_ == TrackThreadsMode::kDisabled) {
milind-ueb075d22023-02-24 14:57:43 -0800174 return;
175 }
176
177 // Add all the threads in /proc/pid/task
178 std::string task_dir = absl::StrCat("/proc/", std::to_string(pid), "/task/");
179 DIR *dir = opendir(task_dir.data());
180 if (dir == nullptr) {
181 LOG(WARNING) << "Unable to open " << task_dir;
182 return;
183 }
184
185 while (true) {
186 struct dirent *const dir_entry = readdir(dir);
187 if (dir_entry == nullptr) {
188 break;
189 }
190 pid_t tid;
191 if (absl::SimpleAtoi(dir_entry->d_name, &tid)) {
192 pids->emplace(tid);
193 }
194 }
195 closedir(dir);
196}
197
Maxwell Gumleyb27245f2024-04-11 15:46:22 -0600198ThreadState CharToThreadState(const char state) {
199 switch (state) {
200 case 'R':
201 return ThreadState::RUNNING;
202 case 'S':
203 return ThreadState::SLEEPING_INTERRUPTIBLE;
204 case 'D':
205 return ThreadState::SLEEPING_UNINTERRUPTIBLE;
206 case 'T':
207 return ThreadState::STOPPED;
208 case 'Z':
209 return ThreadState::ZOMBIE;
210 case 'I':
211 return ThreadState::IDLE;
Austin Schuh1da47ac2024-05-10 16:42:49 -0700212 case 'X':
213 return ThreadState::DEAD;
214 case 't':
215 return ThreadState::TRACING_STOP;
Maxwell Gumleyb27245f2024-04-11 15:46:22 -0600216 default:
217 LOG(FATAL) << "Invalid thread state character: " << state;
218 }
219}
220
221void Top::UpdateThreadReadings(pid_t pid, ProcessReadings &process) {
222 // Construct the path to the task directory which lists all threads
223 std::string task_dir = absl::StrFormat("/proc/%d/task", pid);
224
225 // Verify we can open the directory.
226 DIR *dir = opendir(task_dir.c_str());
227 if (dir == nullptr) {
228 LOG_EVERY_T(WARNING, 10) << "Unable to open directory: " << task_dir
229 << ", error: " << strerror(errno);
230 ;
231 return;
232 }
233
234 // Use a set to track all the threads that we process.
235 std::set<pid_t> updated_threads;
236
237 // Iterate over all entries in the directory.
238 struct dirent *entry;
239 while ((entry = readdir(dir)) != nullptr) {
240 // Skip non-directories
241 if (entry->d_type != DT_DIR) {
242 continue;
243 }
244
245 // Skip "." and "..".
246 const bool is_current_dir = strcmp(entry->d_name, ".") == 0;
247 const bool is_parent_dir = strcmp(entry->d_name, "..") == 0;
248 if (is_current_dir || is_parent_dir) {
249 continue;
250 }
251
252 // Verify the entry is a valid thread ID.
253 pid_t tid;
254 const bool is_valid_thread_id = absl::SimpleAtoi(entry->d_name, &tid);
255 if (!is_valid_thread_id) {
256 continue;
257 }
258
259 // Read the stats for the thread.
260 const std::optional<ProcStat> thread_stats = ReadProcStat(pid, tid);
261
262 // If no stats could be read (thread may have exited), remove it.
263 if (!thread_stats.has_value()) {
264 VLOG(2) << "Removing thread " << tid << " from process " << pid;
265 process.thread_readings.erase(tid);
266 continue;
267 }
268
269 const ThreadState thread_state = CharToThreadState(thread_stats->state);
270
271 // Find or create new thread reading entry.
272 ThreadReadings &thread_reading = process.thread_readings[tid];
273
274 // Update thread name.
275 thread_reading.name = thread_stats.value().name;
276 thread_reading.start_time = ProcessStartTime(thread_stats.value());
277
278 // Update ThreadReadings with the latest cpu usage.
279 aos::RingBuffer<ThreadReading, kRingBufferSize> &readings =
280 thread_reading.readings;
281 const aos::monotonic_clock::time_point now = event_loop_->monotonic_now();
282 const std::chrono::nanoseconds run_time =
283 TotalProcessTime(thread_stats.value());
284 // The ring buffer will push out the oldest entry if it is full.
285 readings.Push({now, run_time});
286
287 // If the buffer is full, update the CPU usage percentage.
288 if (readings.full()) {
289 const ThreadReading &previous = readings[0];
290 const ThreadReading &current = readings[1];
291 const std::chrono::nanoseconds run_time =
292 current.total_run_time - previous.total_run_time;
293 const std::chrono::nanoseconds reading_time =
294 current.reading_time - previous.reading_time;
295 thread_reading.cpu_percent = aos::time::DurationInSeconds(run_time) /
296 aos::time::DurationInSeconds(reading_time);
297 thread_reading.state = thread_state;
298 }
299 updated_threads.insert(tid);
300 }
301
302 // Remove all threads from process.thread_readings that didn't get updated.
303 std::vector<pid_t> threads_to_remove;
304 for (const auto &[tid, thread_reading] : process.thread_readings) {
305 if (!updated_threads.contains(tid)) {
306 threads_to_remove.push_back(tid);
307 }
308 }
309 for (const pid_t tid : threads_to_remove) {
310 process.thread_readings.erase(tid);
311 }
312
313 // Close the directory.
314 closedir(dir);
315}
316
James Kuszmaul418fd062022-03-22 15:22:27 -0700317void Top::UpdateReadings() {
318 aos::monotonic_clock::time_point now = event_loop_->monotonic_now();
319 // Get all the processes that we *might* care about.
320 std::set<pid_t> pids = pids_to_track_;
James Kuszmaul63a45482022-04-19 16:12:01 -0700321 // Ensure that we check on the status of every process that we are already
322 // tracking.
Austin Schuh60e77942022-05-16 17:48:24 -0700323 for (const auto &reading : readings_) {
James Kuszmaul63a45482022-04-19 16:12:01 -0700324 pids.insert(reading.first);
milind-ueb075d22023-02-24 14:57:43 -0800325 MaybeAddThreadIds(reading.first, &pids);
James Kuszmaul63a45482022-04-19 16:12:01 -0700326 }
James Kuszmaul418fd062022-03-22 15:22:27 -0700327 if (track_all_) {
328 DIR *const dir = opendir("/proc");
329 if (dir == nullptr) {
330 PLOG(FATAL) << "Failed to open /proc";
331 }
332 while (true) {
333 struct dirent *const dir_entry = readdir(dir);
334 if (dir_entry == nullptr) {
335 break;
336 }
337 pid_t pid;
338 if (dir_entry->d_type == DT_DIR &&
339 absl::SimpleAtoi(dir_entry->d_name, &pid)) {
340 pids.insert(pid);
milind-ueb075d22023-02-24 14:57:43 -0800341 MaybeAddThreadIds(pid, &pids);
James Kuszmaul418fd062022-03-22 15:22:27 -0700342 }
343 }
James Kuszmaul28c9e392022-11-04 13:24:12 -0700344 closedir(dir);
James Kuszmaul418fd062022-03-22 15:22:27 -0700345 }
346
347 for (const pid_t pid : pids) {
348 std::optional<ProcStat> proc_stat = ReadProcStat(pid);
349 // Stop tracking processes that have died.
350 if (!proc_stat.has_value()) {
351 readings_.erase(pid);
352 continue;
353 }
354 const aos::monotonic_clock::time_point start_time =
355 ProcessStartTime(*proc_stat);
356 auto reading_iter = readings_.find(pid);
357 if (reading_iter == readings_.end()) {
Austin Schuh979d4772022-12-30 14:50:41 -0800358 reading_iter =
359 readings_
360 .insert(std::make_pair(
Maxwell Gumleyb27245f2024-04-11 15:46:22 -0600361 pid,
362 ProcessReadings{
363 .name = proc_stat->name,
364 .start_time = start_time,
365 .cpu_percent = 0.0,
366 .kthread = !!(proc_stat->kernel_flags & PF_KTHREAD),
367 .readings = {},
368 .thread_readings = {},
369 }))
Austin Schuh979d4772022-12-30 14:50:41 -0800370 .first;
James Kuszmaul418fd062022-03-22 15:22:27 -0700371 }
372 ProcessReadings &process = reading_iter->second;
373 // The process associated with the PID has changed; reset the state.
374 if (process.start_time != start_time) {
375 process.name = proc_stat->name;
376 process.start_time = start_time;
377 process.readings.Reset();
378 }
James Kuszmaul6b35e3a2022-04-06 15:00:39 -0700379 // If the process name has changed (e.g., if our first reading for a process
380 // name occurred before execvp was called), then update it.
381 if (process.name != proc_stat->name) {
382 process.name = proc_stat->name;
383 }
James Kuszmaul418fd062022-03-22 15:22:27 -0700384
385 process.readings.Push(Reading{now, TotalProcessTime(*proc_stat),
386 RealMemoryUsage(*proc_stat)});
Maxwell Gumleyb27245f2024-04-11 15:46:22 -0600387 if (process.readings.full()) {
James Kuszmaul418fd062022-03-22 15:22:27 -0700388 process.cpu_percent =
389 aos::time::DurationInSeconds(process.readings[1].total_run_time -
390 process.readings[0].total_run_time) /
391 aos::time::DurationInSeconds(process.readings[1].reading_time -
392 process.readings[0].reading_time);
393 } else {
394 process.cpu_percent = 0.0;
395 }
Maxwell Gumleyb27245f2024-04-11 15:46:22 -0600396
397 // Update thread readings for this process
398 if (track_per_thread_info_ == TrackPerThreadInfoMode::kEnabled) {
399 UpdateThreadReadings(pid, process);
400 }
James Kuszmaul418fd062022-03-22 15:22:27 -0700401 }
Austin Schuh608514f2022-12-30 15:51:30 -0800402
403 if (on_reading_update_) {
404 on_reading_update_();
405 }
James Kuszmaul418fd062022-03-22 15:22:27 -0700406}
407
408flatbuffers::Offset<ProcessInfo> Top::InfoForProcess(
409 flatbuffers::FlatBufferBuilder *fbb, pid_t pid) {
410 auto reading_iter = readings_.find(pid);
411 if (reading_iter == readings_.end()) {
412 return {};
413 }
414 const ProcessReadings &reading = reading_iter->second;
Maxwell Gumleyb27245f2024-04-11 15:46:22 -0600415
416 if (reading.readings.empty()) {
417 return {}; // Return an empty offset if readings is empty.
418 }
419
420 std::vector<flatbuffers::Offset<ThreadInfo>> thread_infos_offsets;
421 flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<ThreadInfo>>>
422 threads_vector_offset;
423
424 if (track_per_thread_info_ == TrackPerThreadInfoMode::kEnabled &&
425 !reading.thread_readings.empty()) {
426 thread_infos_offsets.reserve(reading.thread_readings.size());
427 for (const auto &[tid, thread_reading] : reading.thread_readings) {
428 // Calculate how long the thread has been alive by comparing the thread
429 // start time to the current time.
430 const aos::monotonic_clock::time_point start_time =
431 thread_reading.start_time;
432 // convert start_time to int64
433 const int64_t start_time_ns = start_time.time_since_epoch().count();
434
435 const flatbuffers::Offset<flatbuffers::String> threadName =
436 fbb->CreateString(thread_reading.name);
437 ThreadInfo::Builder thread_info_builder(*fbb);
438 thread_info_builder.add_tid(tid);
439 thread_info_builder.add_name(threadName);
440 thread_info_builder.add_cpu_usage(thread_reading.cpu_percent);
441 thread_info_builder.add_start_time(start_time_ns);
442 thread_info_builder.add_state(thread_reading.state);
443 const flatbuffers::Offset<ThreadInfo> threadInfo =
444 thread_info_builder.Finish();
445 thread_infos_offsets.push_back(threadInfo);
446 }
447 threads_vector_offset = fbb->CreateVector(thread_infos_offsets);
448 } else {
449 threads_vector_offset = 0;
450 }
451
452 // Create name string offset
James Kuszmaul418fd062022-03-22 15:22:27 -0700453 const flatbuffers::Offset<flatbuffers::String> name =
454 fbb->CreateString(reading.name);
455 ProcessInfo::Builder builder(*fbb);
456 builder.add_pid(pid);
457 builder.add_name(name);
458 builder.add_cpu_usage(reading.cpu_percent);
459 builder.add_physical_memory(
460 reading.readings[reading.readings.size() - 1].memory_usage);
Maxwell Gumleyb27245f2024-04-11 15:46:22 -0600461 if (!threads_vector_offset.IsNull()) {
462 builder.add_threads(threads_vector_offset);
463 }
464
James Kuszmaul418fd062022-03-22 15:22:27 -0700465 return builder.Finish();
466}
467
468flatbuffers::Offset<TopProcessesFbs> Top::TopProcesses(
469 flatbuffers::FlatBufferBuilder *fbb, int n) {
470 // Pair is {cpu_usage, pid}.
471 std::priority_queue<std::pair<double, pid_t>> cpu_usages;
472 for (const auto &pair : readings_) {
473 // Deliberately include 0.0 percent CPU things in the usage list so that if
474 // the user asks for an arbitrarily large number of processes they'll get
475 // everything.
476 cpu_usages.push(std::make_pair(pair.second.cpu_percent, pair.first));
477 }
478 std::vector<flatbuffers::Offset<ProcessInfo>> offsets;
479 for (int ii = 0; ii < n && !cpu_usages.empty(); ++ii) {
480 offsets.push_back(InfoForProcess(fbb, cpu_usages.top().second));
481 cpu_usages.pop();
482 }
483 const flatbuffers::Offset<
484 flatbuffers::Vector<flatbuffers::Offset<ProcessInfo>>>
485 vector_offset = fbb->CreateVector(offsets);
486 TopProcessesFbs::Builder builder(*fbb);
487 builder.add_processes(vector_offset);
488 return builder.Finish();
489}
490
491} // namespace aos::util