blob: 28b4e69f32caa9dfc530afdcaa9703483d822050 [file] [log] [blame]
James Kuszmaul418fd062022-03-22 15:22:27 -07001#include "aos/util/top.h"
2
3#include <dirent.h>
Stephan Pleinesb1177672024-05-27 17:48:32 -07004#include <errno.h>
James Kuszmaul418fd062022-03-22 15:22:27 -07005#include <unistd.h>
6
Stephan Pleinesb1177672024-05-27 17:48:32 -07007#include <algorithm>
8#include <array>
9#include <atomic>
Maxwell Gumleyb27245f2024-04-11 15:46:22 -060010#include <cstring>
Stephan Pleinesb1177672024-05-27 17:48:32 -070011#include <istream>
James Kuszmaul418fd062022-03-22 15:22:27 -070012#include <queue>
Stephan Pleinesb1177672024-05-27 17:48:32 -070013#include <ratio>
James Kuszmaul418fd062022-03-22 15:22:27 -070014#include <string>
Stephan Pleinesb1177672024-05-27 17:48:32 -070015#include <string_view>
16#include <vector>
James Kuszmaul418fd062022-03-22 15:22:27 -070017
Austin Schuh99f7c6a2024-06-25 22:07:44 -070018#include "absl/log/check.h"
19#include "absl/log/log.h"
Stephan Pleinesb1177672024-05-27 17:48:32 -070020#include "absl/numeric/int128.h"
James Kuszmaul418fd062022-03-22 15:22:27 -070021#include "absl/strings/numbers.h"
Stephan Pleinesb1177672024-05-27 17:48:32 -070022#include "absl/strings/str_cat.h"
James Kuszmaul418fd062022-03-22 15:22:27 -070023#include "absl/strings/str_format.h"
24#include "absl/strings/str_split.h"
Stephan Pleinesb1177672024-05-27 17:48:32 -070025#include "flatbuffers/string.h"
26#include "flatbuffers/vector.h"
James Kuszmaul418fd062022-03-22 15:22:27 -070027
// Bit mask tested against the kernel_flags field of a parsed stat line to
// decide whether a process is a kernel thread. Defined locally; the value is
// intended to mirror the Linux kernel's PF_KTHREAD task flag.
#define PF_KTHREAD 0x00200000
29
James Kuszmaul418fd062022-03-22 15:22:27 -070030namespace aos::util {
31namespace {
32std::optional<std::string> ReadShortFile(std::string_view file_name) {
33 // Open as input and seek to end immediately.
34 std::ifstream file(std::string(file_name), std::ios_base::in);
35 if (!file.good()) {
36 VLOG(1) << "Can't read " << file_name;
37 return std::nullopt;
38 }
39 const size_t kMaxLineLength = 4096;
40 char buffer[kMaxLineLength];
41 file.read(buffer, kMaxLineLength);
42 if (!file.eof()) {
43 return std::nullopt;
44 }
45 return std::string(buffer, file.gcount());
46}
47} // namespace
48
Maxwell Gumleyb27245f2024-04-11 15:46:22 -060049std::optional<ProcStat> ReadProcStat(const pid_t pid,
50 const std::optional<pid_t> tid) {
51 const std::string path =
52 tid.has_value() ? absl::StrFormat("/proc/%d/task/%d/stat", pid, *tid)
53 : absl::StrFormat("/proc/%d/stat", pid);
54 const std::optional<std::string> contents = ReadShortFile(path);
James Kuszmaul418fd062022-03-22 15:22:27 -070055 if (!contents.has_value()) {
56 return std::nullopt;
57 }
58 const size_t start_name = contents->find_first_of('(');
59 const size_t end_name = contents->find_last_of(')');
60 if (start_name == std::string::npos || end_name == std::string::npos ||
61 end_name < start_name) {
62 VLOG(1) << "No name found in stat line " << contents.value();
63 return std::nullopt;
64 }
65 std::string_view name(contents->c_str() + start_name + 1,
66 end_name - start_name - 1);
67
68 std::vector<std::string_view> fields =
69 absl::StrSplit(std::string_view(contents->c_str() + end_name + 1,
70 contents->size() - end_name - 1),
71 ' ', absl::SkipWhitespace());
72 constexpr int kNumFieldsAfterName = 50;
73 if (fields.size() != kNumFieldsAfterName) {
74 VLOG(1) << "Incorrect number of fields " << fields.size();
75 return std::nullopt;
76 }
77 // The first field is a character for the current process state; every single
78 // field after that should be an integer.
79 if (fields[0].size() != 1) {
80 VLOG(1) << "State field is too long: " << fields[0];
81 return std::nullopt;
82 }
83 std::array<absl::int128, kNumFieldsAfterName - 1> numbers;
84 for (int ii = 1; ii < kNumFieldsAfterName; ++ii) {
85 if (!absl::SimpleAtoi(fields[ii], &numbers[ii - 1])) {
86 VLOG(1) << "Failed to parse field " << ii << " as number: " << fields[ii];
87 return std::nullopt;
88 }
89 }
90 return ProcStat{
91 .pid = pid,
92 .name = std::string(name),
93 .state = fields.at(0).at(0),
94 .parent_pid = static_cast<int64_t>(numbers.at(0)),
95 .group_id = static_cast<int64_t>(numbers.at(1)),
96 .session_id = static_cast<int64_t>(numbers.at(2)),
97 .tty = static_cast<int64_t>(numbers.at(3)),
98 .tpgid = static_cast<int64_t>(numbers.at(4)),
99 .kernel_flags = static_cast<uint64_t>(numbers.at(5)),
100 .minor_faults = static_cast<uint64_t>(numbers.at(6)),
101 .children_minor_faults = static_cast<uint64_t>(numbers.at(7)),
102 .major_faults = static_cast<uint64_t>(numbers.at(8)),
103 .children_major_faults = static_cast<uint64_t>(numbers.at(9)),
104 .user_mode_ticks = static_cast<uint64_t>(numbers.at(10)),
105 .kernel_mode_ticks = static_cast<uint64_t>(numbers.at(11)),
106 .children_user_mode_ticks = static_cast<int64_t>(numbers.at(12)),
107 .children_kernel_mode_ticks = static_cast<int64_t>(numbers.at(13)),
108 .priority = static_cast<int64_t>(numbers.at(14)),
109 .nice = static_cast<int64_t>(numbers.at(15)),
110 .num_threads = static_cast<int64_t>(numbers.at(16)),
111 .itrealvalue = static_cast<int64_t>(numbers.at(17)),
112 .start_time_ticks = static_cast<uint64_t>(numbers.at(18)),
113 .virtual_memory_size = static_cast<uint64_t>(numbers.at(19)),
114 .resident_set_size = static_cast<int64_t>(numbers.at(20)),
115 .rss_soft_limit = static_cast<uint64_t>(numbers.at(21)),
116 .start_code_address = static_cast<uint64_t>(numbers.at(22)),
117 .end_code_address = static_cast<uint64_t>(numbers.at(23)),
118 .start_stack_address = static_cast<uint64_t>(numbers.at(24)),
119 .stack_pointer = static_cast<uint64_t>(numbers.at(25)),
120 .instruction_pointer = static_cast<uint64_t>(numbers.at(26)),
121 .signal_bitmask = static_cast<uint64_t>(numbers.at(27)),
122 .blocked_signals = static_cast<uint64_t>(numbers.at(28)),
123 .ignored_signals = static_cast<uint64_t>(numbers.at(29)),
124 .caught_signals = static_cast<uint64_t>(numbers.at(30)),
125 .wchan = static_cast<uint64_t>(numbers.at(31)),
126 .swap_pages = static_cast<uint64_t>(numbers.at(32)),
127 .children_swap_pages = static_cast<uint64_t>(numbers.at(33)),
128 .exit_signal = static_cast<int64_t>(numbers.at(34)),
129 .processor = static_cast<int64_t>(numbers.at(35)),
130 .rt_priority = static_cast<uint64_t>(numbers.at(36)),
131 .scheduling_policy = static_cast<uint64_t>(numbers.at(37)),
132 .block_io_delay_ticks = static_cast<uint64_t>(numbers.at(38)),
133 .guest_ticks = static_cast<uint64_t>(numbers.at(39)),
134 .children_guest_ticks = static_cast<uint64_t>(numbers.at(40)),
135 .start_data_address = static_cast<uint64_t>(numbers.at(41)),
136 .end_data_address = static_cast<uint64_t>(numbers.at(42)),
137 .start_brk_address = static_cast<uint64_t>(numbers.at(43)),
138 .start_arg_address = static_cast<uint64_t>(numbers.at(44)),
139 .end_arg_address = static_cast<uint64_t>(numbers.at(45)),
140 .start_env_address = static_cast<uint64_t>(numbers.at(46)),
141 .end_env_address = static_cast<uint64_t>(numbers.at(47)),
142 .exit_code = static_cast<int64_t>(numbers.at(48))};
143}
144
Maxwell Gumleyb27245f2024-04-11 15:46:22 -0600145Top::Top(aos::EventLoop *event_loop, TrackThreadsMode track_threads,
146 TrackPerThreadInfoMode track_per_thread_info)
James Kuszmaul418fd062022-03-22 15:22:27 -0700147 : event_loop_(event_loop),
148 clock_tick_(std::chrono::nanoseconds(1000000000 / sysconf(_SC_CLK_TCK))),
milind-ueb075d22023-02-24 14:57:43 -0800149 page_size_(sysconf(_SC_PAGESIZE)),
Maxwell Gumleyb27245f2024-04-11 15:46:22 -0600150 track_threads_(track_threads),
151 track_per_thread_info_(track_per_thread_info) {
James Kuszmaul418fd062022-03-22 15:22:27 -0700152 TimerHandler *timer = event_loop_->AddTimer([this]() { UpdateReadings(); });
153 event_loop_->OnRun([timer, this]() {
Philipp Schradera6712522023-07-05 20:25:11 -0700154 timer->Schedule(event_loop_->monotonic_now(), kSamplePeriod);
James Kuszmaul418fd062022-03-22 15:22:27 -0700155 });
156}
157
158std::chrono::nanoseconds Top::TotalProcessTime(const ProcStat &proc_stat) {
159 return (proc_stat.user_mode_ticks + proc_stat.kernel_mode_ticks) *
160 clock_tick_;
161}
162
163aos::monotonic_clock::time_point Top::ProcessStartTime(
164 const ProcStat &proc_stat) {
165 return aos::monotonic_clock::time_point(proc_stat.start_time_ticks *
166 clock_tick_);
167}
168
169uint64_t Top::RealMemoryUsage(const ProcStat &proc_stat) {
170 return proc_stat.resident_set_size * page_size_;
171}
172
milind-ueb075d22023-02-24 14:57:43 -0800173void Top::MaybeAddThreadIds(pid_t pid, std::set<pid_t> *pids) {
Maxwell Gumleyb27245f2024-04-11 15:46:22 -0600174 if (track_threads_ == TrackThreadsMode::kDisabled) {
milind-ueb075d22023-02-24 14:57:43 -0800175 return;
176 }
177
178 // Add all the threads in /proc/pid/task
179 std::string task_dir = absl::StrCat("/proc/", std::to_string(pid), "/task/");
180 DIR *dir = opendir(task_dir.data());
181 if (dir == nullptr) {
182 LOG(WARNING) << "Unable to open " << task_dir;
183 return;
184 }
185
186 while (true) {
187 struct dirent *const dir_entry = readdir(dir);
188 if (dir_entry == nullptr) {
189 break;
190 }
191 pid_t tid;
192 if (absl::SimpleAtoi(dir_entry->d_name, &tid)) {
193 pids->emplace(tid);
194 }
195 }
196 closedir(dir);
197}
198
Maxwell Gumleyb27245f2024-04-11 15:46:22 -0600199ThreadState CharToThreadState(const char state) {
200 switch (state) {
201 case 'R':
202 return ThreadState::RUNNING;
203 case 'S':
204 return ThreadState::SLEEPING_INTERRUPTIBLE;
205 case 'D':
206 return ThreadState::SLEEPING_UNINTERRUPTIBLE;
207 case 'T':
208 return ThreadState::STOPPED;
209 case 'Z':
210 return ThreadState::ZOMBIE;
211 case 'I':
212 return ThreadState::IDLE;
Austin Schuh1da47ac2024-05-10 16:42:49 -0700213 case 'X':
214 return ThreadState::DEAD;
215 case 't':
216 return ThreadState::TRACING_STOP;
Maxwell Gumleyb27245f2024-04-11 15:46:22 -0600217 default:
218 LOG(FATAL) << "Invalid thread state character: " << state;
219 }
220}
221
222void Top::UpdateThreadReadings(pid_t pid, ProcessReadings &process) {
223 // Construct the path to the task directory which lists all threads
224 std::string task_dir = absl::StrFormat("/proc/%d/task", pid);
225
226 // Verify we can open the directory.
227 DIR *dir = opendir(task_dir.c_str());
228 if (dir == nullptr) {
Austin Schuh99f7c6a2024-06-25 22:07:44 -0700229 LOG_EVERY_N_SEC(WARNING, 10) << "Unable to open directory: " << task_dir
230 << ", error: " << strerror(errno);
Maxwell Gumleyb27245f2024-04-11 15:46:22 -0600231 return;
232 }
233
234 // Use a set to track all the threads that we process.
235 std::set<pid_t> updated_threads;
236
237 // Iterate over all entries in the directory.
238 struct dirent *entry;
239 while ((entry = readdir(dir)) != nullptr) {
240 // Skip non-directories
241 if (entry->d_type != DT_DIR) {
242 continue;
243 }
244
245 // Skip "." and "..".
246 const bool is_current_dir = strcmp(entry->d_name, ".") == 0;
247 const bool is_parent_dir = strcmp(entry->d_name, "..") == 0;
248 if (is_current_dir || is_parent_dir) {
249 continue;
250 }
251
252 // Verify the entry is a valid thread ID.
253 pid_t tid;
254 const bool is_valid_thread_id = absl::SimpleAtoi(entry->d_name, &tid);
255 if (!is_valid_thread_id) {
256 continue;
257 }
258
259 // Read the stats for the thread.
260 const std::optional<ProcStat> thread_stats = ReadProcStat(pid, tid);
261
262 // If no stats could be read (thread may have exited), remove it.
263 if (!thread_stats.has_value()) {
264 VLOG(2) << "Removing thread " << tid << " from process " << pid;
265 process.thread_readings.erase(tid);
266 continue;
267 }
268
269 const ThreadState thread_state = CharToThreadState(thread_stats->state);
270
271 // Find or create new thread reading entry.
272 ThreadReadings &thread_reading = process.thread_readings[tid];
273
274 // Update thread name.
275 thread_reading.name = thread_stats.value().name;
276 thread_reading.start_time = ProcessStartTime(thread_stats.value());
277
278 // Update ThreadReadings with the latest cpu usage.
279 aos::RingBuffer<ThreadReading, kRingBufferSize> &readings =
280 thread_reading.readings;
281 const aos::monotonic_clock::time_point now = event_loop_->monotonic_now();
282 const std::chrono::nanoseconds run_time =
283 TotalProcessTime(thread_stats.value());
284 // The ring buffer will push out the oldest entry if it is full.
285 readings.Push({now, run_time});
286
287 // If the buffer is full, update the CPU usage percentage.
288 if (readings.full()) {
289 const ThreadReading &previous = readings[0];
290 const ThreadReading &current = readings[1];
291 const std::chrono::nanoseconds run_time =
292 current.total_run_time - previous.total_run_time;
293 const std::chrono::nanoseconds reading_time =
294 current.reading_time - previous.reading_time;
295 thread_reading.cpu_percent = aos::time::DurationInSeconds(run_time) /
296 aos::time::DurationInSeconds(reading_time);
297 thread_reading.state = thread_state;
298 }
299 updated_threads.insert(tid);
300 }
301
302 // Remove all threads from process.thread_readings that didn't get updated.
303 std::vector<pid_t> threads_to_remove;
304 for (const auto &[tid, thread_reading] : process.thread_readings) {
305 if (!updated_threads.contains(tid)) {
306 threads_to_remove.push_back(tid);
307 }
308 }
309 for (const pid_t tid : threads_to_remove) {
310 process.thread_readings.erase(tid);
311 }
312
313 // Close the directory.
314 closedir(dir);
315}
316
James Kuszmaul418fd062022-03-22 15:22:27 -0700317void Top::UpdateReadings() {
318 aos::monotonic_clock::time_point now = event_loop_->monotonic_now();
319 // Get all the processes that we *might* care about.
320 std::set<pid_t> pids = pids_to_track_;
James Kuszmaul63a45482022-04-19 16:12:01 -0700321 // Ensure that we check on the status of every process that we are already
322 // tracking.
Austin Schuh60e77942022-05-16 17:48:24 -0700323 for (const auto &reading : readings_) {
James Kuszmaul63a45482022-04-19 16:12:01 -0700324 pids.insert(reading.first);
milind-ueb075d22023-02-24 14:57:43 -0800325 MaybeAddThreadIds(reading.first, &pids);
James Kuszmaul63a45482022-04-19 16:12:01 -0700326 }
James Kuszmaul418fd062022-03-22 15:22:27 -0700327 if (track_all_) {
328 DIR *const dir = opendir("/proc");
329 if (dir == nullptr) {
330 PLOG(FATAL) << "Failed to open /proc";
331 }
332 while (true) {
333 struct dirent *const dir_entry = readdir(dir);
334 if (dir_entry == nullptr) {
335 break;
336 }
337 pid_t pid;
338 if (dir_entry->d_type == DT_DIR &&
339 absl::SimpleAtoi(dir_entry->d_name, &pid)) {
340 pids.insert(pid);
milind-ueb075d22023-02-24 14:57:43 -0800341 MaybeAddThreadIds(pid, &pids);
James Kuszmaul418fd062022-03-22 15:22:27 -0700342 }
343 }
James Kuszmaul28c9e392022-11-04 13:24:12 -0700344 closedir(dir);
James Kuszmaul418fd062022-03-22 15:22:27 -0700345 }
346
347 for (const pid_t pid : pids) {
348 std::optional<ProcStat> proc_stat = ReadProcStat(pid);
349 // Stop tracking processes that have died.
350 if (!proc_stat.has_value()) {
351 readings_.erase(pid);
352 continue;
353 }
354 const aos::monotonic_clock::time_point start_time =
355 ProcessStartTime(*proc_stat);
356 auto reading_iter = readings_.find(pid);
357 if (reading_iter == readings_.end()) {
Austin Schuh979d4772022-12-30 14:50:41 -0800358 reading_iter =
359 readings_
360 .insert(std::make_pair(
Maxwell Gumleyb27245f2024-04-11 15:46:22 -0600361 pid,
362 ProcessReadings{
363 .name = proc_stat->name,
364 .start_time = start_time,
365 .cpu_percent = 0.0,
366 .kthread = !!(proc_stat->kernel_flags & PF_KTHREAD),
367 .readings = {},
368 .thread_readings = {},
369 }))
Austin Schuh979d4772022-12-30 14:50:41 -0800370 .first;
James Kuszmaul418fd062022-03-22 15:22:27 -0700371 }
372 ProcessReadings &process = reading_iter->second;
373 // The process associated with the PID has changed; reset the state.
374 if (process.start_time != start_time) {
375 process.name = proc_stat->name;
376 process.start_time = start_time;
377 process.readings.Reset();
378 }
James Kuszmaul6b35e3a2022-04-06 15:00:39 -0700379 // If the process name has changed (e.g., if our first reading for a process
380 // name occurred before execvp was called), then update it.
381 if (process.name != proc_stat->name) {
382 process.name = proc_stat->name;
383 }
James Kuszmaul418fd062022-03-22 15:22:27 -0700384
385 process.readings.Push(Reading{now, TotalProcessTime(*proc_stat),
386 RealMemoryUsage(*proc_stat)});
Maxwell Gumleyb27245f2024-04-11 15:46:22 -0600387 if (process.readings.full()) {
James Kuszmaul418fd062022-03-22 15:22:27 -0700388 process.cpu_percent =
389 aos::time::DurationInSeconds(process.readings[1].total_run_time -
390 process.readings[0].total_run_time) /
391 aos::time::DurationInSeconds(process.readings[1].reading_time -
392 process.readings[0].reading_time);
393 } else {
394 process.cpu_percent = 0.0;
395 }
Maxwell Gumleyb27245f2024-04-11 15:46:22 -0600396
397 // Update thread readings for this process
398 if (track_per_thread_info_ == TrackPerThreadInfoMode::kEnabled) {
399 UpdateThreadReadings(pid, process);
400 }
James Kuszmaul418fd062022-03-22 15:22:27 -0700401 }
Austin Schuh608514f2022-12-30 15:51:30 -0800402
403 if (on_reading_update_) {
404 on_reading_update_();
405 }
James Kuszmaul418fd062022-03-22 15:22:27 -0700406}
407
408flatbuffers::Offset<ProcessInfo> Top::InfoForProcess(
409 flatbuffers::FlatBufferBuilder *fbb, pid_t pid) {
410 auto reading_iter = readings_.find(pid);
411 if (reading_iter == readings_.end()) {
412 return {};
413 }
414 const ProcessReadings &reading = reading_iter->second;
Maxwell Gumleyb27245f2024-04-11 15:46:22 -0600415
416 if (reading.readings.empty()) {
417 return {}; // Return an empty offset if readings is empty.
418 }
419
420 std::vector<flatbuffers::Offset<ThreadInfo>> thread_infos_offsets;
421 flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<ThreadInfo>>>
422 threads_vector_offset;
423
424 if (track_per_thread_info_ == TrackPerThreadInfoMode::kEnabled &&
425 !reading.thread_readings.empty()) {
426 thread_infos_offsets.reserve(reading.thread_readings.size());
427 for (const auto &[tid, thread_reading] : reading.thread_readings) {
428 // Calculate how long the thread has been alive by comparing the thread
429 // start time to the current time.
430 const aos::monotonic_clock::time_point start_time =
431 thread_reading.start_time;
432 // convert start_time to int64
433 const int64_t start_time_ns = start_time.time_since_epoch().count();
434
435 const flatbuffers::Offset<flatbuffers::String> threadName =
436 fbb->CreateString(thread_reading.name);
437 ThreadInfo::Builder thread_info_builder(*fbb);
438 thread_info_builder.add_tid(tid);
439 thread_info_builder.add_name(threadName);
440 thread_info_builder.add_cpu_usage(thread_reading.cpu_percent);
441 thread_info_builder.add_start_time(start_time_ns);
442 thread_info_builder.add_state(thread_reading.state);
443 const flatbuffers::Offset<ThreadInfo> threadInfo =
444 thread_info_builder.Finish();
445 thread_infos_offsets.push_back(threadInfo);
446 }
447 threads_vector_offset = fbb->CreateVector(thread_infos_offsets);
448 } else {
449 threads_vector_offset = 0;
450 }
451
452 // Create name string offset
James Kuszmaul418fd062022-03-22 15:22:27 -0700453 const flatbuffers::Offset<flatbuffers::String> name =
454 fbb->CreateString(reading.name);
455 ProcessInfo::Builder builder(*fbb);
456 builder.add_pid(pid);
457 builder.add_name(name);
458 builder.add_cpu_usage(reading.cpu_percent);
459 builder.add_physical_memory(
460 reading.readings[reading.readings.size() - 1].memory_usage);
Maxwell Gumleyb27245f2024-04-11 15:46:22 -0600461 if (!threads_vector_offset.IsNull()) {
462 builder.add_threads(threads_vector_offset);
463 }
464
James Kuszmaul418fd062022-03-22 15:22:27 -0700465 return builder.Finish();
466}
467
468flatbuffers::Offset<TopProcessesFbs> Top::TopProcesses(
469 flatbuffers::FlatBufferBuilder *fbb, int n) {
470 // Pair is {cpu_usage, pid}.
471 std::priority_queue<std::pair<double, pid_t>> cpu_usages;
472 for (const auto &pair : readings_) {
473 // Deliberately include 0.0 percent CPU things in the usage list so that if
474 // the user asks for an arbitrarily large number of processes they'll get
475 // everything.
476 cpu_usages.push(std::make_pair(pair.second.cpu_percent, pair.first));
477 }
478 std::vector<flatbuffers::Offset<ProcessInfo>> offsets;
479 for (int ii = 0; ii < n && !cpu_usages.empty(); ++ii) {
480 offsets.push_back(InfoForProcess(fbb, cpu_usages.top().second));
481 cpu_usages.pop();
482 }
483 const flatbuffers::Offset<
484 flatbuffers::Vector<flatbuffers::Offset<ProcessInfo>>>
485 vector_offset = fbb->CreateVector(offsets);
486 TopProcessesFbs::Builder builder(*fbb);
487 builder.add_processes(vector_offset);
488 return builder.Finish();
489}
490
491} // namespace aos::util