blob: 10ebd4d20eefbff5cb6695e6a8dbcc3dbf4cd916 [file] [log] [blame]
James Kuszmaul418fd062022-03-22 15:22:27 -07001#include "aos/util/top.h"
2
3#include <dirent.h>
Stephan Pleinesb1177672024-05-27 17:48:32 -07004#include <errno.h>
James Kuszmaul418fd062022-03-22 15:22:27 -07005#include <unistd.h>
6
Stephan Pleinesb1177672024-05-27 17:48:32 -07007#include <algorithm>
8#include <array>
9#include <atomic>
Maxwell Gumleyb27245f2024-04-11 15:46:22 -060010#include <cstring>
Stephan Pleinesb1177672024-05-27 17:48:32 -070011#include <istream>
James Kuszmaul418fd062022-03-22 15:22:27 -070012#include <queue>
Stephan Pleinesb1177672024-05-27 17:48:32 -070013#include <ratio>
James Kuszmaul418fd062022-03-22 15:22:27 -070014#include <string>
Stephan Pleinesb1177672024-05-27 17:48:32 -070015#include <string_view>
16#include <vector>
James Kuszmaul418fd062022-03-22 15:22:27 -070017
Stephan Pleinesb1177672024-05-27 17:48:32 -070018#include "absl/numeric/int128.h"
James Kuszmaul418fd062022-03-22 15:22:27 -070019#include "absl/strings/numbers.h"
Stephan Pleinesb1177672024-05-27 17:48:32 -070020#include "absl/strings/str_cat.h"
James Kuszmaul418fd062022-03-22 15:22:27 -070021#include "absl/strings/str_format.h"
22#include "absl/strings/str_split.h"
Stephan Pleinesb1177672024-05-27 17:48:32 -070023#include "flatbuffers/string.h"
24#include "flatbuffers/vector.h"
25#include "glog/logging.h"
James Kuszmaul418fd062022-03-22 15:22:27 -070026
Austin Schuh979d4772022-12-30 14:50:41 -080027#define PF_KTHREAD 0x00200000
28
James Kuszmaul418fd062022-03-22 15:22:27 -070029namespace aos::util {
30namespace {
31std::optional<std::string> ReadShortFile(std::string_view file_name) {
32 // Open as input and seek to end immediately.
33 std::ifstream file(std::string(file_name), std::ios_base::in);
34 if (!file.good()) {
35 VLOG(1) << "Can't read " << file_name;
36 return std::nullopt;
37 }
38 const size_t kMaxLineLength = 4096;
39 char buffer[kMaxLineLength];
40 file.read(buffer, kMaxLineLength);
41 if (!file.eof()) {
42 return std::nullopt;
43 }
44 return std::string(buffer, file.gcount());
45}
46} // namespace
47
Maxwell Gumleyb27245f2024-04-11 15:46:22 -060048std::optional<ProcStat> ReadProcStat(const pid_t pid,
49 const std::optional<pid_t> tid) {
50 const std::string path =
51 tid.has_value() ? absl::StrFormat("/proc/%d/task/%d/stat", pid, *tid)
52 : absl::StrFormat("/proc/%d/stat", pid);
53 const std::optional<std::string> contents = ReadShortFile(path);
James Kuszmaul418fd062022-03-22 15:22:27 -070054 if (!contents.has_value()) {
55 return std::nullopt;
56 }
57 const size_t start_name = contents->find_first_of('(');
58 const size_t end_name = contents->find_last_of(')');
59 if (start_name == std::string::npos || end_name == std::string::npos ||
60 end_name < start_name) {
61 VLOG(1) << "No name found in stat line " << contents.value();
62 return std::nullopt;
63 }
64 std::string_view name(contents->c_str() + start_name + 1,
65 end_name - start_name - 1);
66
67 std::vector<std::string_view> fields =
68 absl::StrSplit(std::string_view(contents->c_str() + end_name + 1,
69 contents->size() - end_name - 1),
70 ' ', absl::SkipWhitespace());
71 constexpr int kNumFieldsAfterName = 50;
72 if (fields.size() != kNumFieldsAfterName) {
73 VLOG(1) << "Incorrect number of fields " << fields.size();
74 return std::nullopt;
75 }
76 // The first field is a character for the current process state; every single
77 // field after that should be an integer.
78 if (fields[0].size() != 1) {
79 VLOG(1) << "State field is too long: " << fields[0];
80 return std::nullopt;
81 }
82 std::array<absl::int128, kNumFieldsAfterName - 1> numbers;
83 for (int ii = 1; ii < kNumFieldsAfterName; ++ii) {
84 if (!absl::SimpleAtoi(fields[ii], &numbers[ii - 1])) {
85 VLOG(1) << "Failed to parse field " << ii << " as number: " << fields[ii];
86 return std::nullopt;
87 }
88 }
89 return ProcStat{
90 .pid = pid,
91 .name = std::string(name),
92 .state = fields.at(0).at(0),
93 .parent_pid = static_cast<int64_t>(numbers.at(0)),
94 .group_id = static_cast<int64_t>(numbers.at(1)),
95 .session_id = static_cast<int64_t>(numbers.at(2)),
96 .tty = static_cast<int64_t>(numbers.at(3)),
97 .tpgid = static_cast<int64_t>(numbers.at(4)),
98 .kernel_flags = static_cast<uint64_t>(numbers.at(5)),
99 .minor_faults = static_cast<uint64_t>(numbers.at(6)),
100 .children_minor_faults = static_cast<uint64_t>(numbers.at(7)),
101 .major_faults = static_cast<uint64_t>(numbers.at(8)),
102 .children_major_faults = static_cast<uint64_t>(numbers.at(9)),
103 .user_mode_ticks = static_cast<uint64_t>(numbers.at(10)),
104 .kernel_mode_ticks = static_cast<uint64_t>(numbers.at(11)),
105 .children_user_mode_ticks = static_cast<int64_t>(numbers.at(12)),
106 .children_kernel_mode_ticks = static_cast<int64_t>(numbers.at(13)),
107 .priority = static_cast<int64_t>(numbers.at(14)),
108 .nice = static_cast<int64_t>(numbers.at(15)),
109 .num_threads = static_cast<int64_t>(numbers.at(16)),
110 .itrealvalue = static_cast<int64_t>(numbers.at(17)),
111 .start_time_ticks = static_cast<uint64_t>(numbers.at(18)),
112 .virtual_memory_size = static_cast<uint64_t>(numbers.at(19)),
113 .resident_set_size = static_cast<int64_t>(numbers.at(20)),
114 .rss_soft_limit = static_cast<uint64_t>(numbers.at(21)),
115 .start_code_address = static_cast<uint64_t>(numbers.at(22)),
116 .end_code_address = static_cast<uint64_t>(numbers.at(23)),
117 .start_stack_address = static_cast<uint64_t>(numbers.at(24)),
118 .stack_pointer = static_cast<uint64_t>(numbers.at(25)),
119 .instruction_pointer = static_cast<uint64_t>(numbers.at(26)),
120 .signal_bitmask = static_cast<uint64_t>(numbers.at(27)),
121 .blocked_signals = static_cast<uint64_t>(numbers.at(28)),
122 .ignored_signals = static_cast<uint64_t>(numbers.at(29)),
123 .caught_signals = static_cast<uint64_t>(numbers.at(30)),
124 .wchan = static_cast<uint64_t>(numbers.at(31)),
125 .swap_pages = static_cast<uint64_t>(numbers.at(32)),
126 .children_swap_pages = static_cast<uint64_t>(numbers.at(33)),
127 .exit_signal = static_cast<int64_t>(numbers.at(34)),
128 .processor = static_cast<int64_t>(numbers.at(35)),
129 .rt_priority = static_cast<uint64_t>(numbers.at(36)),
130 .scheduling_policy = static_cast<uint64_t>(numbers.at(37)),
131 .block_io_delay_ticks = static_cast<uint64_t>(numbers.at(38)),
132 .guest_ticks = static_cast<uint64_t>(numbers.at(39)),
133 .children_guest_ticks = static_cast<uint64_t>(numbers.at(40)),
134 .start_data_address = static_cast<uint64_t>(numbers.at(41)),
135 .end_data_address = static_cast<uint64_t>(numbers.at(42)),
136 .start_brk_address = static_cast<uint64_t>(numbers.at(43)),
137 .start_arg_address = static_cast<uint64_t>(numbers.at(44)),
138 .end_arg_address = static_cast<uint64_t>(numbers.at(45)),
139 .start_env_address = static_cast<uint64_t>(numbers.at(46)),
140 .end_env_address = static_cast<uint64_t>(numbers.at(47)),
141 .exit_code = static_cast<int64_t>(numbers.at(48))};
142}
143
Maxwell Gumleyb27245f2024-04-11 15:46:22 -0600144Top::Top(aos::EventLoop *event_loop, TrackThreadsMode track_threads,
145 TrackPerThreadInfoMode track_per_thread_info)
James Kuszmaul418fd062022-03-22 15:22:27 -0700146 : event_loop_(event_loop),
147 clock_tick_(std::chrono::nanoseconds(1000000000 / sysconf(_SC_CLK_TCK))),
milind-ueb075d22023-02-24 14:57:43 -0800148 page_size_(sysconf(_SC_PAGESIZE)),
Maxwell Gumleyb27245f2024-04-11 15:46:22 -0600149 track_threads_(track_threads),
150 track_per_thread_info_(track_per_thread_info) {
James Kuszmaul418fd062022-03-22 15:22:27 -0700151 TimerHandler *timer = event_loop_->AddTimer([this]() { UpdateReadings(); });
152 event_loop_->OnRun([timer, this]() {
Philipp Schradera6712522023-07-05 20:25:11 -0700153 timer->Schedule(event_loop_->monotonic_now(), kSamplePeriod);
James Kuszmaul418fd062022-03-22 15:22:27 -0700154 });
155}
156
157std::chrono::nanoseconds Top::TotalProcessTime(const ProcStat &proc_stat) {
158 return (proc_stat.user_mode_ticks + proc_stat.kernel_mode_ticks) *
159 clock_tick_;
160}
161
162aos::monotonic_clock::time_point Top::ProcessStartTime(
163 const ProcStat &proc_stat) {
164 return aos::monotonic_clock::time_point(proc_stat.start_time_ticks *
165 clock_tick_);
166}
167
168uint64_t Top::RealMemoryUsage(const ProcStat &proc_stat) {
169 return proc_stat.resident_set_size * page_size_;
170}
171
milind-ueb075d22023-02-24 14:57:43 -0800172void Top::MaybeAddThreadIds(pid_t pid, std::set<pid_t> *pids) {
Maxwell Gumleyb27245f2024-04-11 15:46:22 -0600173 if (track_threads_ == TrackThreadsMode::kDisabled) {
milind-ueb075d22023-02-24 14:57:43 -0800174 return;
175 }
176
177 // Add all the threads in /proc/pid/task
178 std::string task_dir = absl::StrCat("/proc/", std::to_string(pid), "/task/");
179 DIR *dir = opendir(task_dir.data());
180 if (dir == nullptr) {
181 LOG(WARNING) << "Unable to open " << task_dir;
182 return;
183 }
184
185 while (true) {
186 struct dirent *const dir_entry = readdir(dir);
187 if (dir_entry == nullptr) {
188 break;
189 }
190 pid_t tid;
191 if (absl::SimpleAtoi(dir_entry->d_name, &tid)) {
192 pids->emplace(tid);
193 }
194 }
195 closedir(dir);
196}
197
Maxwell Gumleyb27245f2024-04-11 15:46:22 -0600198ThreadState CharToThreadState(const char state) {
199 switch (state) {
200 case 'R':
201 return ThreadState::RUNNING;
202 case 'S':
203 return ThreadState::SLEEPING_INTERRUPTIBLE;
204 case 'D':
205 return ThreadState::SLEEPING_UNINTERRUPTIBLE;
206 case 'T':
207 return ThreadState::STOPPED;
208 case 'Z':
209 return ThreadState::ZOMBIE;
210 case 'I':
211 return ThreadState::IDLE;
212 default:
213 LOG(FATAL) << "Invalid thread state character: " << state;
214 }
215}
216
217void Top::UpdateThreadReadings(pid_t pid, ProcessReadings &process) {
218 // Construct the path to the task directory which lists all threads
219 std::string task_dir = absl::StrFormat("/proc/%d/task", pid);
220
221 // Verify we can open the directory.
222 DIR *dir = opendir(task_dir.c_str());
223 if (dir == nullptr) {
224 LOG_EVERY_T(WARNING, 10) << "Unable to open directory: " << task_dir
225 << ", error: " << strerror(errno);
226 ;
227 return;
228 }
229
230 // Use a set to track all the threads that we process.
231 std::set<pid_t> updated_threads;
232
233 // Iterate over all entries in the directory.
234 struct dirent *entry;
235 while ((entry = readdir(dir)) != nullptr) {
236 // Skip non-directories
237 if (entry->d_type != DT_DIR) {
238 continue;
239 }
240
241 // Skip "." and "..".
242 const bool is_current_dir = strcmp(entry->d_name, ".") == 0;
243 const bool is_parent_dir = strcmp(entry->d_name, "..") == 0;
244 if (is_current_dir || is_parent_dir) {
245 continue;
246 }
247
248 // Verify the entry is a valid thread ID.
249 pid_t tid;
250 const bool is_valid_thread_id = absl::SimpleAtoi(entry->d_name, &tid);
251 if (!is_valid_thread_id) {
252 continue;
253 }
254
255 // Read the stats for the thread.
256 const std::optional<ProcStat> thread_stats = ReadProcStat(pid, tid);
257
258 // If no stats could be read (thread may have exited), remove it.
259 if (!thread_stats.has_value()) {
260 VLOG(2) << "Removing thread " << tid << " from process " << pid;
261 process.thread_readings.erase(tid);
262 continue;
263 }
264
265 const ThreadState thread_state = CharToThreadState(thread_stats->state);
266
267 // Find or create new thread reading entry.
268 ThreadReadings &thread_reading = process.thread_readings[tid];
269
270 // Update thread name.
271 thread_reading.name = thread_stats.value().name;
272 thread_reading.start_time = ProcessStartTime(thread_stats.value());
273
274 // Update ThreadReadings with the latest cpu usage.
275 aos::RingBuffer<ThreadReading, kRingBufferSize> &readings =
276 thread_reading.readings;
277 const aos::monotonic_clock::time_point now = event_loop_->monotonic_now();
278 const std::chrono::nanoseconds run_time =
279 TotalProcessTime(thread_stats.value());
280 // The ring buffer will push out the oldest entry if it is full.
281 readings.Push({now, run_time});
282
283 // If the buffer is full, update the CPU usage percentage.
284 if (readings.full()) {
285 const ThreadReading &previous = readings[0];
286 const ThreadReading &current = readings[1];
287 const std::chrono::nanoseconds run_time =
288 current.total_run_time - previous.total_run_time;
289 const std::chrono::nanoseconds reading_time =
290 current.reading_time - previous.reading_time;
291 thread_reading.cpu_percent = aos::time::DurationInSeconds(run_time) /
292 aos::time::DurationInSeconds(reading_time);
293 thread_reading.state = thread_state;
294 }
295 updated_threads.insert(tid);
296 }
297
298 // Remove all threads from process.thread_readings that didn't get updated.
299 std::vector<pid_t> threads_to_remove;
300 for (const auto &[tid, thread_reading] : process.thread_readings) {
301 if (!updated_threads.contains(tid)) {
302 threads_to_remove.push_back(tid);
303 }
304 }
305 for (const pid_t tid : threads_to_remove) {
306 process.thread_readings.erase(tid);
307 }
308
309 // Close the directory.
310 closedir(dir);
311}
312
James Kuszmaul418fd062022-03-22 15:22:27 -0700313void Top::UpdateReadings() {
314 aos::monotonic_clock::time_point now = event_loop_->monotonic_now();
315 // Get all the processes that we *might* care about.
316 std::set<pid_t> pids = pids_to_track_;
James Kuszmaul63a45482022-04-19 16:12:01 -0700317 // Ensure that we check on the status of every process that we are already
318 // tracking.
Austin Schuh60e77942022-05-16 17:48:24 -0700319 for (const auto &reading : readings_) {
James Kuszmaul63a45482022-04-19 16:12:01 -0700320 pids.insert(reading.first);
milind-ueb075d22023-02-24 14:57:43 -0800321 MaybeAddThreadIds(reading.first, &pids);
James Kuszmaul63a45482022-04-19 16:12:01 -0700322 }
James Kuszmaul418fd062022-03-22 15:22:27 -0700323 if (track_all_) {
324 DIR *const dir = opendir("/proc");
325 if (dir == nullptr) {
326 PLOG(FATAL) << "Failed to open /proc";
327 }
328 while (true) {
329 struct dirent *const dir_entry = readdir(dir);
330 if (dir_entry == nullptr) {
331 break;
332 }
333 pid_t pid;
334 if (dir_entry->d_type == DT_DIR &&
335 absl::SimpleAtoi(dir_entry->d_name, &pid)) {
336 pids.insert(pid);
milind-ueb075d22023-02-24 14:57:43 -0800337 MaybeAddThreadIds(pid, &pids);
James Kuszmaul418fd062022-03-22 15:22:27 -0700338 }
339 }
James Kuszmaul28c9e392022-11-04 13:24:12 -0700340 closedir(dir);
James Kuszmaul418fd062022-03-22 15:22:27 -0700341 }
342
343 for (const pid_t pid : pids) {
344 std::optional<ProcStat> proc_stat = ReadProcStat(pid);
345 // Stop tracking processes that have died.
346 if (!proc_stat.has_value()) {
347 readings_.erase(pid);
348 continue;
349 }
350 const aos::monotonic_clock::time_point start_time =
351 ProcessStartTime(*proc_stat);
352 auto reading_iter = readings_.find(pid);
353 if (reading_iter == readings_.end()) {
Austin Schuh979d4772022-12-30 14:50:41 -0800354 reading_iter =
355 readings_
356 .insert(std::make_pair(
Maxwell Gumleyb27245f2024-04-11 15:46:22 -0600357 pid,
358 ProcessReadings{
359 .name = proc_stat->name,
360 .start_time = start_time,
361 .cpu_percent = 0.0,
362 .kthread = !!(proc_stat->kernel_flags & PF_KTHREAD),
363 .readings = {},
364 .thread_readings = {},
365 }))
Austin Schuh979d4772022-12-30 14:50:41 -0800366 .first;
James Kuszmaul418fd062022-03-22 15:22:27 -0700367 }
368 ProcessReadings &process = reading_iter->second;
369 // The process associated with the PID has changed; reset the state.
370 if (process.start_time != start_time) {
371 process.name = proc_stat->name;
372 process.start_time = start_time;
373 process.readings.Reset();
374 }
James Kuszmaul6b35e3a2022-04-06 15:00:39 -0700375 // If the process name has changed (e.g., if our first reading for a process
376 // name occurred before execvp was called), then update it.
377 if (process.name != proc_stat->name) {
378 process.name = proc_stat->name;
379 }
James Kuszmaul418fd062022-03-22 15:22:27 -0700380
381 process.readings.Push(Reading{now, TotalProcessTime(*proc_stat),
382 RealMemoryUsage(*proc_stat)});
Maxwell Gumleyb27245f2024-04-11 15:46:22 -0600383 if (process.readings.full()) {
James Kuszmaul418fd062022-03-22 15:22:27 -0700384 process.cpu_percent =
385 aos::time::DurationInSeconds(process.readings[1].total_run_time -
386 process.readings[0].total_run_time) /
387 aos::time::DurationInSeconds(process.readings[1].reading_time -
388 process.readings[0].reading_time);
389 } else {
390 process.cpu_percent = 0.0;
391 }
Maxwell Gumleyb27245f2024-04-11 15:46:22 -0600392
393 // Update thread readings for this process
394 if (track_per_thread_info_ == TrackPerThreadInfoMode::kEnabled) {
395 UpdateThreadReadings(pid, process);
396 }
James Kuszmaul418fd062022-03-22 15:22:27 -0700397 }
Austin Schuh608514f2022-12-30 15:51:30 -0800398
399 if (on_reading_update_) {
400 on_reading_update_();
401 }
James Kuszmaul418fd062022-03-22 15:22:27 -0700402}
403
404flatbuffers::Offset<ProcessInfo> Top::InfoForProcess(
405 flatbuffers::FlatBufferBuilder *fbb, pid_t pid) {
406 auto reading_iter = readings_.find(pid);
407 if (reading_iter == readings_.end()) {
408 return {};
409 }
410 const ProcessReadings &reading = reading_iter->second;
Maxwell Gumleyb27245f2024-04-11 15:46:22 -0600411
412 if (reading.readings.empty()) {
413 return {}; // Return an empty offset if readings is empty.
414 }
415
416 std::vector<flatbuffers::Offset<ThreadInfo>> thread_infos_offsets;
417 flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<ThreadInfo>>>
418 threads_vector_offset;
419
420 if (track_per_thread_info_ == TrackPerThreadInfoMode::kEnabled &&
421 !reading.thread_readings.empty()) {
422 thread_infos_offsets.reserve(reading.thread_readings.size());
423 for (const auto &[tid, thread_reading] : reading.thread_readings) {
424 // Calculate how long the thread has been alive by comparing the thread
425 // start time to the current time.
426 const aos::monotonic_clock::time_point start_time =
427 thread_reading.start_time;
428 // convert start_time to int64
429 const int64_t start_time_ns = start_time.time_since_epoch().count();
430
431 const flatbuffers::Offset<flatbuffers::String> threadName =
432 fbb->CreateString(thread_reading.name);
433 ThreadInfo::Builder thread_info_builder(*fbb);
434 thread_info_builder.add_tid(tid);
435 thread_info_builder.add_name(threadName);
436 thread_info_builder.add_cpu_usage(thread_reading.cpu_percent);
437 thread_info_builder.add_start_time(start_time_ns);
438 thread_info_builder.add_state(thread_reading.state);
439 const flatbuffers::Offset<ThreadInfo> threadInfo =
440 thread_info_builder.Finish();
441 thread_infos_offsets.push_back(threadInfo);
442 }
443 threads_vector_offset = fbb->CreateVector(thread_infos_offsets);
444 } else {
445 threads_vector_offset = 0;
446 }
447
448 // Create name string offset
James Kuszmaul418fd062022-03-22 15:22:27 -0700449 const flatbuffers::Offset<flatbuffers::String> name =
450 fbb->CreateString(reading.name);
451 ProcessInfo::Builder builder(*fbb);
452 builder.add_pid(pid);
453 builder.add_name(name);
454 builder.add_cpu_usage(reading.cpu_percent);
455 builder.add_physical_memory(
456 reading.readings[reading.readings.size() - 1].memory_usage);
Maxwell Gumleyb27245f2024-04-11 15:46:22 -0600457 if (!threads_vector_offset.IsNull()) {
458 builder.add_threads(threads_vector_offset);
459 }
460
James Kuszmaul418fd062022-03-22 15:22:27 -0700461 return builder.Finish();
462}
463
464flatbuffers::Offset<TopProcessesFbs> Top::TopProcesses(
465 flatbuffers::FlatBufferBuilder *fbb, int n) {
466 // Pair is {cpu_usage, pid}.
467 std::priority_queue<std::pair<double, pid_t>> cpu_usages;
468 for (const auto &pair : readings_) {
469 // Deliberately include 0.0 percent CPU things in the usage list so that if
470 // the user asks for an arbitrarily large number of processes they'll get
471 // everything.
472 cpu_usages.push(std::make_pair(pair.second.cpu_percent, pair.first));
473 }
474 std::vector<flatbuffers::Offset<ProcessInfo>> offsets;
475 for (int ii = 0; ii < n && !cpu_usages.empty(); ++ii) {
476 offsets.push_back(InfoForProcess(fbb, cpu_usages.top().second));
477 cpu_usages.pop();
478 }
479 const flatbuffers::Offset<
480 flatbuffers::Vector<flatbuffers::Offset<ProcessInfo>>>
481 vector_offset = fbb->CreateVector(offsets);
482 TopProcessesFbs::Builder builder(*fbb);
483 builder.add_processes(vector_offset);
484 return builder.Finish();
485}
486
487} // namespace aos::util