blob: c22fee45fabdeef5c951171bd099326be9a2b6b6 [file] [log] [blame]
James Kuszmaul418fd062022-03-22 15:22:27 -07001#include "aos/util/top.h"
2
3#include <dirent.h>
Maxwell Gumleyb27245f2024-04-11 15:46:22 -06004#include <sys/types.h> // used for DIR
James Kuszmaul418fd062022-03-22 15:22:27 -07005#include <unistd.h>
6
Maxwell Gumleyb27245f2024-04-11 15:46:22 -06007#include <cstring>
James Kuszmaul418fd062022-03-22 15:22:27 -07008#include <queue>
9#include <string>
10
11#include "absl/strings/numbers.h"
12#include "absl/strings/str_format.h"
13#include "absl/strings/str_split.h"
14
Austin Schuh979d4772022-12-30 14:50:41 -080015#define PF_KTHREAD 0x00200000
16
James Kuszmaul418fd062022-03-22 15:22:27 -070017namespace aos::util {
18namespace {
19std::optional<std::string> ReadShortFile(std::string_view file_name) {
20 // Open as input and seek to end immediately.
21 std::ifstream file(std::string(file_name), std::ios_base::in);
22 if (!file.good()) {
23 VLOG(1) << "Can't read " << file_name;
24 return std::nullopt;
25 }
26 const size_t kMaxLineLength = 4096;
27 char buffer[kMaxLineLength];
28 file.read(buffer, kMaxLineLength);
29 if (!file.eof()) {
30 return std::nullopt;
31 }
32 return std::string(buffer, file.gcount());
33}
34} // namespace
35
Maxwell Gumleyb27245f2024-04-11 15:46:22 -060036std::optional<ProcStat> ReadProcStat(const pid_t pid,
37 const std::optional<pid_t> tid) {
38 const std::string path =
39 tid.has_value() ? absl::StrFormat("/proc/%d/task/%d/stat", pid, *tid)
40 : absl::StrFormat("/proc/%d/stat", pid);
41 const std::optional<std::string> contents = ReadShortFile(path);
James Kuszmaul418fd062022-03-22 15:22:27 -070042 if (!contents.has_value()) {
43 return std::nullopt;
44 }
45 const size_t start_name = contents->find_first_of('(');
46 const size_t end_name = contents->find_last_of(')');
47 if (start_name == std::string::npos || end_name == std::string::npos ||
48 end_name < start_name) {
49 VLOG(1) << "No name found in stat line " << contents.value();
50 return std::nullopt;
51 }
52 std::string_view name(contents->c_str() + start_name + 1,
53 end_name - start_name - 1);
54
55 std::vector<std::string_view> fields =
56 absl::StrSplit(std::string_view(contents->c_str() + end_name + 1,
57 contents->size() - end_name - 1),
58 ' ', absl::SkipWhitespace());
59 constexpr int kNumFieldsAfterName = 50;
60 if (fields.size() != kNumFieldsAfterName) {
61 VLOG(1) << "Incorrect number of fields " << fields.size();
62 return std::nullopt;
63 }
64 // The first field is a character for the current process state; every single
65 // field after that should be an integer.
66 if (fields[0].size() != 1) {
67 VLOG(1) << "State field is too long: " << fields[0];
68 return std::nullopt;
69 }
70 std::array<absl::int128, kNumFieldsAfterName - 1> numbers;
71 for (int ii = 1; ii < kNumFieldsAfterName; ++ii) {
72 if (!absl::SimpleAtoi(fields[ii], &numbers[ii - 1])) {
73 VLOG(1) << "Failed to parse field " << ii << " as number: " << fields[ii];
74 return std::nullopt;
75 }
76 }
77 return ProcStat{
78 .pid = pid,
79 .name = std::string(name),
80 .state = fields.at(0).at(0),
81 .parent_pid = static_cast<int64_t>(numbers.at(0)),
82 .group_id = static_cast<int64_t>(numbers.at(1)),
83 .session_id = static_cast<int64_t>(numbers.at(2)),
84 .tty = static_cast<int64_t>(numbers.at(3)),
85 .tpgid = static_cast<int64_t>(numbers.at(4)),
86 .kernel_flags = static_cast<uint64_t>(numbers.at(5)),
87 .minor_faults = static_cast<uint64_t>(numbers.at(6)),
88 .children_minor_faults = static_cast<uint64_t>(numbers.at(7)),
89 .major_faults = static_cast<uint64_t>(numbers.at(8)),
90 .children_major_faults = static_cast<uint64_t>(numbers.at(9)),
91 .user_mode_ticks = static_cast<uint64_t>(numbers.at(10)),
92 .kernel_mode_ticks = static_cast<uint64_t>(numbers.at(11)),
93 .children_user_mode_ticks = static_cast<int64_t>(numbers.at(12)),
94 .children_kernel_mode_ticks = static_cast<int64_t>(numbers.at(13)),
95 .priority = static_cast<int64_t>(numbers.at(14)),
96 .nice = static_cast<int64_t>(numbers.at(15)),
97 .num_threads = static_cast<int64_t>(numbers.at(16)),
98 .itrealvalue = static_cast<int64_t>(numbers.at(17)),
99 .start_time_ticks = static_cast<uint64_t>(numbers.at(18)),
100 .virtual_memory_size = static_cast<uint64_t>(numbers.at(19)),
101 .resident_set_size = static_cast<int64_t>(numbers.at(20)),
102 .rss_soft_limit = static_cast<uint64_t>(numbers.at(21)),
103 .start_code_address = static_cast<uint64_t>(numbers.at(22)),
104 .end_code_address = static_cast<uint64_t>(numbers.at(23)),
105 .start_stack_address = static_cast<uint64_t>(numbers.at(24)),
106 .stack_pointer = static_cast<uint64_t>(numbers.at(25)),
107 .instruction_pointer = static_cast<uint64_t>(numbers.at(26)),
108 .signal_bitmask = static_cast<uint64_t>(numbers.at(27)),
109 .blocked_signals = static_cast<uint64_t>(numbers.at(28)),
110 .ignored_signals = static_cast<uint64_t>(numbers.at(29)),
111 .caught_signals = static_cast<uint64_t>(numbers.at(30)),
112 .wchan = static_cast<uint64_t>(numbers.at(31)),
113 .swap_pages = static_cast<uint64_t>(numbers.at(32)),
114 .children_swap_pages = static_cast<uint64_t>(numbers.at(33)),
115 .exit_signal = static_cast<int64_t>(numbers.at(34)),
116 .processor = static_cast<int64_t>(numbers.at(35)),
117 .rt_priority = static_cast<uint64_t>(numbers.at(36)),
118 .scheduling_policy = static_cast<uint64_t>(numbers.at(37)),
119 .block_io_delay_ticks = static_cast<uint64_t>(numbers.at(38)),
120 .guest_ticks = static_cast<uint64_t>(numbers.at(39)),
121 .children_guest_ticks = static_cast<uint64_t>(numbers.at(40)),
122 .start_data_address = static_cast<uint64_t>(numbers.at(41)),
123 .end_data_address = static_cast<uint64_t>(numbers.at(42)),
124 .start_brk_address = static_cast<uint64_t>(numbers.at(43)),
125 .start_arg_address = static_cast<uint64_t>(numbers.at(44)),
126 .end_arg_address = static_cast<uint64_t>(numbers.at(45)),
127 .start_env_address = static_cast<uint64_t>(numbers.at(46)),
128 .end_env_address = static_cast<uint64_t>(numbers.at(47)),
129 .exit_code = static_cast<int64_t>(numbers.at(48))};
130}
131
Maxwell Gumleyb27245f2024-04-11 15:46:22 -0600132Top::Top(aos::EventLoop *event_loop, TrackThreadsMode track_threads,
133 TrackPerThreadInfoMode track_per_thread_info)
James Kuszmaul418fd062022-03-22 15:22:27 -0700134 : event_loop_(event_loop),
135 clock_tick_(std::chrono::nanoseconds(1000000000 / sysconf(_SC_CLK_TCK))),
milind-ueb075d22023-02-24 14:57:43 -0800136 page_size_(sysconf(_SC_PAGESIZE)),
Maxwell Gumleyb27245f2024-04-11 15:46:22 -0600137 track_threads_(track_threads),
138 track_per_thread_info_(track_per_thread_info) {
James Kuszmaul418fd062022-03-22 15:22:27 -0700139 TimerHandler *timer = event_loop_->AddTimer([this]() { UpdateReadings(); });
140 event_loop_->OnRun([timer, this]() {
Philipp Schradera6712522023-07-05 20:25:11 -0700141 timer->Schedule(event_loop_->monotonic_now(), kSamplePeriod);
James Kuszmaul418fd062022-03-22 15:22:27 -0700142 });
143}
144
145std::chrono::nanoseconds Top::TotalProcessTime(const ProcStat &proc_stat) {
146 return (proc_stat.user_mode_ticks + proc_stat.kernel_mode_ticks) *
147 clock_tick_;
148}
149
150aos::monotonic_clock::time_point Top::ProcessStartTime(
151 const ProcStat &proc_stat) {
152 return aos::monotonic_clock::time_point(proc_stat.start_time_ticks *
153 clock_tick_);
154}
155
156uint64_t Top::RealMemoryUsage(const ProcStat &proc_stat) {
157 return proc_stat.resident_set_size * page_size_;
158}
159
milind-ueb075d22023-02-24 14:57:43 -0800160void Top::MaybeAddThreadIds(pid_t pid, std::set<pid_t> *pids) {
Maxwell Gumleyb27245f2024-04-11 15:46:22 -0600161 if (track_threads_ == TrackThreadsMode::kDisabled) {
milind-ueb075d22023-02-24 14:57:43 -0800162 return;
163 }
164
165 // Add all the threads in /proc/pid/task
166 std::string task_dir = absl::StrCat("/proc/", std::to_string(pid), "/task/");
167 DIR *dir = opendir(task_dir.data());
168 if (dir == nullptr) {
169 LOG(WARNING) << "Unable to open " << task_dir;
170 return;
171 }
172
173 while (true) {
174 struct dirent *const dir_entry = readdir(dir);
175 if (dir_entry == nullptr) {
176 break;
177 }
178 pid_t tid;
179 if (absl::SimpleAtoi(dir_entry->d_name, &tid)) {
180 pids->emplace(tid);
181 }
182 }
183 closedir(dir);
184}
185
Maxwell Gumleyb27245f2024-04-11 15:46:22 -0600186ThreadState CharToThreadState(const char state) {
187 switch (state) {
188 case 'R':
189 return ThreadState::RUNNING;
190 case 'S':
191 return ThreadState::SLEEPING_INTERRUPTIBLE;
192 case 'D':
193 return ThreadState::SLEEPING_UNINTERRUPTIBLE;
194 case 'T':
195 return ThreadState::STOPPED;
196 case 'Z':
197 return ThreadState::ZOMBIE;
198 case 'I':
199 return ThreadState::IDLE;
200 default:
201 LOG(FATAL) << "Invalid thread state character: " << state;
202 }
203}
204
205void Top::UpdateThreadReadings(pid_t pid, ProcessReadings &process) {
206 // Construct the path to the task directory which lists all threads
207 std::string task_dir = absl::StrFormat("/proc/%d/task", pid);
208
209 // Verify we can open the directory.
210 DIR *dir = opendir(task_dir.c_str());
211 if (dir == nullptr) {
212 LOG_EVERY_T(WARNING, 10) << "Unable to open directory: " << task_dir
213 << ", error: " << strerror(errno);
214 ;
215 return;
216 }
217
218 // Use a set to track all the threads that we process.
219 std::set<pid_t> updated_threads;
220
221 // Iterate over all entries in the directory.
222 struct dirent *entry;
223 while ((entry = readdir(dir)) != nullptr) {
224 // Skip non-directories
225 if (entry->d_type != DT_DIR) {
226 continue;
227 }
228
229 // Skip "." and "..".
230 const bool is_current_dir = strcmp(entry->d_name, ".") == 0;
231 const bool is_parent_dir = strcmp(entry->d_name, "..") == 0;
232 if (is_current_dir || is_parent_dir) {
233 continue;
234 }
235
236 // Verify the entry is a valid thread ID.
237 pid_t tid;
238 const bool is_valid_thread_id = absl::SimpleAtoi(entry->d_name, &tid);
239 if (!is_valid_thread_id) {
240 continue;
241 }
242
243 // Read the stats for the thread.
244 const std::optional<ProcStat> thread_stats = ReadProcStat(pid, tid);
245
246 // If no stats could be read (thread may have exited), remove it.
247 if (!thread_stats.has_value()) {
248 VLOG(2) << "Removing thread " << tid << " from process " << pid;
249 process.thread_readings.erase(tid);
250 continue;
251 }
252
253 const ThreadState thread_state = CharToThreadState(thread_stats->state);
254
255 // Find or create new thread reading entry.
256 ThreadReadings &thread_reading = process.thread_readings[tid];
257
258 // Update thread name.
259 thread_reading.name = thread_stats.value().name;
260 thread_reading.start_time = ProcessStartTime(thread_stats.value());
261
262 // Update ThreadReadings with the latest cpu usage.
263 aos::RingBuffer<ThreadReading, kRingBufferSize> &readings =
264 thread_reading.readings;
265 const aos::monotonic_clock::time_point now = event_loop_->monotonic_now();
266 const std::chrono::nanoseconds run_time =
267 TotalProcessTime(thread_stats.value());
268 // The ring buffer will push out the oldest entry if it is full.
269 readings.Push({now, run_time});
270
271 // If the buffer is full, update the CPU usage percentage.
272 if (readings.full()) {
273 const ThreadReading &previous = readings[0];
274 const ThreadReading &current = readings[1];
275 const std::chrono::nanoseconds run_time =
276 current.total_run_time - previous.total_run_time;
277 const std::chrono::nanoseconds reading_time =
278 current.reading_time - previous.reading_time;
279 thread_reading.cpu_percent = aos::time::DurationInSeconds(run_time) /
280 aos::time::DurationInSeconds(reading_time);
281 thread_reading.state = thread_state;
282 }
283 updated_threads.insert(tid);
284 }
285
286 // Remove all threads from process.thread_readings that didn't get updated.
287 std::vector<pid_t> threads_to_remove;
288 for (const auto &[tid, thread_reading] : process.thread_readings) {
289 if (!updated_threads.contains(tid)) {
290 threads_to_remove.push_back(tid);
291 }
292 }
293 for (const pid_t tid : threads_to_remove) {
294 process.thread_readings.erase(tid);
295 }
296
297 // Close the directory.
298 closedir(dir);
299}
300
James Kuszmaul418fd062022-03-22 15:22:27 -0700301void Top::UpdateReadings() {
302 aos::monotonic_clock::time_point now = event_loop_->monotonic_now();
303 // Get all the processes that we *might* care about.
304 std::set<pid_t> pids = pids_to_track_;
James Kuszmaul63a45482022-04-19 16:12:01 -0700305 // Ensure that we check on the status of every process that we are already
306 // tracking.
Austin Schuh60e77942022-05-16 17:48:24 -0700307 for (const auto &reading : readings_) {
James Kuszmaul63a45482022-04-19 16:12:01 -0700308 pids.insert(reading.first);
milind-ueb075d22023-02-24 14:57:43 -0800309 MaybeAddThreadIds(reading.first, &pids);
James Kuszmaul63a45482022-04-19 16:12:01 -0700310 }
James Kuszmaul418fd062022-03-22 15:22:27 -0700311 if (track_all_) {
312 DIR *const dir = opendir("/proc");
313 if (dir == nullptr) {
314 PLOG(FATAL) << "Failed to open /proc";
315 }
316 while (true) {
317 struct dirent *const dir_entry = readdir(dir);
318 if (dir_entry == nullptr) {
319 break;
320 }
321 pid_t pid;
322 if (dir_entry->d_type == DT_DIR &&
323 absl::SimpleAtoi(dir_entry->d_name, &pid)) {
324 pids.insert(pid);
milind-ueb075d22023-02-24 14:57:43 -0800325 MaybeAddThreadIds(pid, &pids);
James Kuszmaul418fd062022-03-22 15:22:27 -0700326 }
327 }
James Kuszmaul28c9e392022-11-04 13:24:12 -0700328 closedir(dir);
James Kuszmaul418fd062022-03-22 15:22:27 -0700329 }
330
331 for (const pid_t pid : pids) {
332 std::optional<ProcStat> proc_stat = ReadProcStat(pid);
333 // Stop tracking processes that have died.
334 if (!proc_stat.has_value()) {
335 readings_.erase(pid);
336 continue;
337 }
338 const aos::monotonic_clock::time_point start_time =
339 ProcessStartTime(*proc_stat);
340 auto reading_iter = readings_.find(pid);
341 if (reading_iter == readings_.end()) {
Austin Schuh979d4772022-12-30 14:50:41 -0800342 reading_iter =
343 readings_
344 .insert(std::make_pair(
Maxwell Gumleyb27245f2024-04-11 15:46:22 -0600345 pid,
346 ProcessReadings{
347 .name = proc_stat->name,
348 .start_time = start_time,
349 .cpu_percent = 0.0,
350 .kthread = !!(proc_stat->kernel_flags & PF_KTHREAD),
351 .readings = {},
352 .thread_readings = {},
353 }))
Austin Schuh979d4772022-12-30 14:50:41 -0800354 .first;
James Kuszmaul418fd062022-03-22 15:22:27 -0700355 }
356 ProcessReadings &process = reading_iter->second;
357 // The process associated with the PID has changed; reset the state.
358 if (process.start_time != start_time) {
359 process.name = proc_stat->name;
360 process.start_time = start_time;
361 process.readings.Reset();
362 }
James Kuszmaul6b35e3a2022-04-06 15:00:39 -0700363 // If the process name has changed (e.g., if our first reading for a process
364 // name occurred before execvp was called), then update it.
365 if (process.name != proc_stat->name) {
366 process.name = proc_stat->name;
367 }
James Kuszmaul418fd062022-03-22 15:22:27 -0700368
369 process.readings.Push(Reading{now, TotalProcessTime(*proc_stat),
370 RealMemoryUsage(*proc_stat)});
Maxwell Gumleyb27245f2024-04-11 15:46:22 -0600371 if (process.readings.full()) {
James Kuszmaul418fd062022-03-22 15:22:27 -0700372 process.cpu_percent =
373 aos::time::DurationInSeconds(process.readings[1].total_run_time -
374 process.readings[0].total_run_time) /
375 aos::time::DurationInSeconds(process.readings[1].reading_time -
376 process.readings[0].reading_time);
377 } else {
378 process.cpu_percent = 0.0;
379 }
Maxwell Gumleyb27245f2024-04-11 15:46:22 -0600380
381 // Update thread readings for this process
382 if (track_per_thread_info_ == TrackPerThreadInfoMode::kEnabled) {
383 UpdateThreadReadings(pid, process);
384 }
James Kuszmaul418fd062022-03-22 15:22:27 -0700385 }
Austin Schuh608514f2022-12-30 15:51:30 -0800386
387 if (on_reading_update_) {
388 on_reading_update_();
389 }
James Kuszmaul418fd062022-03-22 15:22:27 -0700390}
391
392flatbuffers::Offset<ProcessInfo> Top::InfoForProcess(
393 flatbuffers::FlatBufferBuilder *fbb, pid_t pid) {
394 auto reading_iter = readings_.find(pid);
395 if (reading_iter == readings_.end()) {
396 return {};
397 }
398 const ProcessReadings &reading = reading_iter->second;
Maxwell Gumleyb27245f2024-04-11 15:46:22 -0600399
400 if (reading.readings.empty()) {
401 return {}; // Return an empty offset if readings is empty.
402 }
403
404 std::vector<flatbuffers::Offset<ThreadInfo>> thread_infos_offsets;
405 flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<ThreadInfo>>>
406 threads_vector_offset;
407
408 if (track_per_thread_info_ == TrackPerThreadInfoMode::kEnabled &&
409 !reading.thread_readings.empty()) {
410 thread_infos_offsets.reserve(reading.thread_readings.size());
411 for (const auto &[tid, thread_reading] : reading.thread_readings) {
412 // Calculate how long the thread has been alive by comparing the thread
413 // start time to the current time.
414 const aos::monotonic_clock::time_point start_time =
415 thread_reading.start_time;
416 // convert start_time to int64
417 const int64_t start_time_ns = start_time.time_since_epoch().count();
418
419 const flatbuffers::Offset<flatbuffers::String> threadName =
420 fbb->CreateString(thread_reading.name);
421 ThreadInfo::Builder thread_info_builder(*fbb);
422 thread_info_builder.add_tid(tid);
423 thread_info_builder.add_name(threadName);
424 thread_info_builder.add_cpu_usage(thread_reading.cpu_percent);
425 thread_info_builder.add_start_time(start_time_ns);
426 thread_info_builder.add_state(thread_reading.state);
427 const flatbuffers::Offset<ThreadInfo> threadInfo =
428 thread_info_builder.Finish();
429 thread_infos_offsets.push_back(threadInfo);
430 }
431 threads_vector_offset = fbb->CreateVector(thread_infos_offsets);
432 } else {
433 threads_vector_offset = 0;
434 }
435
436 // Create name string offset
James Kuszmaul418fd062022-03-22 15:22:27 -0700437 const flatbuffers::Offset<flatbuffers::String> name =
438 fbb->CreateString(reading.name);
439 ProcessInfo::Builder builder(*fbb);
440 builder.add_pid(pid);
441 builder.add_name(name);
442 builder.add_cpu_usage(reading.cpu_percent);
443 builder.add_physical_memory(
444 reading.readings[reading.readings.size() - 1].memory_usage);
Maxwell Gumleyb27245f2024-04-11 15:46:22 -0600445 if (!threads_vector_offset.IsNull()) {
446 builder.add_threads(threads_vector_offset);
447 }
448
James Kuszmaul418fd062022-03-22 15:22:27 -0700449 return builder.Finish();
450}
451
452flatbuffers::Offset<TopProcessesFbs> Top::TopProcesses(
453 flatbuffers::FlatBufferBuilder *fbb, int n) {
454 // Pair is {cpu_usage, pid}.
455 std::priority_queue<std::pair<double, pid_t>> cpu_usages;
456 for (const auto &pair : readings_) {
457 // Deliberately include 0.0 percent CPU things in the usage list so that if
458 // the user asks for an arbitrarily large number of processes they'll get
459 // everything.
460 cpu_usages.push(std::make_pair(pair.second.cpu_percent, pair.first));
461 }
462 std::vector<flatbuffers::Offset<ProcessInfo>> offsets;
463 for (int ii = 0; ii < n && !cpu_usages.empty(); ++ii) {
464 offsets.push_back(InfoForProcess(fbb, cpu_usages.top().second));
465 cpu_usages.pop();
466 }
467 const flatbuffers::Offset<
468 flatbuffers::Vector<flatbuffers::Offset<ProcessInfo>>>
469 vector_offset = fbb->CreateVector(offsets);
470 TopProcessesFbs::Builder builder(*fbb);
471 builder.add_processes(vector_offset);
472 return builder.Finish();
473}
474
475} // namespace aos::util