Track CPU usage of AOS processes in starterd
Change-Id: I7aff742ec992c20a09fcf1ffcf5de4cbdec4bef8
Signed-off-by: James Kuszmaul <james.kuszmaul@bluerivertech.com>
diff --git a/aos/starter/BUILD b/aos/starter/BUILD
index 2186421..7590b3b 100644
--- a/aos/starter/BUILD
+++ b/aos/starter/BUILD
@@ -33,6 +33,7 @@
"//aos/events:event_loop",
"//aos/events:shm_event_loop",
"//aos/util:scoped_pipe",
+ "//aos/util:top",
"@com_github_google_glog//:glog",
],
)
@@ -163,6 +164,7 @@
name = "starter_fbs",
srcs = ["starter.fbs"],
gen_reflections = True,
+ includes = ["//aos/util:process_info_fbs_includes"],
target_compatible_with = ["@platforms//os:linux"],
visibility = ["//visibility:public"],
)
diff --git a/aos/starter/starter.fbs b/aos/starter/starter.fbs
index 4b66833..7285281 100644
--- a/aos/starter/starter.fbs
+++ b/aos/starter/starter.fbs
@@ -1,3 +1,5 @@
+include "aos/util/process_info.fbs";
+
namespace aos.starter;
enum State : short {
@@ -73,6 +75,13 @@
// Indicates the reason the application is not running. Only valid if
// application is STOPPED.
last_stop_reason: LastStopReason (id: 6);
+
+ // Debug information providing the approximate CPU usage and memory footprint of the process.
+ // Populated whenever the process is running (i.e., state != STOPPED). While the state is
+ // STOPPING, this could refer to a different process if that process has somehow claimed the
+ // application's PID between the application actually stopping and the parent process receiving
+ // the signal indicating that the application finished stopping.
+ process_info: util.ProcessInfo (id: 7);
}
root_type Status;
diff --git a/aos/starter/starter_test.cc b/aos/starter/starter_test.cc
index 120fe38..033b3c7 100644
--- a/aos/starter/starter_test.cc
+++ b/aos/starter/starter_test.cc
@@ -287,27 +287,31 @@
})
->Setup(watcher_loop.monotonic_now() + std::chrono::seconds(7));
- watcher_loop.MakeWatcher(
- "/aos", [&watcher_loop](const aos::starter::Status &status) {
- const aos::starter::ApplicationStatus *ping_app_status =
- FindApplicationStatus(status, "ping");
- const aos::starter::ApplicationStatus *pong_app_status =
- FindApplicationStatus(status, "pong");
- if (ping_app_status == nullptr || pong_app_status == nullptr) {
- return;
- }
+ watcher_loop.MakeWatcher("/aos", [&watcher_loop](
+ const aos::starter::Status &status) {
+ const aos::starter::ApplicationStatus *ping_app_status =
+ FindApplicationStatus(status, "ping");
+ const aos::starter::ApplicationStatus *pong_app_status =
+ FindApplicationStatus(status, "pong");
+ if (ping_app_status == nullptr || pong_app_status == nullptr) {
+ return;
+ }
- if (ping_app_status->has_state() &&
- ping_app_status->state() != aos::starter::State::STOPPED) {
- watcher_loop.Exit();
- FAIL();
- }
- if (pong_app_status->has_state() &&
- pong_app_status->state() == aos::starter::State::RUNNING) {
- watcher_loop.Exit();
- SUCCEED();
- }
- });
+ if (ping_app_status->has_state() &&
+ ping_app_status->state() != aos::starter::State::STOPPED) {
+ watcher_loop.Exit();
+ FAIL();
+ }
+ if (pong_app_status->has_state() &&
+ pong_app_status->state() == aos::starter::State::RUNNING) {
+ ASSERT_TRUE(pong_app_status->has_process_info());
+ ASSERT_EQ("pong", pong_app_status->process_info()->name()->string_view());
+ ASSERT_EQ(pong_app_status->pid(), pong_app_status->process_info()->pid());
+ ASSERT_LT(0.0, pong_app_status->process_info()->cpu_usage());
+ watcher_loop.Exit();
+ SUCCEED();
+ }
+ });
std::thread starterd_thread([&starter] { starter.Run(); });
watcher_loop.Run();
diff --git a/aos/starter/starterd_lib.cc b/aos/starter/starterd_lib.cc
index 008c46f..84e4d00 100644
--- a/aos/starter/starterd_lib.cc
+++ b/aos/starter/starterd_lib.cc
@@ -33,7 +33,8 @@
event_loop_.GetChannel<aos::starter::Status>("/aos")->frequency() -
1),
listener_(&event_loop_,
- [this](signalfd_siginfo signal) { OnSignal(signal); }) {
+ [this](signalfd_siginfo signal) { OnSignal(signal); }),
+ top_(&event_loop_) {
event_loop_.SkipAosLog();
event_loop_.OnRun([this] {
@@ -117,7 +118,16 @@
}
}
-void Starter::MaybeSendStatus() {
+void Starter::HandleStateChange() {
+ std::set<pid_t> all_pids;
+ for (const auto &pair : applications_) {
+ if (pair.second.get_pid() > 0 &&
+ pair.second.status() != aos::starter::State::STOPPED) {
+ all_pids.insert(pair.second.get_pid());
+ }
+ }
+ top_.set_track_pids(all_pids);
+
if (status_count_ < max_status_count_) {
SendStatus();
++status_count_;
@@ -165,9 +175,9 @@
}
Application *Starter::AddApplication(const aos::Application *application) {
- auto [iter, success] =
- applications_.try_emplace(application->name()->str(), application,
- &event_loop_, [this]() { MaybeSendStatus(); });
+ auto [iter, success] = applications_.try_emplace(
+ application->name()->str(), application, &event_loop_,
+ [this]() { HandleStateChange(); });
if (success) {
// We should be catching and handling SIGCHLD correctly in the starter, so
// don't leave in the crutch for polling for the child process status (this
@@ -200,7 +210,7 @@
std::vector<flatbuffers::Offset<aos::starter::ApplicationStatus>> statuses;
for (auto &application : applications_) {
- statuses.push_back(application.second.PopulateStatus(builder.fbb()));
+ statuses.push_back(application.second.PopulateStatus(builder.fbb(), &top_));
}
auto statuses_fbs = builder.fbb()->CreateVector(statuses);
diff --git a/aos/starter/starterd_lib.h b/aos/starter/starterd_lib.h
index 834e191..e7ded59 100644
--- a/aos/starter/starterd_lib.h
+++ b/aos/starter/starterd_lib.h
@@ -17,6 +17,7 @@
#include "aos/starter/starter_generated.h"
#include "aos/starter/starter_rpc_generated.h"
#include "aos/starter/subprocess.h"
+#include "aos/util/top.h"
namespace aos {
namespace starter {
@@ -49,8 +50,10 @@
void OnSignal(signalfd_siginfo signal);
void HandleStarterRpc(const StarterRpc &command);
- // Sends the Status message if it wouldn't exceed the rate limit.
- void MaybeSendStatus();
+ // Handles any potential state change in the child applications: updates the
+ // set of PIDs that top_ tracks and sends the Status message if it wouldn't
+ // exceed the rate limit.
+ void HandleStateChange();
void SendStatus();
@@ -73,6 +76,8 @@
SignalListener listener_;
+ util::Top top_;
+
DISALLOW_COPY_AND_ASSIGN(Starter);
};
diff --git a/aos/starter/subprocess.cc b/aos/starter/subprocess.cc
index c1eb618..f0c8f85 100644
--- a/aos/starter/subprocess.cc
+++ b/aos/starter/subprocess.cc
@@ -346,10 +346,16 @@
}
flatbuffers::Offset<aos::starter::ApplicationStatus>
-Application::PopulateStatus(flatbuffers::FlatBufferBuilder *builder) {
+Application::PopulateStatus(flatbuffers::FlatBufferBuilder *builder,
+ util::Top *top) {
CHECK_NOTNULL(builder);
auto name_fbs = builder->CreateString(name_);
+ const bool valid_pid = pid_ > 0 && status_ != aos::starter::State::STOPPED;
+ const flatbuffers::Offset<util::ProcessInfo> process_info =
+ valid_pid ? top->InfoForProcess(builder, pid_)
+ : flatbuffers::Offset<util::ProcessInfo>();
+
aos::starter::ApplicationStatus::Builder status_builder(*builder);
status_builder.add_name(name_fbs);
status_builder.add_state(status_);
@@ -361,6 +367,8 @@
status_builder.add_pid(pid_);
status_builder.add_id(id_);
}
+ // add_process_info ignores a null offset, so no validity check is needed.
+ status_builder.add_process_info(process_info);
status_builder.add_last_start_time(start_time_.time_since_epoch().count());
return status_builder.Finish();
}
diff --git a/aos/starter/subprocess.h b/aos/starter/subprocess.h
index 9ee9e31..a4d7cbb 100644
--- a/aos/starter/subprocess.h
+++ b/aos/starter/subprocess.h
@@ -11,6 +11,7 @@
#include "aos/starter/starter_generated.h"
#include "aos/starter/starter_rpc_generated.h"
#include "aos/util/scoped_pipe.h"
+#include "aos/util/top.h"
namespace aos::starter {
@@ -45,7 +46,7 @@
aos::EventLoop *event_loop, std::function<void()> on_change);
flatbuffers::Offset<aos::starter::ApplicationStatus> PopulateStatus(
- flatbuffers::FlatBufferBuilder *builder);
+ flatbuffers::FlatBufferBuilder *builder, util::Top *top);
aos::starter::State status() const { return status_; };
// Returns the last pid of this process. -1 if not started yet.