Track CPU usage of AOS processes in starterd

Change-Id: I7aff742ec992c20a09fcf1ffcf5de4cbdec4bef8
Signed-off-by: James Kuszmaul <james.kuszmaul@bluerivertech.com>
diff --git a/aos/starter/BUILD b/aos/starter/BUILD
index 2186421..7590b3b 100644
--- a/aos/starter/BUILD
+++ b/aos/starter/BUILD
@@ -33,6 +33,7 @@
         "//aos/events:event_loop",
         "//aos/events:shm_event_loop",
         "//aos/util:scoped_pipe",
+        "//aos/util:top",
         "@com_github_google_glog//:glog",
     ],
 )
@@ -163,6 +164,7 @@
     name = "starter_fbs",
     srcs = ["starter.fbs"],
     gen_reflections = True,
+    includes = ["//aos/util:process_info_fbs_includes"],
     target_compatible_with = ["@platforms//os:linux"],
     visibility = ["//visibility:public"],
 )
diff --git a/aos/starter/starter.fbs b/aos/starter/starter.fbs
index 4b66833..7285281 100644
--- a/aos/starter/starter.fbs
+++ b/aos/starter/starter.fbs
@@ -1,3 +1,5 @@
+include "aos/util/process_info.fbs";
+
 namespace aos.starter;
 
 enum State : short {
@@ -73,6 +75,13 @@
   // Indicates the reason the application is not running. Only valid if
   // application is STOPPED.
   last_stop_reason: LastStopReason (id: 6);
+
+  // Debug information providing the approximate CPU usage and memory footprint of the process.
+  // Populated whenever the process is running (i.e., state != STOPPED). While the state is
+  // STOPPING, this could refer to a different process if that process has somehow claimed the
+  // application's PID between the application actually stopping and the parent process receiving
+  // the signal indicating that the application finished stopping.
+  process_info: util.ProcessInfo (id: 7);
 }
 
 root_type Status;
diff --git a/aos/starter/starter_test.cc b/aos/starter/starter_test.cc
index 120fe38..033b3c7 100644
--- a/aos/starter/starter_test.cc
+++ b/aos/starter/starter_test.cc
@@ -287,27 +287,31 @@
       })
       ->Setup(watcher_loop.monotonic_now() + std::chrono::seconds(7));
 
-  watcher_loop.MakeWatcher(
-      "/aos", [&watcher_loop](const aos::starter::Status &status) {
-        const aos::starter::ApplicationStatus *ping_app_status =
-            FindApplicationStatus(status, "ping");
-        const aos::starter::ApplicationStatus *pong_app_status =
-            FindApplicationStatus(status, "pong");
-        if (ping_app_status == nullptr || pong_app_status == nullptr) {
-          return;
-        }
+  watcher_loop.MakeWatcher("/aos", [&watcher_loop](
+                                       const aos::starter::Status &status) {
+    const aos::starter::ApplicationStatus *ping_app_status =
+        FindApplicationStatus(status, "ping");
+    const aos::starter::ApplicationStatus *pong_app_status =
+        FindApplicationStatus(status, "pong");
+    if (ping_app_status == nullptr || pong_app_status == nullptr) {
+      return;
+    }
 
-        if (ping_app_status->has_state() &&
-            ping_app_status->state() != aos::starter::State::STOPPED) {
-          watcher_loop.Exit();
-          FAIL();
-        }
-        if (pong_app_status->has_state() &&
-            pong_app_status->state() == aos::starter::State::RUNNING) {
-          watcher_loop.Exit();
-          SUCCEED();
-        }
-      });
+    if (ping_app_status->has_state() &&
+        ping_app_status->state() != aos::starter::State::STOPPED) {
+      watcher_loop.Exit();
+      FAIL();
+    }
+    if (pong_app_status->has_state() &&
+        pong_app_status->state() == aos::starter::State::RUNNING) {
+      ASSERT_TRUE(pong_app_status->has_process_info());
+      ASSERT_EQ("pong", pong_app_status->process_info()->name()->string_view());
+      ASSERT_EQ(pong_app_status->pid(), pong_app_status->process_info()->pid());
+      ASSERT_LT(0.0, pong_app_status->process_info()->cpu_usage());
+      watcher_loop.Exit();
+      SUCCEED();
+    }
+  });
 
   std::thread starterd_thread([&starter] { starter.Run(); });
   watcher_loop.Run();
diff --git a/aos/starter/starterd_lib.cc b/aos/starter/starterd_lib.cc
index 008c46f..84e4d00 100644
--- a/aos/starter/starterd_lib.cc
+++ b/aos/starter/starterd_lib.cc
@@ -33,7 +33,8 @@
           event_loop_.GetChannel<aos::starter::Status>("/aos")->frequency() -
           1),
       listener_(&event_loop_,
-                [this](signalfd_siginfo signal) { OnSignal(signal); }) {
+                [this](signalfd_siginfo signal) { OnSignal(signal); }),
+      top_(&event_loop_) {
   event_loop_.SkipAosLog();
 
   event_loop_.OnRun([this] {
@@ -117,7 +118,16 @@
   }
 }
 
-void Starter::MaybeSendStatus() {
+void Starter::HandleStateChange() {
+  std::set<pid_t> all_pids;
+  for (const auto &pair : applications_) {
+    if (pair.second.get_pid() > 0 &&
+        pair.second.status() != aos::starter::State::STOPPED) {
+      all_pids.insert(pair.second.get_pid());
+    }
+  }
+  top_.set_track_pids(all_pids);
+
   if (status_count_ < max_status_count_) {
     SendStatus();
     ++status_count_;
@@ -165,9 +175,9 @@
 }
 
 Application *Starter::AddApplication(const aos::Application *application) {
-  auto [iter, success] =
-      applications_.try_emplace(application->name()->str(), application,
-                                &event_loop_, [this]() { MaybeSendStatus(); });
+  auto [iter, success] = applications_.try_emplace(
+      application->name()->str(), application, &event_loop_,
+      [this]() { HandleStateChange(); });
   if (success) {
     // We should be catching and handling SIGCHLD correctly in the starter, so
     // don't leave in the crutch for polling for the child process status (this
@@ -200,7 +210,7 @@
   std::vector<flatbuffers::Offset<aos::starter::ApplicationStatus>> statuses;
 
   for (auto &application : applications_) {
-    statuses.push_back(application.second.PopulateStatus(builder.fbb()));
+    statuses.push_back(application.second.PopulateStatus(builder.fbb(), &top_));
   }
 
   auto statuses_fbs = builder.fbb()->CreateVector(statuses);
diff --git a/aos/starter/starterd_lib.h b/aos/starter/starterd_lib.h
index 834e191..e7ded59 100644
--- a/aos/starter/starterd_lib.h
+++ b/aos/starter/starterd_lib.h
@@ -17,6 +17,7 @@
 #include "aos/starter/starter_generated.h"
 #include "aos/starter/starter_rpc_generated.h"
 #include "aos/starter/subprocess.h"
+#include "aos/util/top.h"
 
 namespace aos {
 namespace starter {
@@ -49,8 +50,10 @@
   void OnSignal(signalfd_siginfo signal);
   void HandleStarterRpc(const StarterRpc &command);
 
-  // Sends the Status message if it wouldn't exceed the rate limit.
-  void MaybeSendStatus();
+  // Handles any potential state change in the child applications.
+  // In particular, sends the Status message if it wouldn't exceed the rate
+  // limit.
+  void HandleStateChange();
 
   void SendStatus();
 
@@ -73,6 +76,8 @@
 
   SignalListener listener_;
 
+  util::Top top_;
+
   DISALLOW_COPY_AND_ASSIGN(Starter);
 };
 
diff --git a/aos/starter/subprocess.cc b/aos/starter/subprocess.cc
index c1eb618..f0c8f85 100644
--- a/aos/starter/subprocess.cc
+++ b/aos/starter/subprocess.cc
@@ -346,10 +346,16 @@
 }
 
 flatbuffers::Offset<aos::starter::ApplicationStatus>
-Application::PopulateStatus(flatbuffers::FlatBufferBuilder *builder) {
+Application::PopulateStatus(flatbuffers::FlatBufferBuilder *builder,
+                            util::Top *top) {
   CHECK_NOTNULL(builder);
   auto name_fbs = builder->CreateString(name_);
 
+  const bool valid_pid = pid_ > 0 && status_ != aos::starter::State::STOPPED;
+  const flatbuffers::Offset<util::ProcessInfo> process_info =
+      valid_pid ? top->InfoForProcess(builder, pid_)
+                : flatbuffers::Offset<util::ProcessInfo>();
+
   aos::starter::ApplicationStatus::Builder status_builder(*builder);
   status_builder.add_name(name_fbs);
   status_builder.add_state(status_);
@@ -361,6 +367,8 @@
     status_builder.add_pid(pid_);
     status_builder.add_id(id_);
   }
+  // Note that even if process_info is null, calling add_process_info is fine.
+  status_builder.add_process_info(process_info);
   status_builder.add_last_start_time(start_time_.time_since_epoch().count());
   return status_builder.Finish();
 }
diff --git a/aos/starter/subprocess.h b/aos/starter/subprocess.h
index 9ee9e31..a4d7cbb 100644
--- a/aos/starter/subprocess.h
+++ b/aos/starter/subprocess.h
@@ -11,6 +11,7 @@
 #include "aos/starter/starter_generated.h"
 #include "aos/starter/starter_rpc_generated.h"
 #include "aos/util/scoped_pipe.h"
+#include "aos/util/top.h"
 
 namespace aos::starter {
 
@@ -45,7 +46,7 @@
               aos::EventLoop *event_loop, std::function<void()> on_change);
 
   flatbuffers::Offset<aos::starter::ApplicationStatus> PopulateStatus(
-      flatbuffers::FlatBufferBuilder *builder);
+      flatbuffers::FlatBufferBuilder *builder, util::Top *top);
   aos::starter::State status() const { return status_; };
 
   // Returns the last pid of this process. -1 if not started yet.