[orin] Add hardware stats monitor
This binary monitors hardware stats such as thermals, voltage, and
current, and writes them out to AOS.
```
pi[361] orin-7971-1 ~
$ aos_dump /orin1/hardware_monitor frc971.orin.HardwareStats
2024-03-17_04-30-00.577662861 (16674.962528984sec) /orin1/hardware_monitor frc971.orin.HardwareStats: {"electrical_readings": [{"channel": 1, "current": 1528, "label": "VDD_IN", "power": 7725, "voltage": 5056}, {"channel": 2, "current": 328, "label": "VDD_CPU_GPU_CV", "power": 1655, "voltage": 5048}, {"channel": 3, "current": 536, "label": "VDD_SOC", "power": 2705, "voltage": 5048}], "fan_speed": 5738, "thermal_zones": [{"id": 0, "name": "CPU-therm", "temperature": 34812}, {"id": 1, "name": "GPU-therm", "temperature": 34156}, {"id": 2, "name": "CV0-therm", "temperature": 33125}, {"id": 3, "name": "CV1-therm", "temperature": 32718}, {"id": 4, "name": "CV2-therm", "temperature": 34156}, {"id": 5, "name": "SOC0-therm", "temperature": 35156}, {"id": 6, "name": "SOC1-therm", "temperature": 36937}, {"id": 7, "name": "SOC2-therm", "temperature": 33562}, {"id": 8, "name": "tj-therm", "temperature": 36937}]}
2024-03-17_04-30-05.577664094 (16679.962530185sec) /orin1/hardware_monitor frc971.orin.HardwareStats: {"electrical_readings": [{"channel": 1, "current": 1416, "label": "VDD_IN", "power": 7159, "voltage": 5056}, {"channel": 2, "current": 360, "label": "VDD_CPU_GPU_CV", "power": 1817, "voltage": 5048}, {"channel": 3, "current": 536, "label": "VDD_SOC", "power": 2710, "voltage": 5056}], "fan_speed": 5715, "thermal_zones": [{"id": 0, "name": "CPU-therm", "temperature": 34781}, {"id": 1, "name": "GPU-therm", "temperature": 34375}, {"id": 2, "name": "CV0-therm", "temperature": 33125}, {"id": 3, "name": "CV1-therm", "temperature": 32687}, {"id": 4, "name": "CV2-therm", "temperature": 34187}, {"id": 5, "name": "SOC0-therm", "temperature": 35031}, {"id": 6, "name": "SOC1-therm", "temperature": 36875}, {"id": 7, "name": "SOC2-therm", "temperature": 33593}, {"id": 8, "name": "tj-therm", "temperature": 36875}]}
2024-03-17_04-30-10.577681840 (16684.962547899sec) /orin1/hardware_monitor frc971.orin.HardwareStats: {"electrical_readings": [{"channel": 1, "current": 1528, "label": "VDD_IN", "power": 7725, "voltage": 5056}, {"channel": 2, "current": 328, "label": "VDD_CPU_GPU_CV", "power": 1655, "voltage": 5048}, {"channel": 3, "current": 536, "label": "VDD_SOC", "power": 2705, "voltage": 5048}], "fan_speed": 5739, "thermal_zones": [{"id": 0, "name": "CPU-therm", "temperature": 34812}, {"id": 1, "name": "GPU-therm", "temperature": 34156}, {"id": 2, "name": "CV0-therm", "temperature": 33062}, {"id": 3, "name": "CV1-therm", "temperature": 32625}, {"id": 4, "name": "CV2-therm", "temperature": 34187}, {"id": 5, "name": "SOC0-therm", "temperature": 35031}, {"id": 6, "name": "SOC1-therm", "temperature": 36937}, {"id": 7, "name": "SOC2-therm", "temperature": 33468}, {"id": 8, "name": "tj-therm", "temperature": 36937}]}
```
Signed-off-by: Tushar Pankaj <tushar.s.pankaj@gmail.com>
Change-Id: Idb0781ea2687595f564f39678b4f9f183e272c90
diff --git a/frc971/orin/BUILD b/frc971/orin/BUILD
index 3ce9040..8c4e9ac 100644
--- a/frc971/orin/BUILD
+++ b/frc971/orin/BUILD
@@ -1,3 +1,5 @@
+load("//aos/flatbuffers:generate.bzl", "static_flatbuffer")
+load("//aos:flatbuffers.bzl", "cc_static_flatbuffer")
load("//frc971:halide.bzl", "halide_library")
exports_files(["orin_irq_config.json"])
@@ -150,3 +152,27 @@
"//third_party:opencv",
],
)
+
+# Flatbuffer library generated from hardware_stats.fbs; :hardware_monitor
+# depends on it for the HardwareStats message types.
+static_flatbuffer(
+ name = "hardware_stats_fbs",
+ srcs = ["hardware_stats.fbs"],
+ visibility = ["//visibility:public"],
+)
+
+# Embeds the reflection schema produced by :hardware_stats_fbs behind the C++
+# function named in `function`.
+# NOTE(review): hardware_stats.fbs declares namespace frc971.orin, but the
+# function is placed in aos::util. This looks like a copy/paste leftover;
+# confirm whether frc971::orin::HardwareStatsSchema was intended before any
+# caller starts depending on the current name.
+cc_static_flatbuffer(
+ name = "hardware_stats_schema",
+ function = "aos::util::HardwareStatsSchema",
+ target = ":hardware_stats_fbs_reflection_out",
+)
+
+# Binary that periodically publishes frc971.orin.HardwareStats.
+cc_binary(
+    name = "hardware_monitor",
+    srcs = ["hardware_monitor.cc"],
+    visibility = ["//visibility:public"],
+    deps = [
+        ":hardware_stats_fbs",
+        "//aos:init",
+        "//aos/events:shm_event_loop",
+        "@com_google_absl//absl/strings",
+        # hardware_monitor.cc includes absl/strings/str_format.h directly, and
+        # that header lives in its own target rather than in //absl/strings.
+        "@com_google_absl//absl/strings:str_format",
+    ],
+)
diff --git a/frc971/orin/hardware_monitor.cc b/frc971/orin/hardware_monitor.cc
new file mode 100644
index 0000000..f09d0e6
--- /dev/null
+++ b/frc971/orin/hardware_monitor.cc
@@ -0,0 +1,174 @@
+#include <dirent.h>
+#include <sys/statvfs.h>
+
+#include <chrono>
+#include <cstdint>
+#include <fstream>
+#include <memory>
+#include <optional>
+#include <string>
+#include <string_view>
+#include <vector>
+
+#include "absl/strings/numbers.h"
+#include "absl/strings/str_format.h"
+#include "gflags/gflags.h"
+
+#include "aos/events/shm_event_loop.h"
+#include "aos/init.h"
+#include "frc971/orin/hardware_stats_generated.h"
+
+// Path of the AOS configuration file; main() passes FLAGS_config to
+// aos::configuration::ReadConfig().
+DEFINE_string(config, "aos_config.json", "File path of aos configuration");
+
+namespace frc971::orin {
+namespace {
+// Reads the first line of `file_name`, without its trailing newline.
+//
+// Returns std::nullopt (and logs at VLOG(1)) when the file cannot be opened or
+// when no line can be extracted from it, e.g. because the read itself fails
+// or the file is empty. A file whose first line is empty ("\n") yields an
+// empty string.
+std::optional<std::string> ReadFileFirstLine(std::string_view file_name) {
+  // std::ifstream needs a null-terminated path, which a string_view does not
+  // guarantee, hence the copy.
+  const std::string path(file_name);
+  std::ifstream file(path, std::ios_base::in);
+
+  std::string line;
+  // Check the result of getline as well as the open: a file can open fine and
+  // still fail on read, and that must not be reported as a successful empty
+  // line.
+  if (!file.good() || !std::getline(file, line)) {
+    VLOG(1) << "Can't read " << file_name;
+    return std::nullopt;
+  }
+  return line;
+}
+
+// Returns the name of the first entry in directory `dir_name` whose name
+// starts with "hwmon", in whatever order readdir() reports entries.
+//
+// Returns an empty string when the directory cannot be opened (logged at
+// VLOG(1)) or when it contains no such entry.
+std::string GetHwmonNumber(const char *dir_name) {
+  // Owns the DIR stream so it is closed on every return path. A local functor
+  // is used instead of decltype(&closedir) to avoid carrying libc function
+  // attributes into the template argument.
+  struct DirCloser {
+    void operator()(DIR *dir) const { closedir(dir); }
+  };
+  const std::unique_ptr<DIR, DirCloser> dirp(opendir(dir_name));
+  if (!dirp) {
+    VLOG(1) << "Can't open " << dir_name;
+    return "";
+  }
+
+  while (const struct dirent *directory_entry = readdir(dirp.get())) {
+    const std::string_view entry_name(directory_entry->d_name);
+    if (entry_name.starts_with("hwmon")) {
+      // Copy out of the dirent before the stream is closed.
+      return std::string(entry_name);
+    }
+  }
+  return "";
+}
+} // namespace
+
+// Periodically sends out the HardwareStats message with hardware statistics
+// info.
+//
+// Every reading is best effort: when a sysfs file cannot be read or parsed,
+// the corresponding field is left unset rather than failing the whole message.
+class HardwareMonitor {
+ public:
+  // Looks up the hwmon directory names for the fan tachometer and the
+  // electrical monitor once, then registers a timer that calls
+  // PublishHardwareStats(). The timer is scheduled from the OnRun callback,
+  // based at the monotonic time at that point, with a 5 second repeat.
+  // `event_loop` is stored as a raw pointer and is not owned.
+  explicit HardwareMonitor(aos::EventLoop *event_loop)
+      : event_loop_(event_loop),
+        sender_(event_loop_->MakeSender<HardwareStats>("/hardware_monitor")),
+        fan_hwmon_(
+            GetHwmonNumber("/sys/devices/platform/39c0000.tachometer/hwmon/")),
+        electrical_hwmon_(GetHwmonNumber(
+            "/sys/devices/platform/c240000.i2c/i2c-1/1-0040/hwmon/")) {
+    periodic_timer_ =
+        event_loop_->AddTimer([this]() { PublishHardwareStats(); });
+    event_loop_->OnRun([this]() {
+      periodic_timer_->Schedule(event_loop_->monotonic_now(),
+                                std::chrono::seconds(5));
+    });
+  }
+
+ private:
+  // thermal_zone0 .. thermal_zone8 are polled.
+  static constexpr int kNumThermalZones = 9;
+  // INA3221 channels are numbered 1 through 3 in the hwmon file names.
+  static constexpr int kFirstElectricalChannel = 1;
+  static constexpr int kLastElectricalChannel = 3;
+
+  // Reads the first line of `path` and parses it as an unsigned integer.
+  // Returns std::nullopt if the file can't be read or doesn't parse.
+  static std::optional<uint64_t> ReadUint64(const std::string &path) {
+    const std::optional<std::string> text = ReadFileFirstLine(path);
+    uint64_t value = 0;
+    if (text && absl::SimpleAtoi(*text, &value)) {
+      return value;
+    }
+    return std::nullopt;
+  }
+
+  // Samples all thermal zones, the fan speed and the electrical channels, and
+  // sends them as a single HardwareStats message.
+  //
+  // FlatBufferBuilder does not allow strings, vectors or other tables to be
+  // created while a table is under construction, so every string and vector
+  // below is serialized *before* the table builder that references it is
+  // started.
+  void PublishHardwareStats() {
+    aos::Sender<HardwareStats>::Builder builder = sender_.MakeBuilder();
+
+    // Iterate through all thermal zones
+    std::vector<flatbuffers::Offset<ThermalZone>> thermal_zones;
+    thermal_zones.reserve(kNumThermalZones);
+    for (int zone_id = 0; zone_id < kNumThermalZones; ++zone_id) {
+      const std::optional<std::string> zone_name =
+          ReadFileFirstLine(absl::StrFormat(
+              "/sys/devices/virtual/thermal/thermal_zone%d/type", zone_id));
+      const std::optional<uint64_t> temperature = ReadUint64(absl::StrFormat(
+          "/sys/devices/virtual/thermal/thermal_zone%d/temp", zone_id));
+
+      flatbuffers::Offset<flatbuffers::String> name_offset;
+      if (zone_name) {
+        name_offset = builder.fbb()->CreateString(*zone_name);
+      }
+
+      ThermalZone::Builder thermal_zone_builder =
+          builder.MakeBuilder<ThermalZone>();
+      thermal_zone_builder.add_id(zone_id);
+      if (zone_name) {
+        thermal_zone_builder.add_name(name_offset);
+      }
+      if (temperature) {
+        thermal_zone_builder.add_temperature(*temperature);
+      }
+      thermal_zones.emplace_back(thermal_zone_builder.Finish());
+    }
+
+    // Get fan speed
+    const std::optional<uint64_t> fan_speed =
+        ReadUint64(absl::StrFormat("/sys/class/hwmon/%s/rpm", fan_hwmon_));
+
+    // Iterate through INA3221 electrical reading channels
+    std::vector<flatbuffers::Offset<ElectricalReading>> electrical_readings;
+    electrical_readings.reserve(kLastElectricalChannel -
+                                kFirstElectricalChannel + 1);
+    for (int channel = kFirstElectricalChannel;
+         channel <= kLastElectricalChannel; ++channel) {
+      const std::optional<std::string> label =
+          ReadFileFirstLine(absl::StrFormat("/sys/class/hwmon/%s/in%d_label",
+                                            electrical_hwmon_, channel));
+      const std::optional<uint64_t> voltage =
+          ReadUint64(absl::StrFormat("/sys/class/hwmon/%s/in%d_input",
+                                     electrical_hwmon_, channel));
+      const std::optional<uint64_t> current =
+          ReadUint64(absl::StrFormat("/sys/class/hwmon/%s/curr%d_input",
+                                     electrical_hwmon_, channel));
+      // A missing voltage or current counts as 0, which makes the power 0 and
+      // therefore leaves the power field unset below.
+      const uint64_t power = voltage.value_or(0) * current.value_or(0) / 1000;
+
+      flatbuffers::Offset<flatbuffers::String> label_offset;
+      if (label) {
+        label_offset = builder.fbb()->CreateString(*label);
+      }
+
+      ElectricalReading::Builder electrical_reading_builder =
+          builder.MakeBuilder<ElectricalReading>();
+      electrical_reading_builder.add_channel(channel);
+      if (label) {
+        electrical_reading_builder.add_label(label_offset);
+      }
+      if (voltage) {
+        electrical_reading_builder.add_voltage(*voltage);
+      }
+      if (current) {
+        electrical_reading_builder.add_current(*current);
+      }
+      if (power != 0) {
+        electrical_reading_builder.add_power(power);
+      }
+      electrical_readings.emplace_back(electrical_reading_builder.Finish());
+    }
+
+    // Serialize both vectors before opening the root table.
+    const auto thermal_zones_offset =
+        builder.fbb()->CreateVector(thermal_zones);
+    const auto electrical_readings_offset =
+        builder.fbb()->CreateVector(electrical_readings);
+
+    HardwareStats::Builder hardware_stats_builder =
+        builder.MakeBuilder<HardwareStats>();
+    hardware_stats_builder.add_thermal_zones(thermal_zones_offset);
+    if (fan_speed) {
+      hardware_stats_builder.add_fan_speed(*fan_speed);
+    }
+    hardware_stats_builder.add_electrical_readings(electrical_readings_offset);
+
+    builder.CheckOk(builder.Send(hardware_stats_builder.Finish()));
+  }
+
+  // Event loop the timer and sender are created on; not owned.
+  aos::EventLoop *event_loop_;
+
+  aos::Sender<HardwareStats> sender_;
+
+  // Timer that triggers PublishHardwareStats(); set in the constructor.
+  aos::TimerHandler *periodic_timer_ = nullptr;
+
+  // Directory name returned by GetHwmonNumber() for the fan tachometer; empty
+  // if none was found.
+  std::string fan_hwmon_;
+
+  // Directory name returned by GetHwmonNumber() for the electrical monitor;
+  // empty if none was found.
+  std::string electrical_hwmon_;
+};
+
+} // namespace frc971::orin
+
+// Entry point: loads the AOS configuration named by --config and runs a
+// HardwareMonitor on a shared-memory event loop.
+int main(int argc, char **argv) {
+  aos::InitGoogle(&argc, &argv);
+
+  // The event loop is handed a pointer into this buffer, so the buffer is
+  // declared first and outlives the loop.
+  aos::FlatbufferDetachedBuffer<aos::Configuration> aos_config =
+      aos::configuration::ReadConfig(FLAGS_config);
+  aos::ShmEventLoop event_loop(&aos_config.message());
+
+  // Registers its timer and OnRun callback on the loop during construction.
+  frc971::orin::HardwareMonitor monitor(&event_loop);
+
+  event_loop.Run();
+  return 0;
+}
diff --git a/frc971/orin/hardware_stats.fbs b/frc971/orin/hardware_stats.fbs
new file mode 100644
index 0000000..9f0fa74
--- /dev/null
+++ b/frc971/orin/hardware_stats.fbs
@@ -0,0 +1,37 @@
+namespace frc971.orin;
+
+// Per-zone stats for a single thermal zone.
+table ThermalZone {
+ // Number of the thermal zone in question, i.e. the N in
+ // /sys/devices/virtual/thermal/thermal_zoneN.
+ id: uint64 (id: 0);
+ // Name of the thermal zone (e.g. "CPU-therm").
+ name: string (id: 1);
+ // Temperature of the thermal zone in millidegrees C.
+ // NOTE(review): the type is unsigned, so a reading below 0 C cannot be
+ // represented; confirm that is acceptable for the target hardware.
+ temperature: uint64 (id: 2);
+}
+
+// Electrical readings from the INA3221 monitoring chip, one table per
+// measurement channel.
+table ElectricalReading {
+ // Number of the INA3221 measurement channel.
+ channel: uint64 (id: 0);
+ // Label of the INA3221 measurement point (e.g. "VDD_IN").
+ label: string (id: 1);
+ // Voltage reading in mV at the measurement point.
+ voltage: uint64 (id: 2);
+ // Current reading in mA through the measurement point.
+ current: uint64 (id: 3);
+ // Power usage in mW at the measurement point. hardware_monitor derives this
+ // as voltage * current / 1000 and leaves it unset when the result is 0.
+ power: uint64 (id: 4);
+}
+
+// Table to track the current state of a compute platform's hardware.
+// Fields whose underlying reading is unavailable may be left unset.
+table HardwareStats {
+ // Per-zone thermal stats, one entry per thermal zone polled.
+ thermal_zones: [ThermalZone] (id: 0);
+ // RPM of the fan
+ fan_speed: uint64 (id: 1);
+ // Electrical readings, one entry per measurement channel.
+ electrical_readings: [ElectricalReading] (id: 2);
+}
+
+root_type HardwareStats;