Add histogram support to log_stats
This lets us get a better sense of what is going on with our message
rates. Min, max, and average aren't quite enough on their own.
Change-Id: I0d89af3fc461d183ad357a2164242ab44b18b992
diff --git a/aos/events/logging/log_stats.cc b/aos/events/logging/log_stats.cc
index f408e27..130a11f 100644
--- a/aos/events/logging/log_stats.cc
+++ b/aos/events/logging/log_stats.cc
@@ -1,6 +1,7 @@
#include <iomanip>
#include <iostream>
+#include "absl/strings/str_format.h"
#include "aos/events/logging/log_reader.h"
#include "aos/events/simulated_event_loop.h"
#include "aos/init.h"
@@ -18,23 +19,153 @@
"Only print channels that have a set max message size that is more "
"than double of the max message size.");
-// define struct to hold all information
-struct ChannelStats {
+// This class implements a histogram for tracking message period percentiles.
+class Histogram {
+ public:
+ Histogram(size_t buckets = 1024)
+ : max_value_bucket_(0.01), values_(buckets, 0.0), counts_(buckets, 0) {}
+
+ // Adds a new sample to the histogram, potentially downsampling the existing
+ // data.
+ void Add(double value) {
+ if (value < max_value_bucket_) {
+ const ssize_t bucket = static_cast<size_t>(
+ std::floor(value * values_.size() / max_value_bucket_));
+ CHECK_GE(bucket, 0);
+ CHECK_LT(bucket, static_cast<ssize_t>(values_.size()));
+ values_[bucket] += value;
+ if (all_counts_ == 0 || value > max_value_) {
+ max_value_ = value;
+ }
+ if (all_counts_ == 0 || value < min_value_) {
+ min_value_ = value;
+ }
+ ++counts_[bucket];
+ ++all_counts_;
+ } else {
+ // Double all the bucket sizes by merging adjacent buckets and doubling
+ // the max value. If this isn't enough, we'll recurse inside Add and
+ // do it again until it fits.
+ max_value_bucket_ *= 2.0;
+ for (size_t bucket = 0; bucket < values_.size() / 2; ++bucket) {
+ values_[bucket] = values_[bucket * 2] + values_[bucket * 2 + 1];
+ counts_[bucket] = counts_[bucket * 2] + counts_[bucket * 2 + 1];
+ }
+ for (size_t bucket = values_.size() / 2; bucket < values_.size();
+ ++bucket) {
+ values_[bucket] = 0.0;
+ counts_[bucket] = 0;
+ }
+ Add(value);
+ }
+ }
+
+ // Prints out the percentiles for a couple of critical numbers.
+ std::string Percentile() const {
+ const size_t percentile5 = all_counts_ / 20;
+ double percentile5_value = 0.0;
+ const size_t percentile50 = all_counts_ / 2;
+ double percentile50_value = 0.0;
+ const size_t percentile95 = all_counts_ - percentile5;
+ double percentile95_value = 0.0;
+
+ size_t count = 0;
+ for (size_t i = 0; i < values_.size(); ++i) {
+ if (count < percentile5 && count + counts_[i] >= percentile5) {
+ percentile5_value = values_[i] / counts_[i];
+ }
+ if (count < percentile50 && count + counts_[i] >= percentile50) {
+ percentile50_value = values_[i] / counts_[i];
+ }
+ if (count < percentile95 && count + counts_[i] >= percentile95) {
+ percentile95_value = values_[i] / counts_[i];
+ }
+ count += counts_[i];
+ }
+
+ // Assume here that these are periods in seconds. Convert to ms for
+ // readability. This isn't super generic, but that's fine for now.
+ return absl::StrFormat(
+ "[max %.3fms 95%%:%.3fms 50%%:%.3fms 5%%:%.3fms min %.3fms]",
+ max_value_ * 1000., percentile95_value * 1000.,
+ percentile50_value * 1000., percentile5_value * 1000.,
+ min_value_ * 1000.);
+ }
+
+ private:
+ // The size of the largest bucket. Used to figure out which bucket something
+ // goes into.
+ double max_value_bucket_;
+ // Max and min values overall we have seen.
+ double max_value_ = 0;
+ double min_value_ = 0;
+ // A list of the sum of values and counts for those per bucket.
+ std::vector<double> values_;
+ std::vector<size_t> counts_;
+ // Total number of samples.
+ size_t all_counts_ = 0;
+};
+
+class ChannelStats {
+ public:
+ ChannelStats(const aos::Channel *channel) : channel_(channel) {}
+
+ // Adds a sample to the statistics.
+ void Add(const aos::Context &context) {
+ max_message_size_ = std::max(max_message_size_, context.size);
+ total_message_size_ += context.size;
+ total_num_messages_++;
+ channel_end_time_ = context.realtime_event_time;
+ first_message_time_ =
+ std::min(first_message_time_, context.monotonic_event_time);
+ if (current_message_time_ != aos::monotonic_clock::min_time) {
+ histogram_.Add(std::chrono::duration<double>(
+ context.monotonic_event_time - current_message_time_)
+ .count());
+ }
+ current_message_time_ = context.monotonic_event_time;
+ }
+
+ std::string Percentile() const { return histogram_.Percentile(); }
+
+ double SecondsActive() const {
+ return aos::time::DurationInSeconds(current_message_time_ -
+ first_message_time_);
+ }
+
+ size_t max_message_size() const { return max_message_size_; }
+ size_t total_num_messages() const { return total_num_messages_; }
+
+ double avg_messages_per_sec() const {
+ return total_num_messages_ / SecondsActive();
+ }
+ size_t avg_message_size() const {
+ return total_message_size_ / total_num_messages_;
+ }
+
+ aos::realtime_clock::time_point channel_end_time() const {
+ return channel_end_time_;
+ }
+
+ const aos::Channel *channel() const { return channel_; }
+
+ private:
// pointer to the channel for which stats are collected
- const aos::Channel *channel;
- aos::realtime_clock::time_point channel_end_time =
+ const aos::Channel *channel_;
+ aos::realtime_clock::time_point channel_end_time_ =
aos::realtime_clock::min_time;
- aos::monotonic_clock::time_point first_message_time =
+ aos::monotonic_clock::time_point first_message_time_ =
// needs to be higher than time in the logfile!
aos::monotonic_clock::max_time;
- aos::monotonic_clock::time_point current_message_time =
+ aos::monotonic_clock::time_point current_message_time_ =
aos::monotonic_clock::min_time;
+
// channel stats to collect per channel
- int total_num_messages = 0;
- size_t max_message_size = 0;
- size_t total_message_size = 0;
- double avg_messages_sec = 0.0; // TODO in Lambda, now in stats overview.
- double max_messages_sec = 0.0; // TODO in Lambda
+ int total_num_messages_ = 0;
+ size_t max_message_size_ = 0;
+ size_t total_message_size_ = 0;
+
+ Histogram histogram_;
};
struct LogfileStats {
@@ -126,17 +257,9 @@
stats_event_loop->MakeRawNoArgWatcher(
channel,
[&logfile_stats, &channel_stats, it](const aos::Context &context) {
- channel_stats[it].max_message_size =
- std::max(channel_stats[it].max_message_size, context.size);
- channel_stats[it].total_message_size += context.size;
- channel_stats[it].total_num_messages++;
- // asume messages are send in sequence per channel
- channel_stats[it].channel_end_time = context.realtime_event_time;
- channel_stats[it].first_message_time =
- std::min(channel_stats[it].first_message_time,
- context.monotonic_event_time);
- channel_stats[it].current_message_time = context.monotonic_event_time;
- // update the overall logfile statistics
+ channel_stats[it].Add(context);
+
+ // Update the overall logfile statistics
logfile_stats.logfile_length += context.size;
});
it++;
@@ -152,58 +275,34 @@
log_reader_factory.Run();
std::cout << std::endl;
+
// Print out the stats per channel and for the logfile
for (size_t i = 0; i != channel_stats.size(); i++) {
- if (channel_stats[i].total_num_messages > 0) {
- double sec_active =
- aos::time::DurationInSeconds(channel_stats[i].current_message_time -
- channel_stats[i].first_message_time);
- channel_stats[i].avg_messages_sec =
- (channel_stats[i].total_num_messages / sec_active);
- logfile_stats.total_log_messages += channel_stats[i].total_num_messages;
- logfile_stats.logfile_end_time = std::max(
- logfile_stats.logfile_end_time, channel_stats[i].channel_end_time);
+ if (!FLAGS_excessive_size_only ||
+ (channel_stats[i].max_message_size() * 2) <
+ static_cast<size_t>(channel_stats[i].channel()->max_size())) {
+ if (channel_stats[i].total_num_messages() > 0) {
+ std::cout << channel_stats[i].channel()->name()->string_view() << " "
+ << channel_stats[i].channel()->type()->string_view() << "\n";
- if (!FLAGS_excessive_size_only ||
- (channel_stats[i].max_message_size * 2) <
- static_cast<unsigned long>(
- channel_stats[i].channel->max_size())) {
- std::cout << "Channel name: "
- << channel_stats[i].channel->name()->string_view()
- << "\tMsg type: "
- << channel_stats[i].channel->type()->string_view() << "\n";
+ logfile_stats.total_log_messages +=
+ channel_stats[i].total_num_messages();
+ logfile_stats.logfile_end_time =
+ std::max(logfile_stats.logfile_end_time,
+ channel_stats[i].channel_end_time());
+
if (!FLAGS_excessive_size_only) {
- std::cout << "Number of msg: " << channel_stats[i].total_num_messages
- << std::setprecision(3) << std::fixed
- << "\tAvg msg per sec: "
- << channel_stats[i].avg_messages_sec
- << "\tSet max msg frequency: "
- << channel_stats[i].channel->frequency() << "\n";
+ std::cout << " " << channel_stats[i].total_num_messages()
+ << " msgs, " << channel_stats[i].avg_messages_per_sec()
+ << "hz avg, " << channel_stats[i].channel()->frequency()
+ << "hz max";
}
- std::cout << "Avg msg size: "
- << (channel_stats[i].total_message_size /
- channel_stats[i].total_num_messages)
- << "\tMax msg size: " << channel_stats[i].max_message_size
- << "\tSet max msg size: "
- << channel_stats[i].channel->max_size() << "\n";
- if (!FLAGS_excessive_size_only) {
- std::cout << "First msg time: " << channel_stats[i].first_message_time
- << "\tLast msg time: "
- << channel_stats[i].current_message_time
- << "\tSeconds active: " << sec_active << "sec\n";
- }
+ std::cout << " " << channel_stats[i].avg_message_size()
+ << " bytes avg, " << channel_stats[i].max_message_size()
+ << " bytes max / " << channel_stats[i].channel()->max_size()
+ << "bytes " << channel_stats[i].Percentile();
std::cout << std::endl;
}
- } else {
- std::cout << "Channel name: "
- << channel_stats[i].channel->name()->string_view() << "\t"
- << "Msg type: "
- << channel_stats[i].channel->type()->string_view() << "\n"
- << "Set max msg frequency: "
- << channel_stats[i].channel->frequency() << "\t"
- << "Set max msg size: " << channel_stats[i].channel->max_size()
- << "\n--- No messages in channel ---"
- << "\n";
}
}
std::cout << std::setfill('-') << std::setw(80) << "-"