Add histogram support to log_stats

This lets us get a better sense of what is going on with our message
rates.  Min, max, average aren't quite enough.

Change-Id: I0d89af3fc461d183ad357a2164242ab44b18b992
diff --git a/aos/events/logging/log_stats.cc b/aos/events/logging/log_stats.cc
index f408e27..130a11f 100644
--- a/aos/events/logging/log_stats.cc
+++ b/aos/events/logging/log_stats.cc
@@ -1,6 +1,7 @@
 #include <iomanip>
 #include <iostream>
 
+#include "absl/strings/str_format.h"
 #include "aos/events/logging/log_reader.h"
 #include "aos/events/simulated_event_loop.h"
 #include "aos/init.h"
@@ -18,23 +19,153 @@
             "Only print channels that have a set max message size that is more "
             "than double of the max message size.");
 
-// define struct to hold all information
-struct ChannelStats {
+// This class implements a histogram for tracking message period percentiles.
+class Histogram {
+ public:
+  Histogram(size_t buckets = 1024)
+      : max_value_bucket_(0.01), values_(buckets, 0.0), counts_(buckets, 0) {}
+
+  // Adds a new sample to the histogram, potentially downsampling the existing
+  // data.
+  void Add(double value) {
+    if (value < max_value_bucket_) {
+      const ssize_t bucket = static_cast<size_t>(
+          std::floor(value * values_.size() / max_value_bucket_));
+      CHECK_GE(bucket, 0);
+      CHECK_LT(bucket, static_cast<ssize_t>(values_.size()));
+      values_[bucket] += value;
+      if (all_counts_ == 0 || value > max_value_) {
+        max_value_ = value;
+      }
+      if (all_counts_ == 0 || value < min_value_) {
+        min_value_ = value;
+      }
+      ++counts_[bucket];
+      ++all_counts_;
+    } else {
+      // Double all the bucket sizes by merging adjacent buckets and doubling
+      // the max value.  If this isn't enough, we'll recurse inside Add and
+      // do it again until it fits.
+      max_value_bucket_ *= 2.0;
+      for (size_t bucket = 0; bucket < values_.size() / 2; ++bucket) {
+        values_[bucket] = values_[bucket * 2] + values_[bucket * 2 + 1];
+        counts_[bucket] = counts_[bucket * 2] + counts_[bucket * 2 + 1];
+      }
+      for (size_t bucket = values_.size() / 2; bucket < values_.size();
+           ++bucket) {
+        values_[bucket] = 0.0;
+        counts_[bucket] = 0;
+      }
+      Add(value);
+    }
+  }
+
+  // Prints out the percentiles for a couple of critical numbers.
+  std::string Percentile() const {
+    const size_t percentile5 = all_counts_ / 20;
+    double percentile5_value = 0.0;
+    const size_t percentile50 = all_counts_ / 2;
+    double percentile50_value = 0.0;
+    const size_t percentile95 = all_counts_ - percentile5;
+    double percentile95_value = 0.0;
+
+    size_t count = 0;
+    for (size_t i = 0; i < values_.size(); ++i) {
+      if (count < percentile5 && count + counts_[i] >= percentile5) {
+        percentile5_value = values_[i] / counts_[i];
+      }
+      if (count < percentile50 && count + counts_[i] >= percentile50) {
+        percentile50_value = values_[i] / counts_[i];
+      }
+      if (count < percentile95 && count + counts_[i] >= percentile95) {
+        percentile95_value = values_[i] / counts_[i];
+      }
+      count += counts_[i];
+    }
+
+    // Assume here that these are periods in seconds.  Convert to ms for
+    // readability.  This isn't super generic, but that's fine for now.
+    return absl::StrFormat(
+        "[max %.3fms 95%%:%.3fms 50%%:%.3fms 5%%:%.3fms min %.3fms]",
+        max_value_ * 1000., percentile95_value * 1000.,
+        percentile50_value * 1000., percentile5_value * 1000.,
+        min_value_ * 1000.);
+  }
+
+ private:
+  // The size of the largest bucket.  Used to figure out which bucket something
+  // goes into.
+  double max_value_bucket_;
+  // Max and min values overall we have seen.
+  double max_value_ = 0;
+  double min_value_ = 0;
+  // A list of the sum of values and counts for those per bucket.
+  std::vector<double> values_;
+  std::vector<size_t> counts_;
+  // Total number of samples.
+  size_t all_counts_ = 0;
+};
+
+class ChannelStats {
+ public:
+  ChannelStats(const aos::Channel *channel) : channel_(channel) {}
+
+  // Adds a sample to the statistics.
+  void Add(const aos::Context &context) {
+    max_message_size_ = std::max(max_message_size_, context.size);
+    total_message_size_ += context.size;
+    total_num_messages_++;
+    channel_end_time_ = context.realtime_event_time;
+    first_message_time_ =
+        std::min(first_message_time_, context.monotonic_event_time);
+    if (current_message_time_ != aos::monotonic_clock::min_time) {
+      histogram_.Add(std::chrono::duration<double>(
+                         context.monotonic_event_time - current_message_time_)
+                         .count());
+    }
+    current_message_time_ = context.monotonic_event_time;
+  }
+
+  std::string Percentile() const { return histogram_.Percentile(); }
+
+  double SecondsActive() const {
+    return aos::time::DurationInSeconds(current_message_time_ -
+                                        first_message_time_);
+  }
+
+  size_t max_message_size() const { return max_message_size_; }
+  size_t total_num_messages() const { return total_num_messages_; }
+
+  double avg_messages_per_sec() const {
+    return total_num_messages_ / SecondsActive();
+  }
+  size_t avg_message_size() const {
+    return total_message_size_ / total_num_messages_;
+  }
+
+  aos::realtime_clock::time_point channel_end_time() const {
+    return channel_end_time_;
+  }
+
+  const aos::Channel *channel() const { return channel_; }
+
+ private:
   // pointer to the channel for which stats are collected
-  const aos::Channel *channel;
-  aos::realtime_clock::time_point channel_end_time =
+  const aos::Channel *channel_;
+  aos::realtime_clock::time_point channel_end_time_ =
       aos::realtime_clock::min_time;
-  aos::monotonic_clock::time_point first_message_time =
+  aos::monotonic_clock::time_point first_message_time_ =
       // needs to be higher than time in the logfile!
       aos::monotonic_clock::max_time;
-  aos::monotonic_clock::time_point current_message_time =
+  aos::monotonic_clock::time_point current_message_time_ =
       aos::monotonic_clock::min_time;
+
   // channel stats to collect per channel
-  int total_num_messages = 0;
-  size_t max_message_size = 0;
-  size_t total_message_size = 0;
-  double avg_messages_sec = 0.0;  // TODO in Lambda, now in stats overview.
-  double max_messages_sec = 0.0;  // TODO in Lambda
+  int total_num_messages_ = 0;
+  size_t max_message_size_ = 0;
+  size_t total_message_size_ = 0;
+
+  Histogram histogram_;
 };
 
 struct LogfileStats {
@@ -126,17 +257,9 @@
     stats_event_loop->MakeRawNoArgWatcher(
         channel,
         [&logfile_stats, &channel_stats, it](const aos::Context &context) {
-          channel_stats[it].max_message_size =
-              std::max(channel_stats[it].max_message_size, context.size);
-          channel_stats[it].total_message_size += context.size;
-          channel_stats[it].total_num_messages++;
-          // asume messages are send in sequence per channel
-          channel_stats[it].channel_end_time = context.realtime_event_time;
-          channel_stats[it].first_message_time =
-              std::min(channel_stats[it].first_message_time,
-                       context.monotonic_event_time);
-          channel_stats[it].current_message_time = context.monotonic_event_time;
-          // update the overall logfile statistics
+          channel_stats[it].Add(context);
+
+          // Update the overall logfile statistics
           logfile_stats.logfile_length += context.size;
         });
     it++;
@@ -152,58 +275,34 @@
   log_reader_factory.Run();
 
   std::cout << std::endl;
+
   // Print out the stats per channel and for the logfile
   for (size_t i = 0; i != channel_stats.size(); i++) {
-    if (channel_stats[i].total_num_messages > 0) {
-      double sec_active =
-          aos::time::DurationInSeconds(channel_stats[i].current_message_time -
-                                       channel_stats[i].first_message_time);
-      channel_stats[i].avg_messages_sec =
-          (channel_stats[i].total_num_messages / sec_active);
-      logfile_stats.total_log_messages += channel_stats[i].total_num_messages;
-      logfile_stats.logfile_end_time = std::max(
-          logfile_stats.logfile_end_time, channel_stats[i].channel_end_time);
+    if (!FLAGS_excessive_size_only ||
+        (channel_stats[i].max_message_size() * 2) <
+            static_cast<size_t>(channel_stats[i].channel()->max_size())) {
+      if (channel_stats[i].total_num_messages() > 0) {
+        std::cout << channel_stats[i].channel()->name()->string_view() << " "
+                  << channel_stats[i].channel()->type()->string_view() << "\n";
 
-      if (!FLAGS_excessive_size_only ||
-          (channel_stats[i].max_message_size * 2) <
-              static_cast<unsigned long>(
-                  channel_stats[i].channel->max_size())) {
-        std::cout << "Channel name: "
-                  << channel_stats[i].channel->name()->string_view()
-                  << "\tMsg type: "
-                  << channel_stats[i].channel->type()->string_view() << "\n";
+        logfile_stats.total_log_messages +=
+            channel_stats[i].total_num_messages();
+        logfile_stats.logfile_end_time =
+            std::max(logfile_stats.logfile_end_time,
+                     channel_stats[i].channel_end_time());
+
         if (!FLAGS_excessive_size_only) {
-          std::cout << "Number of msg: " << channel_stats[i].total_num_messages
-                    << std::setprecision(3) << std::fixed
-                    << "\tAvg msg per sec: "
-                    << channel_stats[i].avg_messages_sec
-                    << "\tSet max msg frequency: "
-                    << channel_stats[i].channel->frequency() << "\n";
+          std::cout << "   " << channel_stats[i].total_num_messages()
+                    << " msgs, " << channel_stats[i].avg_messages_per_sec()
+                    << "hz avg, " << channel_stats[i].channel()->frequency()
+                    << "hz max";
         }
-        std::cout << "Avg msg size: "
-                  << (channel_stats[i].total_message_size /
-                      channel_stats[i].total_num_messages)
-                  << "\tMax msg size: " << channel_stats[i].max_message_size
-                  << "\tSet max msg size: "
-                  << channel_stats[i].channel->max_size() << "\n";
-        if (!FLAGS_excessive_size_only) {
-          std::cout << "First msg time: " << channel_stats[i].first_message_time
-                    << "\tLast msg time: "
-                    << channel_stats[i].current_message_time
-                    << "\tSeconds active: " << sec_active << "sec\n";
-        }
+        std::cout << " " << channel_stats[i].avg_message_size()
+                  << " bytes avg, " << channel_stats[i].max_message_size()
+                  << " bytes max / " << channel_stats[i].channel()->max_size()
+                  << "bytes " << channel_stats[i].Percentile();
         std::cout << std::endl;
       }
-    } else {
-      std::cout << "Channel name: "
-                << channel_stats[i].channel->name()->string_view() << "\t"
-                << "Msg type: "
-                << channel_stats[i].channel->type()->string_view() << "\n"
-                << "Set max msg frequency: "
-                << channel_stats[i].channel->frequency() << "\t"
-                << "Set max msg size: " << channel_stats[i].channel->max_size()
-                << "\n--- No messages in channel ---"
-                << "\n";
     }
   }
   std::cout << std::setfill('-') << std::setw(80) << "-"