blob: 8e3d4513d36bb0396e95c00e29a323a106f67ad5 [file] [log] [blame]
#include <algorithm>
#include <chrono>
#include <cmath>
#include <iomanip>
#include <iostream>
#include <memory>
#include <queue>
#include <sstream>
#include <string>
#include <vector>

#include "absl/strings/str_format.h"
#include "aos/events/logging/log_reader.h"
#include "aos/events/simulated_event_loop.h"
#include "aos/init.h"
#include "aos/json_to_flatbuffer.h"
#include "aos/time/time.h"
#include "gflags/gflags.h"
12
Stephan Massaltf84cf812019-12-31 14:14:50 -080013DEFINE_string(
14 name, "",
15 "Name to match for printing out channels. Empty means no name filter.");
16
Austin Schuh6f3babe2020-01-26 20:34:50 -080017DEFINE_string(node, "", "Node to print stats out for.");
18
milind upadhyay38fe3cd2021-03-27 15:30:53 -070019DEFINE_bool(excessive_size_only, false,
20 "Only print channels that have a set max message size that is more "
21 "than double of the max message size.");
22
Milind Upadhyay3d13a1a2022-08-04 10:58:32 -070023// This class implements a histogram for tracking message period
24// percentiles.
Austin Schuhc99e1392021-03-30 22:59:24 -070025class Histogram {
26 public:
27 Histogram(size_t buckets = 1024)
28 : max_value_bucket_(0.01), values_(buckets, 0.0), counts_(buckets, 0) {}
29
30 // Adds a new sample to the histogram, potentially downsampling the existing
31 // data.
32 void Add(double value) {
33 if (value < max_value_bucket_) {
34 const ssize_t bucket = static_cast<size_t>(
35 std::floor(value * values_.size() / max_value_bucket_));
36 CHECK_GE(bucket, 0);
37 CHECK_LT(bucket, static_cast<ssize_t>(values_.size()));
38 values_[bucket] += value;
39 if (all_counts_ == 0 || value > max_value_) {
40 max_value_ = value;
41 }
42 if (all_counts_ == 0 || value < min_value_) {
43 min_value_ = value;
44 }
45 ++counts_[bucket];
46 ++all_counts_;
47 } else {
48 // Double all the bucket sizes by merging adjacent buckets and doubling
49 // the max value. If this isn't enough, we'll recurse inside Add and
50 // do it again until it fits.
51 max_value_bucket_ *= 2.0;
52 for (size_t bucket = 0; bucket < values_.size() / 2; ++bucket) {
53 values_[bucket] = values_[bucket * 2] + values_[bucket * 2 + 1];
54 counts_[bucket] = counts_[bucket * 2] + counts_[bucket * 2 + 1];
55 }
56 for (size_t bucket = values_.size() / 2; bucket < values_.size();
57 ++bucket) {
58 values_[bucket] = 0.0;
59 counts_[bucket] = 0;
60 }
61 Add(value);
62 }
63 }
64
65 // Prints out the percentiles for a couple of critical numbers.
66 std::string Percentile() const {
67 const size_t percentile5 = all_counts_ / 20;
68 double percentile5_value = 0.0;
69 const size_t percentile50 = all_counts_ / 2;
70 double percentile50_value = 0.0;
71 const size_t percentile95 = all_counts_ - percentile5;
72 double percentile95_value = 0.0;
73
74 size_t count = 0;
75 for (size_t i = 0; i < values_.size(); ++i) {
76 if (count < percentile5 && count + counts_[i] >= percentile5) {
77 percentile5_value = values_[i] / counts_[i];
78 }
79 if (count < percentile50 && count + counts_[i] >= percentile50) {
80 percentile50_value = values_[i] / counts_[i];
81 }
82 if (count < percentile95 && count + counts_[i] >= percentile95) {
83 percentile95_value = values_[i] / counts_[i];
84 }
85 count += counts_[i];
86 }
87
88 // Assume here that these are periods in seconds. Convert to ms for
89 // readability. This isn't super generic, but that's fine for now.
90 return absl::StrFormat(
91 "[max %.3fms 95%%:%.3fms 50%%:%.3fms 5%%:%.3fms min %.3fms]",
92 max_value_ * 1000., percentile95_value * 1000.,
93 percentile50_value * 1000., percentile5_value * 1000.,
94 min_value_ * 1000.);
95 }
96
97 private:
98 // The size of the largest bucket. Used to figure out which bucket something
99 // goes into.
100 double max_value_bucket_;
101 // Max and min values overall we have seen.
102 double max_value_ = 0;
103 double min_value_ = 0;
104 // A list of the sum of values and counts for those per bucket.
105 std::vector<double> values_;
106 std::vector<size_t> counts_;
107 // Total number of samples.
108 size_t all_counts_ = 0;
109};
110
111class ChannelStats {
112 public:
Milind Upadhyay3d13a1a2022-08-04 10:58:32 -0700113 ChannelStats(const aos::Channel *channel, const aos::Node *destination_node,
114 aos::SimulatedEventLoopFactory *factory)
115 : channel_(channel),
116 config_(factory->configuration()),
117 factory_(factory),
118 destination_node_(destination_node) {
119 // Multi-node channel
120 if (channel_->has_source_node() && channel_->has_destination_nodes() &&
121 channel_->destination_nodes()->size() > 0) {
122 CHECK(destination_node_)
123 << "Should have destination node for forwarded channel: "
124 << channel_->name()->string_view();
125 source_node_ = aos::configuration::GetNode(
126 config_, channel_->source_node()->string_view());
127 CHECK(source_node_) << "Node not in config: "
128 << channel_->source_node()->string_view();
129 }
130 }
Austin Schuhc99e1392021-03-30 22:59:24 -0700131
132 // Adds a sample to the statistics.
133 void Add(const aos::Context &context) {
134 max_message_size_ = std::max(max_message_size_, context.size);
135 total_message_size_ += context.size;
136 total_num_messages_++;
137 channel_end_time_ = context.realtime_event_time;
138 first_message_time_ =
139 std::min(first_message_time_, context.monotonic_event_time);
140 if (current_message_time_ != aos::monotonic_clock::min_time) {
141 histogram_.Add(std::chrono::duration<double>(
142 context.monotonic_event_time - current_message_time_)
143 .count());
144 }
145 current_message_time_ = context.monotonic_event_time;
James Kuszmaulc7bb1652022-06-22 11:18:49 -0700146 channel_storage_duration_messages_.push(current_message_time_);
147 while (channel_storage_duration_messages_.front() +
148 std::chrono::nanoseconds(config_->channel_storage_duration()) <=
149 current_message_time_) {
150 channel_storage_duration_messages_.pop();
151 }
152 max_messages_per_period_ = std::max(
153 max_messages_per_period_, channel_storage_duration_messages_.size());
Milind Upadhyay3d13a1a2022-08-04 10:58:32 -0700154
155 // Only count latency if this message is forwarded and the remote time was
156 // filled
157 if (source_node_ != nullptr &&
158 context.monotonic_remote_time != context.monotonic_event_time) {
159 // Convert times to distributed clock so they can be compared across nodes
160 const aos::distributed_clock::time_point remote_time =
161 factory_->GetNodeEventLoopFactory(source_node_)
162 ->ToDistributedClock(context.monotonic_remote_time);
163
164 const aos::distributed_clock::time_point event_time =
165 factory_->GetNodeEventLoopFactory(destination_node_)
166 ->ToDistributedClock(context.monotonic_event_time);
167 // Add the current latency to the sum
168 total_latency_ += event_time - remote_time;
169
170 num_messages_with_remote_++;
171 }
Austin Schuhc99e1392021-03-30 22:59:24 -0700172 }
173
174 std::string Percentile() const { return histogram_.Percentile(); }
175
176 double SecondsActive() const {
177 return aos::time::DurationInSeconds(current_message_time_ -
178 first_message_time_);
179 }
180
181 size_t max_message_size() const { return max_message_size_; }
182 size_t total_num_messages() const { return total_num_messages_; }
183
184 double avg_messages_per_sec() const {
185 return total_num_messages_ / SecondsActive();
186 }
James Kuszmaulc7bb1652022-06-22 11:18:49 -0700187 double max_messages_per_sec() const {
188 return max_messages_per_period_ /
189 std::min(SecondsActive(),
190 1e-9 * config_->channel_storage_duration());
191 }
Austin Schuhc99e1392021-03-30 22:59:24 -0700192 size_t avg_message_size() const {
193 return total_message_size_ / total_num_messages_;
194 }
James Kuszmaul6abc7442021-10-24 13:46:04 -0700195 size_t avg_message_bandwidth() const {
196 return total_message_size_ / SecondsActive();
197 }
Austin Schuhc99e1392021-03-30 22:59:24 -0700198
199 aos::realtime_clock::time_point channel_end_time() const {
200 return channel_end_time_;
201 }
202
203 const aos::Channel *channel() const { return channel_; }
204
Milind Upadhyay3d13a1a2022-08-04 10:58:32 -0700205 std::string AvgLatency() {
206 if (num_messages_with_remote_ == 0) {
207 return "";
208 }
209
210 std::stringstream ss;
211 ss << std::setprecision(3);
212
213 const double avg_latency =
214 std::chrono::duration<double, std::milli>(total_latency_).count() /
215 num_messages_with_remote_;
216 ss << '[' << source_node_->name()->string_view() << "->"
217 << destination_node_->name()->string_view() << " " << avg_latency
218 << "ms latency avg]";
219
220 return ss.str();
221 }
222
Austin Schuhc99e1392021-03-30 22:59:24 -0700223 private:
Stephan Massaltf84cf812019-12-31 14:14:50 -0800224 // pointer to the channel for which stats are collected
Austin Schuhc99e1392021-03-30 22:59:24 -0700225 const aos::Channel *channel_;
James Kuszmaulc7bb1652022-06-22 11:18:49 -0700226 const aos::Configuration *config_;
Milind Upadhyay3d13a1a2022-08-04 10:58:32 -0700227 aos::SimulatedEventLoopFactory *factory_;
Austin Schuhc99e1392021-03-30 22:59:24 -0700228 aos::realtime_clock::time_point channel_end_time_ =
Stephan Massaltf84cf812019-12-31 14:14:50 -0800229 aos::realtime_clock::min_time;
Austin Schuhc99e1392021-03-30 22:59:24 -0700230 aos::monotonic_clock::time_point first_message_time_ =
Stephan Massaltf84cf812019-12-31 14:14:50 -0800231 // needs to be higher than time in the logfile!
232 aos::monotonic_clock::max_time;
Austin Schuhc99e1392021-03-30 22:59:24 -0700233 aos::monotonic_clock::time_point current_message_time_ =
Stephan Massaltf84cf812019-12-31 14:14:50 -0800234 aos::monotonic_clock::min_time;
Austin Schuhc99e1392021-03-30 22:59:24 -0700235
James Kuszmaulc7bb1652022-06-22 11:18:49 -0700236 // Buffer of the last N seconds of messages, for N = channel_storage_duration.
237 std::queue<aos::monotonic_clock::time_point>
238 channel_storage_duration_messages_;
239 size_t max_messages_per_period_ = 0;
240
Stephan Massaltf84cf812019-12-31 14:14:50 -0800241 // channel stats to collect per channel
Austin Schuhc99e1392021-03-30 22:59:24 -0700242 int total_num_messages_ = 0;
243 size_t max_message_size_ = 0;
244 size_t total_message_size_ = 0;
245
Milind Upadhyay3d13a1a2022-08-04 10:58:32 -0700246 // Count of messages which had remote timestamps
247 size_t num_messages_with_remote_ = 0;
248 // Sum of latencies in all messages sent on this channel if multinode
249 aos::distributed_clock::duration total_latency_;
250
Austin Schuhc99e1392021-03-30 22:59:24 -0700251 Histogram histogram_;
Milind Upadhyay3d13a1a2022-08-04 10:58:32 -0700252
253 const aos::Node *source_node_ = nullptr;
254 const aos::Node *destination_node_;
Stephan Massaltf84cf812019-12-31 14:14:50 -0800255};
256
257struct LogfileStats {
258 // All relevant stats on to logfile level
259 size_t logfile_length = 0;
260 int total_log_messages = 0;
261 aos::realtime_clock::time_point logfile_end_time =
262 aos::realtime_clock::min_time;
263};
264
265int main(int argc, char **argv) {
266 gflags::SetUsageMessage(
Ravago Jones8bab1842020-12-12 17:36:39 -0800267 "Usage: \n"
268 " log_stats [args] logfile1 logfile2 ...\n"
Stephan Massaltf84cf812019-12-31 14:14:50 -0800269 "This program provides statistics on a given log file. Supported "
270 "statistics are:\n"
271 " - Logfile start time;\n"
272 " - Total messages per channel/type;\n"
273 " - Max message size per channel/type;\n"
274 " - Frequency of messages per second;\n"
Milind Upadhyay3d13a1a2022-08-04 10:58:32 -0700275 " - Total logfile size and number of messages;\n"
276 " - Average latency per forwarded channel/type.\n"
277 "Pass a logfile (path/filename) and use --name "
Stephan Massaltf84cf812019-12-31 14:14:50 -0800278 "flag to specify a channel to listen on.");
279
280 aos::InitGoogle(&argc, &argv);
281
Ravago Jones8bab1842020-12-12 17:36:39 -0800282 if (argc < 2) {
283 LOG(FATAL) << "Expected at least 1 logfile as an argument.";
284 }
285
286 // find logfiles
287 std::vector<std::string> unsorted_logfiles =
288 aos::logger::FindLogs(argc, argv);
289
290 // sort logfiles
291 const std::vector<aos::logger::LogFile> logfiles =
292 aos::logger::SortParts(unsorted_logfiles);
293
294 // open logfiles
295 aos::logger::LogReader reader(logfiles);
296
Stephan Massaltf84cf812019-12-31 14:14:50 -0800297 LogfileStats logfile_stats;
298 std::vector<ChannelStats> channel_stats;
299
Stephan Massaltf84cf812019-12-31 14:14:50 -0800300 aos::SimulatedEventLoopFactory log_reader_factory(reader.configuration());
301 reader.Register(&log_reader_factory);
302
Austin Schuh6f3babe2020-01-26 20:34:50 -0800303 const aos::Node *node = nullptr;
304
305 if (aos::configuration::MultiNode(reader.configuration())) {
306 if (FLAGS_node.empty()) {
307 LOG(INFO) << "Need a --node specified. The log file has:";
Austin Schuh07676622021-01-21 18:59:17 -0800308 for (const aos::Node *node : reader.LoggedNodes()) {
Austin Schuh6f3babe2020-01-26 20:34:50 -0800309 LOG(INFO) << " " << node->name()->string_view();
310 }
Austin Schuh8c7f14b2021-01-21 19:01:54 -0800311 reader.Deregister();
Austin Schuh6f3babe2020-01-26 20:34:50 -0800312 return 1;
313 } else {
314 node = aos::configuration::GetNode(reader.configuration(), FLAGS_node);
315 }
316 }
317
Stephan Massaltf84cf812019-12-31 14:14:50 -0800318 // Make an eventloop for retrieving stats
319 std::unique_ptr<aos::EventLoop> stats_event_loop =
Austin Schuh6f3babe2020-01-26 20:34:50 -0800320 log_reader_factory.MakeEventLoop("logstats", node);
Stephan Massaltf84cf812019-12-31 14:14:50 -0800321 stats_event_loop->SkipTimingReport();
Tyler Chatow67ddb032020-01-12 14:30:04 -0800322 stats_event_loop->SkipAosLog();
Stephan Massaltf84cf812019-12-31 14:14:50 -0800323
324 // Read channel info and store in vector
325 bool found_channel = false;
326 const flatbuffers::Vector<flatbuffers::Offset<aos::Channel>> *channels =
327 reader.configuration()->channels();
328
329 int it = 0; // iterate through the channel_stats
330 for (flatbuffers::uoffset_t i = 0; i < channels->size(); i++) {
331 const aos::Channel *channel = channels->Get(i);
Austin Schuh76db3fa2020-03-07 17:02:44 -0800332 if (!aos::configuration::ChannelIsReadableOnNode(
333 channel, stats_event_loop->node())) {
334 continue;
Stephan Massaltf84cf812019-12-31 14:14:50 -0800335 }
Austin Schuh76db3fa2020-03-07 17:02:44 -0800336
337 if (channel->name()->string_view().find(FLAGS_name) == std::string::npos) {
338 continue;
339 }
340
341 // Add a record to the stats vector.
Milind Upadhyay3d13a1a2022-08-04 10:58:32 -0700342 channel_stats.push_back({channel, node, &log_reader_factory});
Austin Schuh76db3fa2020-03-07 17:02:44 -0800343 // Lambda to read messages and parse for information
Brian Silvermanad1bce02020-03-05 14:23:01 -0800344 stats_event_loop->MakeRawNoArgWatcher(
345 channel,
346 [&logfile_stats, &channel_stats, it](const aos::Context &context) {
Austin Schuhc99e1392021-03-30 22:59:24 -0700347 channel_stats[it].Add(context);
348
349 // Update the overall logfile statistics
Brian Silvermanad1bce02020-03-05 14:23:01 -0800350 logfile_stats.logfile_length += context.size;
351 });
Austin Schuh76db3fa2020-03-07 17:02:44 -0800352 it++;
353 // TODO (Stephan): Frequency of messages per second
354 // - Sliding window
355 // - Max / Deviation
356 found_channel = true;
Stephan Massaltf84cf812019-12-31 14:14:50 -0800357 }
358 if (!found_channel) {
359 LOG(FATAL) << "Could not find any channels";
360 }
361
362 log_reader_factory.Run();
363
milind upadhyay38fe3cd2021-03-27 15:30:53 -0700364 std::cout << std::endl;
Austin Schuhc99e1392021-03-30 22:59:24 -0700365
Stephan Massaltf84cf812019-12-31 14:14:50 -0800366 // Print out the stats per channel and for the logfile
367 for (size_t i = 0; i != channel_stats.size(); i++) {
Austin Schuhc99e1392021-03-30 22:59:24 -0700368 if (!FLAGS_excessive_size_only ||
369 (channel_stats[i].max_message_size() * 2) <
370 static_cast<size_t>(channel_stats[i].channel()->max_size())) {
371 if (channel_stats[i].total_num_messages() > 0) {
372 std::cout << channel_stats[i].channel()->name()->string_view() << " "
373 << channel_stats[i].channel()->type()->string_view() << "\n";
milind upadhyay38fe3cd2021-03-27 15:30:53 -0700374
Austin Schuhc99e1392021-03-30 22:59:24 -0700375 logfile_stats.total_log_messages +=
376 channel_stats[i].total_num_messages();
377 logfile_stats.logfile_end_time =
378 std::max(logfile_stats.logfile_end_time,
379 channel_stats[i].channel_end_time());
380
milind upadhyay38fe3cd2021-03-27 15:30:53 -0700381 if (!FLAGS_excessive_size_only) {
Austin Schuhc99e1392021-03-30 22:59:24 -0700382 std::cout << " " << channel_stats[i].total_num_messages()
383 << " msgs, " << channel_stats[i].avg_messages_per_sec()
James Kuszmaulc7bb1652022-06-22 11:18:49 -0700384 << "hz avg, " << channel_stats[i].max_messages_per_sec()
385 << "hz max, " << channel_stats[i].channel()->frequency()
386 << "hz configured max";
milind upadhyay38fe3cd2021-03-27 15:30:53 -0700387 }
Austin Schuhc99e1392021-03-30 22:59:24 -0700388 std::cout << " " << channel_stats[i].avg_message_size()
Austin Schuh60e77942022-05-16 17:48:24 -0700389 << " bytes avg, " << channel_stats[i].avg_message_bandwidth()
James Kuszmaul6abc7442021-10-24 13:46:04 -0700390 << " bytes/sec avg, " << channel_stats[i].max_message_size()
Austin Schuhc99e1392021-03-30 22:59:24 -0700391 << " bytes max / " << channel_stats[i].channel()->max_size()
Milind Upadhyay3d13a1a2022-08-04 10:58:32 -0700392 << "bytes, " << channel_stats[i].Percentile() << ", "
393 << channel_stats[i].AvgLatency();
milind upadhyay38fe3cd2021-03-27 15:30:53 -0700394 std::cout << std::endl;
395 }
Stephan Massaltf84cf812019-12-31 14:14:50 -0800396 }
397 }
398 std::cout << std::setfill('-') << std::setw(80) << "-"
Ravago Jones8bab1842020-12-12 17:36:39 -0800399 << "\nLogfile statistics:\n"
Austin Schuh76db3fa2020-03-07 17:02:44 -0800400 << "Log starts at:\t" << reader.realtime_start_time(node) << "\n"
Stephan Massaltf84cf812019-12-31 14:14:50 -0800401 << "Log ends at:\t" << logfile_stats.logfile_end_time << "\n"
402 << "Log file size:\t" << logfile_stats.logfile_length << "\n"
403 << "Total messages:\t" << logfile_stats.total_log_messages << "\n";
404
405 // Cleanup the created processes
406 reader.Deregister();
Austin Schuhae87e312020-08-01 16:15:01 -0700407
Stephan Massaltf84cf812019-12-31 14:14:50 -0800408 return 0;
409}