Maxwell Gumley | d26e629 | 2024-04-24 10:45:07 -0600 | [diff] [blame^] | 1 | # Parse log profiling data and produce a graph showing encode times in time bins, |
| 2 | # with a breakdown per message type. |
| 3 | |
| 4 | import argparse |
| 5 | import csv |
| 6 | import math |
| 7 | import os |
| 8 | import webbrowser |
| 9 | from dataclasses import dataclass, field |
| 10 | from pathlib import Path |
| 11 | from typing import Dict, List, Optional, Tuple |
| 12 | |
| 13 | import numpy as np |
| 14 | from bokeh.io import output_notebook |
| 15 | from bokeh.models import ColumnDataSource, HoverTool, Legend, LegendItem |
| 16 | from bokeh.palettes import Category20, Viridis256 |
| 17 | from bokeh.plotting import figure, show, output_file |
| 18 | from tabulate import tabulate |
| 19 | |
| 20 | |
@dataclass
class SeriesDetail:
    """Per-message-type samples parsed from the profiling CSV.

    The two lists are parallel: index i of each refers to the same message,
    appended in file order by parse_csv_file.
    """
    # Event-loop timestamp (seconds) of each message.
    event_loop_times_s: List[float]
    # Encode duration (milliseconds) of each message; index-aligned with
    # event_loop_times_s.
    encode_times_ms: List[float]
| 25 | |
| 26 | |
def parse_csv_file(filepath: Path, max_lines: Optional[int],
                   start_time: Optional[float],
                   end_time: Optional[float]) -> Dict[str, SeriesDetail]:
    """Parse the profiling CSV into per-message-type time series.

    Args:
        filepath: CSV produced by the log profiler. Expected columns (with a
            header row): channel_name, channel_type, encode_duration_ns,
            encoding_start_time_ns, message_time_s.
        max_lines: If given, stop after reading this many data rows. Rows
            later dropped by the time window still count toward the limit.
        start_time: If given, drop rows with message_time_s < start_time.
        end_time: If given, drop rows with message_time_s > end_time.

    Returns:
        Mapping from channel type to its encode times (ms) and event-loop
        timestamps (s), in file order.

    Raises:
        ValueError: If a non-empty row does not have exactly five columns or
            a numeric field fails to parse.
    """
    data_by_type: Dict[str, SeriesDetail] = {}

    # newline='' is the csv module's documented requirement for file objects;
    # it prevents newline translation from corrupting quoted fields.
    with open(filepath, 'r', newline='') as file:
        reader = csv.reader(file)
        next(reader)  # Skip the header line.

        line_count = 0
        for line in reader:
            if max_lines is not None and line_count >= max_lines:
                break

            line_count += 1

            # csv.reader yields an empty list for a blank line; skip it
            # rather than crashing on the unpack below (the original used a
            # bare assert, which also disappears under python -O).
            if not line:
                continue

            # Note that channel_name and encoding_start_time_ns are not yet used.
            # They may be used here in the future, but for now they are helpful when
            # directly looking at the csv file.
            channel_name, channel_type, encode_duration_ns, encoding_start_time_ns, message_time_s = line

            # Convert nanoseconds to milliseconds.
            encode_duration_ms = float(encode_duration_ns) * 1e-6
            message_time_s = float(message_time_s)

            # Apply the optional time window.
            if start_time is not None and message_time_s < start_time:
                continue
            if end_time is not None and message_time_s > end_time:
                continue

            # Single dict lookup instead of membership test + subscript.
            detail = data_by_type.get(channel_type)
            if detail is None:
                data_by_type[channel_type] = SeriesDetail(
                    encode_times_ms=[encode_duration_ms],
                    event_loop_times_s=[message_time_s])
            else:
                detail.encode_times_ms.append(encode_duration_ms)
                detail.event_loop_times_s.append(message_time_s)

    return data_by_type
| 71 | |
| 72 | |
@dataclass
class DataBin:
    """One time bin of the histogram built by create_binned_data."""
    # Human-readable "start - end" label for this bin (seconds, 2 decimals).
    bin_range: str
    # Summed encode time (ms) per message type; keys are the top-N type
    # names plus 'other'.
    message_encode_times: Dict[str, float]
| 77 | |
| 78 | |
@dataclass
class BinnedData:
    """All time bins plus the ordered list of top message-type names."""
    # One DataBin per time interval, in ascending time order.
    bins: List[DataBin] = field(default_factory=list)
    # Top-N message types by total encode time, largest first.
    top_type_names: List[str] = field(default_factory=list)
| 83 | |
| 84 | |
def create_binned_data(data_by_type: Dict[str, SeriesDetail],
                       num_bins: int = 25,
                       top_n: int = 5) -> BinnedData:
    """Bin encode times over event-loop time, breaking out the top-N types.

    Args:
        data_by_type: Per-type series from parse_csv_file. Must be non-empty
            and every series must contain at least one sample.
        num_bins: Number of equal-width time bins spanning the data.
        top_n: Number of message types (by total encode time) to break out;
            everything else is accumulated under 'other'.

    Returns:
        A BinnedData whose bins each hold the summed encode time (ms) per
        top type plus 'other'.
    """
    # Total encode time per message type across the entire file.
    total_encode_times: Dict[str, float] = {
        message_type: sum(detail.encode_times_ms)
        for message_type, detail in data_by_type.items()
    }

    # Determine the top N message types based on total encoding times.
    top_types: List[Tuple[str, float]] = sorted(total_encode_times.items(),
                                                key=lambda item: item[1],
                                                reverse=True)[:top_n]
    print(f"{top_types=}")
    top_type_names: List[str] = [type_name for type_name, _ in top_types]

    # Global minimum and maximum times to establish bin edges. Scan every
    # sample instead of assuming each series is sorted (the original read
    # only the first/last element, which is wrong on unsorted input and
    # identical on sorted input).
    min_time: float = min(min(detail.event_loop_times_s)
                          for detail in data_by_type.values())
    max_time: float = max(max(detail.event_loop_times_s)
                          for detail in data_by_type.values())

    # num_bins + 1 edges delimit num_bins equal-width bins.
    bins = np.linspace(min_time, max_time, num_bins + 1)

    # Initialize the list of DataBin instances with the correct number of bins,
    # each starting every type (and 'other') at zero.
    binned_data = BinnedData(top_type_names=top_type_names)
    for i in range(num_bins):
        bin_range = f"{bins[i]:.2f} - {bins[i+1]:.2f}"
        binned_data.bins.append(
            DataBin(bin_range=bin_range,
                    message_encode_times={
                        name: 0
                        for name in top_type_names + ['other']
                    }))

    # Populate binned_data with message encode times.
    for message_type, details in data_by_type.items():
        binned_indices = np.digitize(details.event_loop_times_s, bins) - 1
        # np.digitize puts values equal to the last edge into bin num_bins;
        # clamp so the maximum sample lands in the final real bin.
        binned_indices = np.minimum(binned_indices, num_bins - 1)

        for idx, encode_time in enumerate(details.encode_times_ms):
            bin_index = binned_indices[idx]
            current_bin = binned_data.bins[bin_index]
            if message_type in top_type_names:
                current_bin.message_encode_times[message_type] += encode_time
            else:
                current_bin.message_encode_times['other'] += encode_time

    return binned_data
| 136 | |
| 137 | |
def print_binned_data(binned_data: BinnedData) -> None:
    """Print per-bin encode times as a grid table with a trailing Total column.

    Columns are the top type names (dots broken onto new lines to keep the
    header narrow), then 'other', then the per-bin total.
    """
    columns = binned_data.top_type_names + ['other']
    headers = (['Bin Range']
               + [name.replace('.', '\n.') for name in columns]
               + ['Total'])

    # One row per bin: label, each type's summed time, then the row total.
    table_data = []
    for data_bin in binned_data.bins:
        values = [data_bin.message_encode_times[name] for name in columns]
        table_data.append([data_bin.bin_range, *values, sum(values)])

    # 'grid' format gives the table visible cell borders.
    print(
        tabulate(table_data, headers=headers, tablefmt='grid', floatfmt=".2f"))
| 169 | |
| 170 | |
def plot_bar(binned_data: BinnedData) -> None:
    """Render the binned encode times as a stacked bar chart.

    Writes the chart to plot.html in the working directory and opens it in
    the default web browser.

    Args:
        binned_data: Output of create_binned_data.

    Raises:
        ValueError: If there are more than 20 series (Category20's limit).
    """
    filename = "plot.html"
    output_file(filename, title="Message Encode Time Plot Stacked Bar Graph")

    # Adjust width based on bin count for readability.
    plot_width = max(1200, 50 * len(binned_data.bins))

    # Bin labels double as the categorical x-axis. (Renamed from `bin`,
    # which shadowed the builtin.)
    bin_ranges = [data_bin.bin_range for data_bin in binned_data.bins]

    p = figure(x_range=bin_ranges,
               title='Message Encode Time by Type over Event Loop Time',
               x_axis_label='Event Loop Time Bins',
               y_axis_label='Total Message Encode Time (ms)',
               width=plot_width,
               height=600,
               tools="")

    # One column per series plus the categorical x values.
    source_data = {'bin_edges': bin_ranges}
    for message_type in binned_data.top_type_names + ['other']:
        source_data[message_type] = [
            data_bin.message_encode_times.get(message_type, 0)
            for data_bin in binned_data.bins
        ]

    source = ColumnDataSource(data=source_data)

    # Calculate totals and sort in descending order.
    totals = {
        message_type: sum(source_data[message_type])
        for message_type in source_data if message_type != 'bin_edges'
    }
    sorted_message_types = sorted(totals, key=totals.get, reverse=True)

    # Reverse the list to place larger segments at the top of the stack.
    sorted_message_types.reverse()

    num_types = len(sorted_message_types)
    if num_types > 20:
        raise ValueError(
            f"Number of types ({num_types}) exceeds the number of available colors in Category20."
        )
    colors = Category20[20][:num_types]

    # Apply reversed order to stack rendering.
    renderers = p.vbar_stack(sorted_message_types,
                             x='bin_edges',
                             width=0.7,
                             color=colors,
                             source=source)

    # Change the orientation of the x-axis labels to a 45-degree angle.
    p.xaxis.major_label_orientation = math.pi / 4

    # Create a custom legend, maintaining the reversed order for visual consistency.
    legend_items = [
        LegendItem(label=mt.replace('.', ' '), renderers=[renderers[i]])
        for i, mt in enumerate(sorted_message_types)
    ]
    legend = Legend(items=legend_items, location=(0, -30))
    p.add_layout(legend, 'right')

    # Cosmetic tweaks: anchor bars at zero and drop visual clutter.
    p.y_range.start = 0
    p.x_range.range_padding = 0.05
    p.xgrid.grid_line_color = None
    p.axis.minor_tick_line_color = None
    p.outline_line_color = None

    file_path = os.path.realpath(filename)
    print('\n')
    print(f"Plot saved to '{file_path}'")
    webbrowser.open('file://' + file_path)
| 240 | |
| 241 | |
def main():
    """CLI entry point: parse the profiling CSV, bin it, tabulate, and plot."""
    parser = argparse.ArgumentParser(
        description=
        'Process log files to extract and plot message encode times.')
    parser.add_argument('--log_file_path', type=Path, required=True,
                        help='Path to the log file')
    parser.add_argument('--max_lines', type=int, default=None,
                        help='Maximum number of lines to read from the log file')
    parser.add_argument('--num_bins', type=int, default=40,
                        help='Number of bins to use')
    parser.add_argument('--top_n', type=int, default=10,
                        help='Number of top message types to plot')
    parser.add_argument('--start_time', type=float, default=None,
                        help='Start time in seconds')
    parser.add_argument('--end_time', type=float, default=None,
                        help='End time in seconds')
    cli = parser.parse_args()

    # Parse -> bin -> report pipeline.
    series_by_type = parse_csv_file(filepath=cli.log_file_path,
                                    max_lines=cli.max_lines,
                                    start_time=cli.start_time,
                                    end_time=cli.end_time)
    binned = create_binned_data(series_by_type,
                                num_bins=cli.num_bins,
                                top_n=cli.top_n)
    print_binned_data(binned)
    plot_bar(binned)
    print(f"{os.path.basename(__file__)} Finished.")


if __name__ == '__main__':
    main()