Blame - aos/events/logging/logfile_utils.cc - RealtimeRoboticsGroup/test

blob: 144890a5a7865f4cfc10aabdd019e54621dd4d94 [file] [log] [blame]

Austin Schuh	a36c890	2019-12-30 18:07:15 -0800	[diff] [blame]	1	#include "aos/events/logging/logfile_utils.h"
				2
				3	#include <fcntl.h>
				4	#include <limits.h>
				5	#include <sys/stat.h>
				6	#include <sys/types.h>
				7	#include <sys/uio.h>
				8
				9	#include <vector>
				10
Austin Schuh	05b7047	2020-01-01 17:11:17 -0800	[diff] [blame]	11	#include "aos/configuration.h"
Austin Schuh	a36c890	2019-12-30 18:07:15 -0800	[diff] [blame]	12	#include "aos/events/logging/logger_generated.h"
Austin Schuh	fa89589	2020-01-07 20:07:41 -0800	[diff] [blame]	13	#include "aos/flatbuffer_merge.h"
Austin Schuh	6f3babe	2020-01-26 20:34:50 -0800	[diff] [blame^]	14	#include "aos/util/file.h"
Austin Schuh	a36c890	2019-12-30 18:07:15 -0800	[diff] [blame]	15	#include "flatbuffers/flatbuffers.h"
Austin Schuh	05b7047	2020-01-01 17:11:17 -0800	[diff] [blame]	16	#include "gflags/gflags.h"
				17	#include "glog/logging.h"
Austin Schuh	a36c890	2019-12-30 18:07:15 -0800	[diff] [blame]	18
				19	DEFINE_int32(flush_size, 1000000,
				20	"Number of outstanding bytes to allow before flushing to disk.");
				21
				22	namespace aos {
				23	namespace logger {
				24
Austin Schuh	05b7047	2020-01-01 17:11:17 -0800	[diff] [blame]	25	namespace chrono = std::chrono;
				26
Austin Schuh	a36c890	2019-12-30 18:07:15 -0800	[diff] [blame]	27	DetachedBufferWriter::DetachedBufferWriter(std::string_view filename)
Austin Schuh	6f3babe	2020-01-26 20:34:50 -0800	[diff] [blame^]	28	: filename_(filename) {
				29	util::MkdirP(filename, 0777);
				30	fd_ = open(std::string(filename).c_str(),
				31	O_RDWR \| O_CLOEXEC \| O_CREAT \| O_EXCL, 0774);
				32	VLOG(1) << "Opened " << filename << " for writing";
				33	PCHECK(fd_ != -1) << ": Failed to open " << filename << " for writing";
Austin Schuh	a36c890	2019-12-30 18:07:15 -0800	[diff] [blame]	34	}
				35
				36	DetachedBufferWriter::~DetachedBufferWriter() {
				37	Flush();
				38	PLOG_IF(ERROR, close(fd_) == -1) << " Failed to close logfile";
				39	}
				40
				41	void DetachedBufferWriter::QueueSizedFlatbuffer(
				42	flatbuffers::FlatBufferBuilder *fbb) {
				43	QueueSizedFlatbuffer(fbb->Release());
				44	}
				45
Austin Schuh	de031b7	2020-01-10 19:34:41 -0800	[diff] [blame]	46	void DetachedBufferWriter::WriteSizedFlatbuffer(
				47	absl::Span<const uint8_t> span) {
				48	// Cheat aggressively... Write out the queued up data, and then write this
				49	// data once without buffering. It is hard to make a DetachedBuffer out of
				50	// this data, and we don't want to worry about lifetimes.
				51	Flush();
				52	iovec_.clear();
				53	iovec_.reserve(1);
				54
				55	struct iovec n;
				56	n.iov_base = const_cast<uint8_t *>(span.data());
				57	n.iov_len = span.size();
				58	iovec_.emplace_back(n);
				59
				60	const ssize_t written = writev(fd_, iovec_.data(), iovec_.size());
				61
				62	PCHECK(written == static_cast<ssize_t>(n.iov_len))
				63	<< ": Wrote " << written << " expected " << n.iov_len;
				64	}
				65
Austin Schuh	a36c890	2019-12-30 18:07:15 -0800	[diff] [blame]	66	void DetachedBufferWriter::QueueSizedFlatbuffer(
				67	flatbuffers::DetachedBuffer &&buffer) {
				68	queued_size_ += buffer.size();
				69	queue_.emplace_back(std::move(buffer));
				70
				71	// Flush if we are at the max number of iovs per writev, or have written
				72	// enough data. Otherwise writev will fail with an invalid argument.
				73	if (queued_size_ > static_cast<size_t>(FLAGS_flush_size) \|\|
				74	queue_.size() == IOV_MAX) {
				75	Flush();
				76	}
				77	}
				78
				79	void DetachedBufferWriter::Flush() {
				80	if (queue_.size() == 0u) {
				81	return;
				82	}
				83	iovec_.clear();
				84	iovec_.reserve(queue_.size());
				85	size_t counted_size = 0;
				86	for (size_t i = 0; i < queue_.size(); ++i) {
				87	struct iovec n;
				88	n.iov_base = queue_[i].data();
				89	n.iov_len = queue_[i].size();
				90	counted_size += n.iov_len;
				91	iovec_.emplace_back(std::move(n));
				92	}
				93	CHECK_EQ(counted_size, queued_size_);
				94	const ssize_t written = writev(fd_, iovec_.data(), iovec_.size());
				95
				96	PCHECK(written == static_cast<ssize_t>(queued_size_))
				97	<< ": Wrote " << written << " expected " << queued_size_;
				98
				99	queued_size_ = 0;
				100	queue_.clear();
				101	// TODO(austin): Handle partial writes in some way other than crashing...
				102	}
				103
				104	flatbuffers::Offset<MessageHeader> PackMessage(
				105	flatbuffers::FlatBufferBuilder *fbb, const Context &context,
				106	int channel_index, LogType log_type) {
				107	flatbuffers::Offset<flatbuffers::Vector<uint8_t>> data_offset;
				108
				109	switch (log_type) {
				110	case LogType::kLogMessage:
				111	case LogType::kLogMessageAndDeliveryTime:
Austin Schuh	6f3babe	2020-01-26 20:34:50 -0800	[diff] [blame^]	112	case LogType::kLogRemoteMessage:
Austin Schuh	a36c890	2019-12-30 18:07:15 -0800	[diff] [blame]	113	data_offset =
				114	fbb->CreateVector(static_cast<uint8_t *>(context.data), context.size);
				115	break;
				116
				117	case LogType::kLogDeliveryTimeOnly:
				118	break;
				119	}
				120
				121	MessageHeader::Builder message_header_builder(*fbb);
				122	message_header_builder.add_channel_index(channel_index);
Austin Schuh	6f3babe	2020-01-26 20:34:50 -0800	[diff] [blame^]	123
				124	switch (log_type) {
				125	case LogType::kLogRemoteMessage:
				126	message_header_builder.add_queue_index(context.remote_queue_index);
				127	message_header_builder.add_monotonic_sent_time(
				128	context.monotonic_remote_time.time_since_epoch().count());
				129	message_header_builder.add_realtime_sent_time(
				130	context.realtime_remote_time.time_since_epoch().count());
				131	break;
				132
				133	case LogType::kLogMessage:
				134	case LogType::kLogMessageAndDeliveryTime:
				135	case LogType::kLogDeliveryTimeOnly:
				136	message_header_builder.add_queue_index(context.queue_index);
				137	message_header_builder.add_monotonic_sent_time(
				138	context.monotonic_event_time.time_since_epoch().count());
				139	message_header_builder.add_realtime_sent_time(
				140	context.realtime_event_time.time_since_epoch().count());
				141	break;
				142	}
Austin Schuh	a36c890	2019-12-30 18:07:15 -0800	[diff] [blame]	143
				144	switch (log_type) {
				145	case LogType::kLogMessage:
Austin Schuh	6f3babe	2020-01-26 20:34:50 -0800	[diff] [blame^]	146	case LogType::kLogRemoteMessage:
Austin Schuh	a36c890	2019-12-30 18:07:15 -0800	[diff] [blame]	147	message_header_builder.add_data(data_offset);
				148	break;
				149
				150	case LogType::kLogMessageAndDeliveryTime:
				151	message_header_builder.add_data(data_offset);
				152	[[fallthrough]];
				153
				154	case LogType::kLogDeliveryTimeOnly:
				155	message_header_builder.add_monotonic_remote_time(
				156	context.monotonic_remote_time.time_since_epoch().count());
				157	message_header_builder.add_realtime_remote_time(
				158	context.realtime_remote_time.time_since_epoch().count());
				159	message_header_builder.add_remote_queue_index(context.remote_queue_index);
				160	break;
				161	}
				162
				163	return message_header_builder.Finish();
				164	}
				165
Austin Schuh	05b7047	2020-01-01 17:11:17 -0800	[diff] [blame]	166	SpanReader::SpanReader(std::string_view filename)
Austin Schuh	6f3babe	2020-01-26 20:34:50 -0800	[diff] [blame^]	167	: filename_(filename),
				168	fd_(open(std::string(filename).c_str(), O_RDONLY \| O_CLOEXEC)) {
Austin Schuh	05b7047	2020-01-01 17:11:17 -0800	[diff] [blame]	169	PCHECK(fd_ != -1) << ": Failed to open " << filename;
				170	}
				171
				172	absl::Span<const uint8_t> SpanReader::ReadMessage() {
				173	// Make sure we have enough for the size.
				174	if (data_.size() - consumed_data_ < sizeof(flatbuffers::uoffset_t)) {
				175	if (!ReadBlock()) {
				176	return absl::Span<const uint8_t>();
				177	}
				178	}
				179
				180	// Now make sure we have enough for the message.
				181	const size_t data_size =
				182	flatbuffers::GetPrefixedSize(data_.data() + consumed_data_) +
				183	sizeof(flatbuffers::uoffset_t);
				184	while (data_.size() < consumed_data_ + data_size) {
				185	if (!ReadBlock()) {
				186	return absl::Span<const uint8_t>();
				187	}
				188	}
				189
				190	// And return it, consuming the data.
				191	const uint8_t *data_ptr = data_.data() + consumed_data_;
				192
				193	consumed_data_ += data_size;
				194
				195	return absl::Span<const uint8_t>(data_ptr, data_size);
				196	}
				197
				198	bool SpanReader::MessageAvailable() {
				199	// Are we big enough to read the size?
				200	if (data_.size() - consumed_data_ < sizeof(flatbuffers::uoffset_t)) {
				201	return false;
				202	}
				203
				204	// Then, are we big enough to read the full message?
				205	const size_t data_size =
				206	flatbuffers::GetPrefixedSize(data_.data() + consumed_data_) +
				207	sizeof(flatbuffers::uoffset_t);
				208	if (data_.size() < consumed_data_ + data_size) {
				209	return false;
				210	}
				211
				212	return true;
				213	}
				214
				215	bool SpanReader::ReadBlock() {
				216	if (end_of_file_) {
				217	return false;
				218	}
				219
				220	// Appends 256k. This is enough that the read call is efficient. We don't
				221	// want to spend too much time reading small chunks because the syscalls for
				222	// that will be expensive.
				223	constexpr size_t kReadSize = 256 * 1024;
				224
				225	// Strip off any unused data at the front.
				226	if (consumed_data_ != 0) {
				227	data_.erase(data_.begin(), data_.begin() + consumed_data_);
				228	consumed_data_ = 0;
				229	}
				230
				231	const size_t starting_size = data_.size();
				232
				233	// This should automatically grow the backing store. It won't shrink if we
				234	// get a small chunk later. This reduces allocations when we want to append
				235	// more data.
				236	data_.resize(data_.size() + kReadSize);
				237
				238	ssize_t count = read(fd_, &data_[starting_size], kReadSize);
				239	data_.resize(starting_size + std::max(count, static_cast<ssize_t>(0)));
				240	if (count == 0) {
				241	end_of_file_ = true;
				242	return false;
				243	}
				244	PCHECK(count > 0);
				245
				246	return true;
				247	}
				248
Austin Schuh	6f3babe	2020-01-26 20:34:50 -0800	[diff] [blame^]	249	FlatbufferVector<LogFileHeader> ReadHeader(std::string_view filename) {
				250	SpanReader span_reader(filename);
				251	// Make sure we have enough to read the size.
				252	absl::Span<const uint8_t> config_data = span_reader.ReadMessage();
				253
				254	// Make sure something was read.
				255	CHECK(config_data != absl::Span<const uint8_t>());
				256
				257	// And copy the config so we have it forever.
				258	std::vector<uint8_t> data(
				259	config_data.begin() + sizeof(flatbuffers::uoffset_t), config_data.end());
				260	return FlatbufferVector<LogFileHeader>(std::move(data));
				261	}
				262
Austin Schuh	05b7047	2020-01-01 17:11:17 -0800	[diff] [blame]	263	MessageReader::MessageReader(std::string_view filename)
				264	: span_reader_(filename) {
				265	// Make sure we have enough to read the size.
				266	absl::Span<const uint8_t> config_data = span_reader_.ReadMessage();
				267
				268	// Make sure something was read.
				269	CHECK(config_data != absl::Span<const uint8_t>());
				270
				271	// And copy the config so we have it forever.
				272	configuration_ = std::vector<uint8_t>(config_data.begin(), config_data.end());
				273
				274	max_out_of_order_duration_ = std::chrono::nanoseconds(
				275	flatbuffers::GetSizePrefixedRoot<LogFileHeader>(configuration_.data())
				276	->max_out_of_order_duration());
				277	}
				278
				279	std::optional<FlatbufferVector<MessageHeader>> MessageReader::ReadMessage() {
				280	absl::Span<const uint8_t> msg_data = span_reader_.ReadMessage();
				281	if (msg_data == absl::Span<const uint8_t>()) {
				282	return std::nullopt;
				283	}
				284
				285	FlatbufferVector<MessageHeader> result{std::vector<uint8_t>(
				286	msg_data.begin() + sizeof(flatbuffers::uoffset_t), msg_data.end())};
				287
				288	const monotonic_clock::time_point timestamp = monotonic_clock::time_point(
				289	chrono::nanoseconds(result.message().monotonic_sent_time()));
				290
				291	newest_timestamp_ = std::max(newest_timestamp_, timestamp);
Austin Schuh	6f3babe	2020-01-26 20:34:50 -0800	[diff] [blame^]	292	VLOG(1) << "Read from " << filename().substr(130) << " data "
				293	<< FlatbufferToJson(result);
				294	return std::move(result);
Austin Schuh	05b7047	2020-01-01 17:11:17 -0800	[diff] [blame]	295	}
				296
Austin Schuh	6f3babe	2020-01-26 20:34:50 -0800	[diff] [blame^]	297	SplitMessageReader::SplitMessageReader(
Austin Schuh	fa89589	2020-01-07 20:07:41 -0800	[diff] [blame]	298	const std::vector<std::string> &filenames)
				299	: filenames_(filenames),
				300	log_file_header_(FlatbufferDetachedBuffer<LogFileHeader>::Empty()) {
				301	CHECK(NextLogFile()) << ": filenames is empty. Need files to read.";
				302
Austin Schuh	6f3babe	2020-01-26 20:34:50 -0800	[diff] [blame^]	303	// Grab any log file header. They should all match (and we will check as we
				304	// open more of them).
Austin Schuh	fa89589	2020-01-07 20:07:41 -0800	[diff] [blame]	305	log_file_header_ = CopyFlatBuffer(message_reader_->log_file_header());
				306
Austin Schuh	6f3babe	2020-01-26 20:34:50 -0800	[diff] [blame^]	307	// Setup per channel state.
Austin Schuh	05b7047	2020-01-01 17:11:17 -0800	[diff] [blame]	308	channels_.resize(configuration()->channels()->size());
Austin Schuh	6f3babe	2020-01-26 20:34:50 -0800	[diff] [blame^]	309	for (ChannelData &channel_data : channels_) {
				310	channel_data.data.split_reader = this;
				311	// Build up the timestamp list.
				312	if (configuration::MultiNode(configuration())) {
				313	channel_data.timestamps.resize(configuration()->nodes()->size());
				314	for (MessageHeaderQueue &queue : channel_data.timestamps) {
				315	queue.timestamps = true;
				316	queue.split_reader = this;
				317	}
				318	}
				319	}
Austin Schuh	05b7047	2020-01-01 17:11:17 -0800	[diff] [blame]	320
Austin Schuh	6f3babe	2020-01-26 20:34:50 -0800	[diff] [blame^]	321	// Build up channels_to_write_ as an optimization to make it fast to figure
				322	// out which datastructure to place any new data from a channel on.
				323	for (const Channel channel : configuration()->channels()) {
				324	// This is the main case. We will only see data on this node.
				325	if (configuration::ChannelIsSendableOnNode(channel, node())) {
				326	channels_to_write_.emplace_back(
				327	&channels_[channels_to_write_.size()].data);
				328	} else
				329	// If we can't send, but can receive, we should be able to see
				330	// timestamps here.
				331	if (configuration::ChannelIsReadableOnNode(channel, node())) {
				332	channels_to_write_.emplace_back(
				333	&(channels_[channels_to_write_.size()]
				334	.timestamps[configuration::GetNodeIndex(configuration(),
				335	node())]));
				336	} else {
				337	channels_to_write_.emplace_back(nullptr);
				338	}
				339	}
Austin Schuh	05b7047	2020-01-01 17:11:17 -0800	[diff] [blame]	340	}
				341
Austin Schuh	6f3babe	2020-01-26 20:34:50 -0800	[diff] [blame^]	342	bool SplitMessageReader::NextLogFile() {
Austin Schuh	fa89589	2020-01-07 20:07:41 -0800	[diff] [blame]	343	if (next_filename_index_ == filenames_.size()) {
				344	return false;
				345	}
				346	message_reader_ =
				347	std::make_unique<MessageReader>(filenames_[next_filename_index_]);
				348
				349	// We can't support the config diverging between two log file headers. See if
				350	// they are the same.
				351	if (next_filename_index_ != 0) {
Austin Schuh	6f3babe	2020-01-26 20:34:50 -0800	[diff] [blame^]	352	CHECK(CompareFlatBuffer(&log_file_header_.message(),
				353	message_reader_->log_file_header()))
Austin Schuh	fa89589	2020-01-07 20:07:41 -0800	[diff] [blame]	354	<< ": Header is different between log file chunks "
				355	<< filenames_[next_filename_index_] << " and "
				356	<< filenames_[next_filename_index_ - 1] << ", this is not supported.";
				357	}
				358
				359	++next_filename_index_;
				360	return true;
				361	}
				362
Austin Schuh	6f3babe	2020-01-26 20:34:50 -0800	[diff] [blame^]	363	bool SplitMessageReader::QueueMessages(
				364	monotonic_clock::time_point oldest_message_time) {
				365	// TODO(austin): Once we are happy that everything works, read a 256kb chunk
				366	// to reduce the need to re-heap down below.
				367	while (true) {
				368	// Don't queue if we have enough data already.
				369	// When a log file starts, there should be a message from each channel.
				370	// Those messages might be very old. Make sure to read a chunk past the
				371	// starting time.
				372	if (queued_messages_ > 0 &&
				373	message_reader_->queue_data_time() > oldest_message_time) {
				374	return true;
				375	}
Austin Schuh	05b7047	2020-01-01 17:11:17 -0800	[diff] [blame]	376
Austin Schuh	6f3babe	2020-01-26 20:34:50 -0800	[diff] [blame^]	377	if (std::optional<FlatbufferVector<MessageHeader>> msg =
				378	message_reader_->ReadMessage()) {
				379	const MessageHeader &header = msg.value().message();
				380
				381	const int channel_index = header.channel_index();
				382	channels_to_write_[channel_index]->emplace_back(std::move(msg.value()));
				383
				384	++queued_messages_;
				385	} else {
				386	if (!NextLogFile()) {
				387	return false;
				388	}
				389	}
Austin Schuh	05b7047	2020-01-01 17:11:17 -0800	[diff] [blame]	390	}
Austin Schuh	6f3babe	2020-01-26 20:34:50 -0800	[diff] [blame^]	391	}
				392
				393	void SplitMessageReader::SetTimestampMerger(TimestampMerger *timestamp_merger,
				394	int channel_index,
				395	const Node *target_node) {
				396	const Node *reinterpreted_target_node =
				397	configuration::GetNodeOrDie(configuration(), target_node);
				398	const Channel *const channel =
				399	configuration()->channels()->Get(channel_index);
				400
				401	MessageHeaderQueue *message_header_queue = nullptr;
				402
				403	// Figure out if this log file is from our point of view, or the other node's
				404	// point of view.
				405	if (node() == reinterpreted_target_node) {
				406	if (channels_to_write_[channel_index] != nullptr) {
				407	// We already have deduced which is the right channel. Use
				408	// channels_to_write_ here.
				409	message_header_queue = channels_to_write_[channel_index];
				410	} else {
				411	// This means this is data from another node, and will be ignored.
				412	}
				413	} else {
				414	// We are replaying from another node's point of view. The only interesting
				415	// data is data that is forwarded to our node, ie was sent on the other
				416	// node.
				417	if (configuration::ChannelIsSendableOnNode(channel, node())) {
				418	// Data from another node.
				419	message_header_queue = &(channels_[channel_index].data);
				420	} else {
				421	// This is either not sendable on the other node, or is a timestamp and
				422	// therefore not interesting.
				423	}
				424	}
				425
				426	// If we found one, write it down. This will be nullptr when there is nothing
				427	// relevant on this channel on this node for the target node. In that case,
				428	// we want to drop the message instead of queueing it.
				429	if (message_header_queue != nullptr) {
				430	message_header_queue->timestamp_merger = timestamp_merger;
				431	}
				432	}
				433
				434	std::tuple<monotonic_clock::time_point, uint32_t,
				435	FlatbufferVector<MessageHeader>>
				436	SplitMessageReader::PopOldest(int channel_index) {
				437	CHECK_GT(channels_[channel_index].data.size(), 0u);
				438	const std::tuple<monotonic_clock::time_point, uint32_t> timestamp =
				439	channels_[channel_index].data.front_timestamp();
				440	FlatbufferVector<MessageHeader> front =
				441	std::move(channels_[channel_index].data.front());
				442	channels_[channel_index].data.pop_front();
				443	--queued_messages_;
				444
				445	return std::make_tuple(std::get<0>(timestamp), std::get<1>(timestamp),
				446	std::move(front));
				447	}
				448
				449	std::tuple<monotonic_clock::time_point, uint32_t,
				450	FlatbufferVector<MessageHeader>>
				451	SplitMessageReader::PopOldest(int channel, int node_index) {
				452	CHECK_GT(channels_[channel].timestamps[node_index].size(), 0u);
				453	const std::tuple<monotonic_clock::time_point, uint32_t> timestamp =
				454	channels_[channel].timestamps[node_index].front_timestamp();
				455	FlatbufferVector<MessageHeader> front =
				456	std::move(channels_[channel].timestamps[node_index].front());
				457	channels_[channel].timestamps[node_index].pop_front();
				458	--queued_messages_;
				459
				460	return std::make_tuple(std::get<0>(timestamp), std::get<1>(timestamp),
				461	std::move(front));
				462	}
				463
				464	void SplitMessageReader::MessageHeaderQueue::emplace_back(
				465	FlatbufferVector<MessageHeader> &&msg) {
				466	CHECK(split_reader != nullptr);
				467
				468	// If there is no timestamp merger for this queue, nobody is listening. Drop
				469	// the message. This happens when a log file from another node is replayed,
				470	// and the timestamp mergers down stream just don't care.
				471	if (timestamp_merger == nullptr) {
				472	return;
				473	}
				474
				475	CHECK(timestamps != msg.message().has_data())
				476	<< ": Got timestamps and data mixed up on a node. "
				477	<< FlatbufferToJson(msg);
				478
				479	data_.emplace_back(std::move(msg));
				480
				481	if (data_.size() == 1u) {
				482	// Yup, new data. Notify.
				483	if (timestamps) {
				484	timestamp_merger->UpdateTimestamp(split_reader, front_timestamp());
				485	} else {
				486	timestamp_merger->Update(split_reader, front_timestamp());
				487	}
				488	}
				489	}
				490
				491	void SplitMessageReader::MessageHeaderQueue::pop_front() {
				492	data_.pop_front();
				493	if (data_.size() != 0u) {
				494	// Yup, new data.
				495	if (timestamps) {
				496	timestamp_merger->UpdateTimestamp(split_reader, front_timestamp());
				497	} else {
				498	timestamp_merger->Update(split_reader, front_timestamp());
				499	}
				500	}
Austin Schuh	05b7047	2020-01-01 17:11:17 -0800	[diff] [blame]	501	}
				502
				503	namespace {
				504
Austin Schuh	6f3babe	2020-01-26 20:34:50 -0800	[diff] [blame^]	505	bool SplitMessageReaderHeapCompare(
				506	const std::tuple<monotonic_clock::time_point, uint32_t,
				507	SplitMessageReader *>
				508	first,
				509	const std::tuple<monotonic_clock::time_point, uint32_t,
				510	SplitMessageReader *>
				511	second) {
				512	if (std::get<0>(first) > std::get<0>(second)) {
				513	return true;
				514	} else if (std::get<0>(first) == std::get<0>(second)) {
				515	if (std::get<1>(first) > std::get<1>(second)) {
				516	return true;
				517	} else if (std::get<1>(first) == std::get<1>(second)) {
				518	return std::get<2>(first) > std::get<2>(second);
				519	} else {
				520	return false;
				521	}
				522	} else {
				523	return false;
				524	}
				525	}
				526
Austin Schuh	05b7047	2020-01-01 17:11:17 -0800	[diff] [blame]	527	bool ChannelHeapCompare(
				528	const std::pair<monotonic_clock::time_point, int> first,
				529	const std::pair<monotonic_clock::time_point, int> second) {
				530	if (first.first > second.first) {
				531	return true;
				532	} else if (first.first == second.first) {
				533	return first.second > second.second;
				534	} else {
				535	return false;
				536	}
				537	}
				538
				539	} // namespace
				540
Austin Schuh	6f3babe	2020-01-26 20:34:50 -0800	[diff] [blame^]	541	TimestampMerger::TimestampMerger(
				542	const Configuration *configuration,
				543	std::vector<SplitMessageReader *> split_message_readers, int channel_index,
				544	const Node target_node, ChannelMerger channel_merger)
				545	: configuration_(configuration),
				546	split_message_readers_(std::move(split_message_readers)),
				547	channel_index_(channel_index),
				548	node_index_(configuration::MultiNode(configuration)
				549	? configuration::GetNodeIndex(configuration, target_node)
				550	: -1),
				551	channel_merger_(channel_merger) {
				552	// Tell the readers we care so they know who to notify.
				553	for (SplitMessageReader *reader : split_message_readers_) {
				554	reader->SetTimestampMerger(this, channel_index, target_node);
				555	}
				556
				557	// And then determine if we need to track timestamps.
				558	const Channel *channel = configuration->channels()->Get(channel_index);
				559	if (!configuration::ChannelIsSendableOnNode(channel, target_node) &&
				560	configuration::ChannelIsReadableOnNode(channel, target_node)) {
				561	has_timestamps_ = true;
				562	}
				563	}
				564
				565	void TimestampMerger::PushMessageHeap(
				566	std::tuple<monotonic_clock::time_point, uint32_t> timestamp,
				567	SplitMessageReader *split_message_reader) {
				568	DCHECK(std::find_if(message_heap_.begin(), message_heap_.end(),
				569	[split_message_reader](
				570	const std::tuple<monotonic_clock::time_point,
				571	uint32_t, SplitMessageReader *>
				572	x) {
				573	return std::get<2>(x) == split_message_reader;
				574	}) == message_heap_.end())
				575	<< ": Pushing message when it is already in the heap.";
				576
				577	message_heap_.push_back(std::make_tuple(
				578	std::get<0>(timestamp), std::get<1>(timestamp), split_message_reader));
				579
				580	std::push_heap(message_heap_.begin(), message_heap_.end(),
				581	&SplitMessageReaderHeapCompare);
				582
				583	// If we are just a data merger, don't wait for timestamps.
				584	if (!has_timestamps_) {
				585	channel_merger_->Update(std::get<0>(timestamp), channel_index_);
				586	pushed_ = true;
				587	}
				588	}
				589
				590	void TimestampMerger::PushTimestampHeap(
				591	std::tuple<monotonic_clock::time_point, uint32_t> timestamp,
				592	SplitMessageReader *split_message_reader) {
				593	DCHECK(std::find_if(timestamp_heap_.begin(), timestamp_heap_.end(),
				594	[split_message_reader](
				595	const std::tuple<monotonic_clock::time_point,
				596	uint32_t, SplitMessageReader *>
				597	x) {
				598	return std::get<2>(x) == split_message_reader;
				599	}) == timestamp_heap_.end())
				600	<< ": Pushing timestamp when it is already in the heap.";
				601
				602	timestamp_heap_.push_back(std::make_tuple(
				603	std::get<0>(timestamp), std::get<1>(timestamp), split_message_reader));
				604
				605	std::push_heap(timestamp_heap_.begin(), timestamp_heap_.end(),
				606	SplitMessageReaderHeapCompare);
				607
				608	// If we are a timestamp merger, don't wait for data. Missing data will be
				609	// caught at read time.
				610	if (has_timestamps_) {
				611	channel_merger_->Update(std::get<0>(timestamp), channel_index_);
				612	pushed_ = true;
				613	}
				614	}
				615
				616	std::tuple<monotonic_clock::time_point, uint32_t,
				617	FlatbufferVector<MessageHeader>>
				618	TimestampMerger::PopMessageHeap() {
				619	// Pop the oldest message reader pointer off the heap.
				620	CHECK_GT(message_heap_.size(), 0u);
				621	std::tuple<monotonic_clock::time_point, uint32_t, SplitMessageReader *>
				622	oldest_message_reader = message_heap_.front();
				623
				624	std::pop_heap(message_heap_.begin(), message_heap_.end(),
				625	&SplitMessageReaderHeapCompare);
				626	message_heap_.pop_back();
				627
				628	// Pop the oldest message. This re-pushes any messages from the reader to the
				629	// message heap.
				630	std::tuple<monotonic_clock::time_point, uint32_t,
				631	FlatbufferVector<MessageHeader>>
				632	oldest_message =
				633	std::get<2>(oldest_message_reader)->PopOldest(channel_index_);
				634
				635	// Confirm that the time and queue_index we have recorded matches.
				636	CHECK_EQ(std::get<0>(oldest_message), std::get<0>(oldest_message_reader));
				637	CHECK_EQ(std::get<1>(oldest_message), std::get<1>(oldest_message_reader));
				638
				639	// Now, keep reading until we have found all duplicates.
				640	while (message_heap_.size() > 0u) {
				641	// See if it is a duplicate.
				642	std::tuple<monotonic_clock::time_point, uint32_t, SplitMessageReader *>
				643	next_oldest_message_reader = message_heap_.front();
				644
				645	std::tuple<monotonic_clock::time_point, uint32_t> next_oldest_message_time =
				646	std::get<2>(next_oldest_message_reader)->oldest_message(channel_index_);
				647
				648	if (std::get<0>(next_oldest_message_time) == std::get<0>(oldest_message) &&
				649	std::get<1>(next_oldest_message_time) == std::get<1>(oldest_message)) {
				650	// Pop the message reader pointer.
				651	std::pop_heap(message_heap_.begin(), message_heap_.end(),
				652	&SplitMessageReaderHeapCompare);
				653	message_heap_.pop_back();
				654
				655	// Pop the next oldest message. This re-pushes any messages from the
				656	// reader.
				657	std::tuple<monotonic_clock::time_point, uint32_t,
				658	FlatbufferVector<MessageHeader>>
				659	next_oldest_message = std::get<2>(next_oldest_message_reader)
				660	->PopOldest(channel_index_);
				661
				662	// And make sure the message matches in it's entirety.
				663	CHECK(std::get<2>(oldest_message).span() ==
				664	std::get<2>(next_oldest_message).span())
				665	<< ": Data at the same timestamp doesn't match.";
				666	} else {
				667	break;
				668	}
				669	}
				670
				671	return oldest_message;
				672	}
				673
				674	std::tuple<monotonic_clock::time_point, uint32_t,
				675	FlatbufferVector<MessageHeader>>
				676	TimestampMerger::PopTimestampHeap() {
				677	// Pop the oldest message reader pointer off the heap.
				678	CHECK_GT(timestamp_heap_.size(), 0u);
				679
				680	std::tuple<monotonic_clock::time_point, uint32_t, SplitMessageReader *>
				681	oldest_timestamp_reader = timestamp_heap_.front();
				682
				683	std::pop_heap(timestamp_heap_.begin(), timestamp_heap_.end(),
				684	&SplitMessageReaderHeapCompare);
				685	timestamp_heap_.pop_back();
				686
				687	CHECK(node_index_ != -1) << ": Timestamps in a single node environment";
				688
				689	// Pop the oldest message. This re-pushes any timestamps from the reader to
				690	// the timestamp heap.
				691	std::tuple<monotonic_clock::time_point, uint32_t,
				692	FlatbufferVector<MessageHeader>>
				693	oldest_timestamp = std::get<2>(oldest_timestamp_reader)
				694	->PopOldest(channel_index_, node_index_);
				695
				696	// Confirm that the time we have recorded matches.
				697	CHECK_EQ(std::get<0>(oldest_timestamp), std::get<0>(oldest_timestamp_reader));
				698	CHECK_EQ(std::get<1>(oldest_timestamp), std::get<1>(oldest_timestamp_reader));
				699
				700	// TODO(austin): What if we get duplicate timestamps?
				701
				702	return oldest_timestamp;
				703	}
				704
				705	std::tuple<TimestampMerger::DeliveryTimestamp, FlatbufferVector<MessageHeader>>
				706	TimestampMerger::PopOldest() {
				707	if (has_timestamps_) {
				708	CHECK_GT(message_heap_.size(), 0u)
				709	<< ": Missing data from source node, no data available to match "
				710	"timestamp on "
				711	<< configuration::CleanedChannelToString(
				712	configuration_->channels()->Get(channel_index_));
				713
				714	std::tuple<monotonic_clock::time_point, uint32_t,
				715	FlatbufferVector<MessageHeader>>
				716	oldest_timestamp = PopTimestampHeap();
				717
				718	TimestampMerger::DeliveryTimestamp timestamp;
				719	timestamp.monotonic_event_time =
				720	monotonic_clock::time_point(chrono::nanoseconds(
				721	std::get<2>(oldest_timestamp).message().monotonic_sent_time()));
				722	timestamp.realtime_event_time =
				723	realtime_clock::time_point(chrono::nanoseconds(
				724	std::get<2>(oldest_timestamp).message().realtime_sent_time()));
				725
				726	// Consistency check.
				727	CHECK_EQ(timestamp.monotonic_event_time, std::get<0>(oldest_timestamp));
				728	CHECK_EQ(std::get<2>(oldest_timestamp).message().queue_index(),
				729	std::get<1>(oldest_timestamp));
				730
				731	monotonic_clock::time_point remote_timestamp_monotonic_time(
				732	chrono::nanoseconds(
				733	std::get<2>(oldest_timestamp).message().monotonic_remote_time()));
				734
				735	while (true) {
				736	// Ok, now try grabbing data until we find one which matches.
				737	std::tuple<monotonic_clock::time_point, uint32_t,
				738	FlatbufferVector<MessageHeader>>
				739	oldest_message = PopMessageHeap();
				740
				741	// Time at which the message was sent (this message is written from the
				742	// sending node's perspective.
				743	monotonic_clock::time_point remote_monotonic_time(chrono::nanoseconds(
				744	std::get<2>(oldest_message).message().monotonic_sent_time()));
				745
				746	if (remote_monotonic_time < remote_timestamp_monotonic_time) {
				747	LOG(INFO) << "Undelivered message, skipping. Remote time is "
				748	<< remote_monotonic_time << " timestamp is "
				749	<< remote_timestamp_monotonic_time << " on channel "
				750	<< channel_index_;
				751	continue;
				752	}
				753
				754	timestamp.monotonic_remote_time = remote_monotonic_time;
				755	timestamp.realtime_remote_time =
				756	realtime_clock::time_point(chrono::nanoseconds(
				757	std::get<2>(oldest_message).message().realtime_sent_time()));
				758	timestamp.remote_queue_index =
				759	std::get<2>(oldest_message).message().queue_index();
				760
				761	CHECK_EQ(remote_monotonic_time, remote_timestamp_monotonic_time);
				762	CHECK_EQ(timestamp.remote_queue_index, std::get<1>(oldest_timestamp));
				763
				764	return std::make_tuple(timestamp, std::get<2>(oldest_message));
				765	}
				766	} else {
				767	std::tuple<monotonic_clock::time_point, uint32_t,
				768	FlatbufferVector<MessageHeader>>
				769	oldest_message = PopMessageHeap();
				770
				771	TimestampMerger::DeliveryTimestamp timestamp;
				772	timestamp.monotonic_event_time =
				773	monotonic_clock::time_point(chrono::nanoseconds(
				774	std::get<2>(oldest_message).message().monotonic_sent_time()));
				775	timestamp.realtime_event_time =
				776	realtime_clock::time_point(chrono::nanoseconds(
				777	std::get<2>(oldest_message).message().realtime_sent_time()));
				778	timestamp.remote_queue_index = 0xffffffff;
				779
				780	CHECK_EQ(std::get<0>(oldest_message), timestamp.monotonic_event_time);
				781	CHECK_EQ(std::get<1>(oldest_message),
				782	std::get<2>(oldest_message).message().queue_index());
				783
				784	return std::make_tuple(timestamp, std::get<2>(oldest_message));
				785	}
				786	}
				787
				788	namespace {
				789	std::vector<std::unique_ptr<SplitMessageReader>> MakeSplitMessageReaders(
				790	const std::vector<std::vector<std::string>> &filenames) {
				791	CHECK_GT(filenames.size(), 0u);
				792	// Build up all the SplitMessageReaders.
				793	std::vector<std::unique_ptr<SplitMessageReader>> result;
				794	for (const std::vector<std::string> &filenames : filenames) {
				795	result.emplace_back(std::make_unique<SplitMessageReader>(filenames));
				796	}
				797	return result;
				798	}
				799	} // namespace
				800
				801	ChannelMerger::ChannelMerger(
				802	const std::vector<std::vector<std::string>> &filenames)
				803	: split_message_readers_(MakeSplitMessageReaders(filenames)),
				804	log_file_header_(
				805	CopyFlatBuffer(split_message_readers_[0]->log_file_header())) {
				806	// Now, confirm that the configuration matches for each and pick a start time.
				807	// Also return the list of possible nodes.
				808	for (const std::unique_ptr<SplitMessageReader> &reader :
				809	split_message_readers_) {
				810	CHECK(CompareFlatBuffer(log_file_header_.message().configuration(),
				811	reader->log_file_header()->configuration()))
				812	<< ": Replaying log files with different configurations isn't "
				813	"supported";
				814	}
				815
				816	nodes_ = configuration::GetNodes(configuration());
				817	}
				818
				819	bool ChannelMerger::SetNode(const Node *target_node) {
				820	std::vector<SplitMessageReader *> split_message_readers;
				821	for (const std::unique_ptr<SplitMessageReader> &reader :
				822	split_message_readers_) {
				823	split_message_readers.emplace_back(reader.get());
				824	}
				825
				826	// Go find a log_file_header for this node.
				827	{
				828	bool found_node = false;
				829
				830	for (const std::unique_ptr<SplitMessageReader> &reader :
				831	split_message_readers_) {
				832	if (CompareFlatBuffer(reader->node(), target_node)) {
				833	if (!found_node) {
				834	found_node = true;
				835	log_file_header_ = CopyFlatBuffer(reader->log_file_header());
				836	} else {
				837	// And then make sure all the other files have matching headers.
				838	CHECK(
				839	CompareFlatBuffer(log_file_header(), reader->log_file_header()));
				840	}
				841	}
				842	}
				843
				844	if (!found_node) {
				845	LOG(WARNING) << "Failed to find log file for node "
				846	<< FlatbufferToJson(target_node);
				847	return false;
				848	}
				849	}
				850
				851	// Build up all the timestamp mergers. This connects up all the
				852	// SplitMessageReaders.
				853	timestamp_mergers_.reserve(configuration()->channels()->size());
				854	for (size_t channel_index = 0;
				855	channel_index < configuration()->channels()->size(); ++channel_index) {
				856	timestamp_mergers_.emplace_back(
				857	configuration(), split_message_readers, channel_index,
				858	configuration::GetNode(configuration(), target_node), this);
				859	}
				860
				861	// And prime everything.
				862	size_t split_message_reader_index = 0;
				863	for (std::unique_ptr<SplitMessageReader> &split_message_reader :
				864	split_message_readers_) {
				865	if (split_message_reader->QueueMessages(
				866	split_message_reader->monotonic_start_time())) {
				867	split_message_reader_heap_.push_back(std::make_pair(
				868	split_message_reader->queue_data_time(), split_message_reader_index));
				869
				870	std::push_heap(split_message_reader_heap_.begin(),
				871	split_message_reader_heap_.end(), ChannelHeapCompare);
				872	}
				873	++split_message_reader_index;
				874	}
				875
				876	node_ = configuration::GetNodeOrDie(configuration(), target_node);
				877	return true;
				878	}
				879
				880	monotonic_clock::time_point ChannelMerger::OldestMessage() const {
				881	if (channel_heap_.size() == 0u) {
				882	return monotonic_clock::max_time;
				883	}
				884	return channel_heap_.front().first;
				885	}
				886
				887	void ChannelMerger::PushChannelHeap(monotonic_clock::time_point timestamp,
				888	int channel_index) {
				889	// Pop and recreate the heap if it has already been pushed. And since we are
				890	// pushing again, we don't need to clear pushed.
				891	if (timestamp_mergers_[channel_index].pushed()) {
				892	channel_heap_.erase(std::find_if(
				893	channel_heap_.begin(), channel_heap_.end(),
				894	[channel_index](const std::pair<monotonic_clock::time_point, int> x) {
				895	return x.second == channel_index;
				896	}));
				897	std::make_heap(channel_heap_.begin(), channel_heap_.end(),
				898	ChannelHeapCompare);
				899	}
				900
Austin Schuh	05b7047	2020-01-01 17:11:17 -0800	[diff] [blame]	901	channel_heap_.push_back(std::make_pair(timestamp, channel_index));
				902
				903	// The default sort puts the newest message first. Use a custom comparator to
				904	// put the oldest message first.
				905	std::push_heap(channel_heap_.begin(), channel_heap_.end(),
				906	ChannelHeapCompare);
				907	}
				908
Austin Schuh	6f3babe	2020-01-26 20:34:50 -0800	[diff] [blame^]	909	std::tuple<TimestampMerger::DeliveryTimestamp, int,
				910	FlatbufferVector<MessageHeader>>
				911	ChannelMerger::PopOldest() {
				912	CHECK(channel_heap_.size() > 0);
Austin Schuh	05b7047	2020-01-01 17:11:17 -0800	[diff] [blame]	913	std::pair<monotonic_clock::time_point, int> oldest_channel_data =
				914	channel_heap_.front();
Austin Schuh	6f3babe	2020-01-26 20:34:50 -0800	[diff] [blame^]	915	int channel_index = oldest_channel_data.second;
Austin Schuh	05b7047	2020-01-01 17:11:17 -0800	[diff] [blame]	916	std::pop_heap(channel_heap_.begin(), channel_heap_.end(),
				917	&ChannelHeapCompare);
				918	channel_heap_.pop_back();
Austin Schuh	6f3babe	2020-01-26 20:34:50 -0800	[diff] [blame^]	919	timestamp_mergers_[channel_index].set_pushed(false);
Austin Schuh	05b7047	2020-01-01 17:11:17 -0800	[diff] [blame]	920
Austin Schuh	6f3babe	2020-01-26 20:34:50 -0800	[diff] [blame^]	921	TimestampMerger *merger = &timestamp_mergers_[channel_index];
Austin Schuh	05b7047	2020-01-01 17:11:17 -0800	[diff] [blame]	922
Austin Schuh	6f3babe	2020-01-26 20:34:50 -0800	[diff] [blame^]	923	// Merger auto-pushes from here, but doesn't fetch anything new from the log
				924	// file.
				925	std::tuple<TimestampMerger::DeliveryTimestamp,
				926	FlatbufferVector<MessageHeader>>
				927	message = merger->PopOldest();
Austin Schuh	05b7047	2020-01-01 17:11:17 -0800	[diff] [blame]	928
Austin Schuh	6f3babe	2020-01-26 20:34:50 -0800	[diff] [blame^]	929	QueueMessages(OldestMessage());
Austin Schuh	05b7047	2020-01-01 17:11:17 -0800	[diff] [blame]	930
Austin Schuh	6f3babe	2020-01-26 20:34:50 -0800	[diff] [blame^]	931	return std::make_tuple(std::get<0>(message), channel_index,
				932	std::move(std::get<1>(message)));
				933	}
				934
				935	void ChannelMerger::QueueMessages(
				936	monotonic_clock::time_point oldest_message_time) {
				937	// Pop and re-queue readers until they are all caught up.
				938	while (true) {
				939	if (split_message_reader_heap_.size() == 0) {
				940	return;
				941	}
				942	std::pair<monotonic_clock::time_point, int> oldest_channel_data =
				943	split_message_reader_heap_.front();
				944
				945	// No work to do, bail.
				946	if (oldest_channel_data.first > oldest_message_time) {
				947	return;
				948	}
				949
				950	// Drop it off the heap.
				951	std::pop_heap(split_message_reader_heap_.begin(),
				952	split_message_reader_heap_.end(), &ChannelHeapCompare);
				953	split_message_reader_heap_.pop_back();
				954
				955	// And if there is data left in the log file, push it back on the heap with
				956	// the updated time.
				957	const int split_message_reader_index = oldest_channel_data.second;
				958	if (split_message_readers_[split_message_reader_index]->QueueMessages(
				959	oldest_message_time)) {
				960	split_message_reader_heap_.push_back(std::make_pair(
				961	split_message_readers_[split_message_reader_index]->queue_data_time(),
				962	split_message_reader_index));
				963
				964	std::push_heap(split_message_reader_heap_.begin(),
				965	split_message_reader_heap_.end(), ChannelHeapCompare);
				966	}
Austin Schuh	05b7047	2020-01-01 17:11:17 -0800	[diff] [blame]	967	}
Austin Schuh	05b7047	2020-01-01 17:11:17 -0800	[diff] [blame]	968	}
				969
Austin Schuh	a36c890	2019-12-30 18:07:15 -0800	[diff] [blame]	970	} // namespace logger
				971	} // namespace aos