blob: ade82f9ac8e7532f6a1454bc4941100123a38393 [file] [log] [blame]
Austin Schuha36c8902019-12-30 18:07:15 -08001#include "aos/events/logging/logfile_utils.h"
2
3#include <fcntl.h>
4#include <limits.h>
5#include <sys/stat.h>
6#include <sys/types.h>
7#include <sys/uio.h>
8
9#include <vector>
10
Austin Schuhe4fca832020-03-07 16:58:53 -080011#include "absl/strings/escaping.h"
Austin Schuh05b70472020-01-01 17:11:17 -080012#include "aos/configuration.h"
Austin Schuha36c8902019-12-30 18:07:15 -080013#include "aos/events/logging/logger_generated.h"
Austin Schuhfa895892020-01-07 20:07:41 -080014#include "aos/flatbuffer_merge.h"
Austin Schuh6f3babe2020-01-26 20:34:50 -080015#include "aos/util/file.h"
Austin Schuha36c8902019-12-30 18:07:15 -080016#include "flatbuffers/flatbuffers.h"
Austin Schuh05b70472020-01-01 17:11:17 -080017#include "gflags/gflags.h"
18#include "glog/logging.h"
Austin Schuha36c8902019-12-30 18:07:15 -080019
20DEFINE_int32(flush_size, 1000000,
21 "Number of outstanding bytes to allow before flushing to disk.");
22
23namespace aos {
24namespace logger {
25
Austin Schuh05b70472020-01-01 17:11:17 -080026namespace chrono = std::chrono;
27
Austin Schuha36c8902019-12-30 18:07:15 -080028DetachedBufferWriter::DetachedBufferWriter(std::string_view filename)
Austin Schuh6f3babe2020-01-26 20:34:50 -080029 : filename_(filename) {
30 util::MkdirP(filename, 0777);
31 fd_ = open(std::string(filename).c_str(),
32 O_RDWR | O_CLOEXEC | O_CREAT | O_EXCL, 0774);
33 VLOG(1) << "Opened " << filename << " for writing";
34 PCHECK(fd_ != -1) << ": Failed to open " << filename << " for writing";
Austin Schuha36c8902019-12-30 18:07:15 -080035}
36
37DetachedBufferWriter::~DetachedBufferWriter() {
38 Flush();
39 PLOG_IF(ERROR, close(fd_) == -1) << " Failed to close logfile";
40}
41
42void DetachedBufferWriter::QueueSizedFlatbuffer(
43 flatbuffers::FlatBufferBuilder *fbb) {
44 QueueSizedFlatbuffer(fbb->Release());
45}
46
Austin Schuhde031b72020-01-10 19:34:41 -080047void DetachedBufferWriter::WriteSizedFlatbuffer(
48 absl::Span<const uint8_t> span) {
49 // Cheat aggressively... Write out the queued up data, and then write this
50 // data once without buffering. It is hard to make a DetachedBuffer out of
51 // this data, and we don't want to worry about lifetimes.
52 Flush();
53 iovec_.clear();
54 iovec_.reserve(1);
55
56 struct iovec n;
57 n.iov_base = const_cast<uint8_t *>(span.data());
58 n.iov_len = span.size();
59 iovec_.emplace_back(n);
60
61 const ssize_t written = writev(fd_, iovec_.data(), iovec_.size());
62
63 PCHECK(written == static_cast<ssize_t>(n.iov_len))
64 << ": Wrote " << written << " expected " << n.iov_len;
Brian Silverman98360e22020-04-28 16:51:20 -070065 written_size_ += written;
Austin Schuhde031b72020-01-10 19:34:41 -080066}
67
Austin Schuha36c8902019-12-30 18:07:15 -080068void DetachedBufferWriter::QueueSizedFlatbuffer(
69 flatbuffers::DetachedBuffer &&buffer) {
70 queued_size_ += buffer.size();
71 queue_.emplace_back(std::move(buffer));
72
73 // Flush if we are at the max number of iovs per writev, or have written
74 // enough data. Otherwise writev will fail with an invalid argument.
75 if (queued_size_ > static_cast<size_t>(FLAGS_flush_size) ||
76 queue_.size() == IOV_MAX) {
77 Flush();
78 }
79}
80
81void DetachedBufferWriter::Flush() {
82 if (queue_.size() == 0u) {
83 return;
84 }
85 iovec_.clear();
86 iovec_.reserve(queue_.size());
87 size_t counted_size = 0;
88 for (size_t i = 0; i < queue_.size(); ++i) {
89 struct iovec n;
90 n.iov_base = queue_[i].data();
91 n.iov_len = queue_[i].size();
92 counted_size += n.iov_len;
93 iovec_.emplace_back(std::move(n));
94 }
95 CHECK_EQ(counted_size, queued_size_);
96 const ssize_t written = writev(fd_, iovec_.data(), iovec_.size());
97
98 PCHECK(written == static_cast<ssize_t>(queued_size_))
99 << ": Wrote " << written << " expected " << queued_size_;
Brian Silverman98360e22020-04-28 16:51:20 -0700100 written_size_ += written;
Austin Schuha36c8902019-12-30 18:07:15 -0800101
102 queued_size_ = 0;
103 queue_.clear();
104 // TODO(austin): Handle partial writes in some way other than crashing...
105}
106
107flatbuffers::Offset<MessageHeader> PackMessage(
108 flatbuffers::FlatBufferBuilder *fbb, const Context &context,
109 int channel_index, LogType log_type) {
110 flatbuffers::Offset<flatbuffers::Vector<uint8_t>> data_offset;
111
112 switch (log_type) {
113 case LogType::kLogMessage:
114 case LogType::kLogMessageAndDeliveryTime:
Austin Schuh6f3babe2020-01-26 20:34:50 -0800115 case LogType::kLogRemoteMessage:
Brian Silvermaneaa41d62020-07-08 19:47:35 -0700116 data_offset = fbb->CreateVector(
117 static_cast<const uint8_t *>(context.data), context.size);
Austin Schuha36c8902019-12-30 18:07:15 -0800118 break;
119
120 case LogType::kLogDeliveryTimeOnly:
121 break;
122 }
123
124 MessageHeader::Builder message_header_builder(*fbb);
125 message_header_builder.add_channel_index(channel_index);
Austin Schuh6f3babe2020-01-26 20:34:50 -0800126
127 switch (log_type) {
128 case LogType::kLogRemoteMessage:
129 message_header_builder.add_queue_index(context.remote_queue_index);
130 message_header_builder.add_monotonic_sent_time(
131 context.monotonic_remote_time.time_since_epoch().count());
132 message_header_builder.add_realtime_sent_time(
133 context.realtime_remote_time.time_since_epoch().count());
134 break;
135
136 case LogType::kLogMessage:
137 case LogType::kLogMessageAndDeliveryTime:
138 case LogType::kLogDeliveryTimeOnly:
139 message_header_builder.add_queue_index(context.queue_index);
140 message_header_builder.add_monotonic_sent_time(
141 context.monotonic_event_time.time_since_epoch().count());
142 message_header_builder.add_realtime_sent_time(
143 context.realtime_event_time.time_since_epoch().count());
144 break;
145 }
Austin Schuha36c8902019-12-30 18:07:15 -0800146
147 switch (log_type) {
148 case LogType::kLogMessage:
Austin Schuh6f3babe2020-01-26 20:34:50 -0800149 case LogType::kLogRemoteMessage:
Austin Schuha36c8902019-12-30 18:07:15 -0800150 message_header_builder.add_data(data_offset);
151 break;
152
153 case LogType::kLogMessageAndDeliveryTime:
154 message_header_builder.add_data(data_offset);
155 [[fallthrough]];
156
157 case LogType::kLogDeliveryTimeOnly:
158 message_header_builder.add_monotonic_remote_time(
159 context.monotonic_remote_time.time_since_epoch().count());
160 message_header_builder.add_realtime_remote_time(
161 context.realtime_remote_time.time_since_epoch().count());
162 message_header_builder.add_remote_queue_index(context.remote_queue_index);
163 break;
164 }
165
166 return message_header_builder.Finish();
167}
168
Austin Schuh05b70472020-01-01 17:11:17 -0800169SpanReader::SpanReader(std::string_view filename)
Austin Schuh6f3babe2020-01-26 20:34:50 -0800170 : filename_(filename),
171 fd_(open(std::string(filename).c_str(), O_RDONLY | O_CLOEXEC)) {
Austin Schuh05b70472020-01-01 17:11:17 -0800172 PCHECK(fd_ != -1) << ": Failed to open " << filename;
173}
174
175absl::Span<const uint8_t> SpanReader::ReadMessage() {
176 // Make sure we have enough for the size.
177 if (data_.size() - consumed_data_ < sizeof(flatbuffers::uoffset_t)) {
178 if (!ReadBlock()) {
179 return absl::Span<const uint8_t>();
180 }
181 }
182
183 // Now make sure we have enough for the message.
184 const size_t data_size =
185 flatbuffers::GetPrefixedSize(data_.data() + consumed_data_) +
186 sizeof(flatbuffers::uoffset_t);
Austin Schuhe4fca832020-03-07 16:58:53 -0800187 if (data_size == sizeof(flatbuffers::uoffset_t)) {
188 LOG(ERROR) << "Size of data is zero. Log file end is corrupted, skipping.";
189 LOG(ERROR) << " Rest of log file is "
190 << absl::BytesToHexString(std::string_view(
191 reinterpret_cast<const char *>(data_.data() +
192 consumed_data_),
193 data_.size() - consumed_data_));
194 return absl::Span<const uint8_t>();
195 }
Austin Schuh05b70472020-01-01 17:11:17 -0800196 while (data_.size() < consumed_data_ + data_size) {
197 if (!ReadBlock()) {
198 return absl::Span<const uint8_t>();
199 }
200 }
201
202 // And return it, consuming the data.
203 const uint8_t *data_ptr = data_.data() + consumed_data_;
204
205 consumed_data_ += data_size;
206
207 return absl::Span<const uint8_t>(data_ptr, data_size);
208}
209
210bool SpanReader::MessageAvailable() {
211 // Are we big enough to read the size?
212 if (data_.size() - consumed_data_ < sizeof(flatbuffers::uoffset_t)) {
213 return false;
214 }
215
216 // Then, are we big enough to read the full message?
217 const size_t data_size =
218 flatbuffers::GetPrefixedSize(data_.data() + consumed_data_) +
219 sizeof(flatbuffers::uoffset_t);
220 if (data_.size() < consumed_data_ + data_size) {
221 return false;
222 }
223
224 return true;
225}
226
227bool SpanReader::ReadBlock() {
228 if (end_of_file_) {
229 return false;
230 }
231
232 // Appends 256k. This is enough that the read call is efficient. We don't
233 // want to spend too much time reading small chunks because the syscalls for
234 // that will be expensive.
235 constexpr size_t kReadSize = 256 * 1024;
236
237 // Strip off any unused data at the front.
238 if (consumed_data_ != 0) {
239 data_.erase(data_.begin(), data_.begin() + consumed_data_);
240 consumed_data_ = 0;
241 }
242
243 const size_t starting_size = data_.size();
244
245 // This should automatically grow the backing store. It won't shrink if we
246 // get a small chunk later. This reduces allocations when we want to append
247 // more data.
248 data_.resize(data_.size() + kReadSize);
249
250 ssize_t count = read(fd_, &data_[starting_size], kReadSize);
251 data_.resize(starting_size + std::max(count, static_cast<ssize_t>(0)));
252 if (count == 0) {
253 end_of_file_ = true;
254 return false;
255 }
256 PCHECK(count > 0);
257
258 return true;
259}
260
Austin Schuh6f3babe2020-01-26 20:34:50 -0800261FlatbufferVector<LogFileHeader> ReadHeader(std::string_view filename) {
262 SpanReader span_reader(filename);
263 // Make sure we have enough to read the size.
264 absl::Span<const uint8_t> config_data = span_reader.ReadMessage();
265
266 // Make sure something was read.
Austin Schuh97789fc2020-08-01 14:42:45 -0700267 CHECK(config_data != absl::Span<const uint8_t>())
268 << ": Failed to read header from: " << filename;
Austin Schuh6f3babe2020-01-26 20:34:50 -0800269
270 // And copy the config so we have it forever.
271 std::vector<uint8_t> data(
272 config_data.begin() + sizeof(flatbuffers::uoffset_t), config_data.end());
273 return FlatbufferVector<LogFileHeader>(std::move(data));
274}
275
Austin Schuh05b70472020-01-01 17:11:17 -0800276MessageReader::MessageReader(std::string_view filename)
Austin Schuh97789fc2020-08-01 14:42:45 -0700277 : span_reader_(filename),
278 raw_log_file_header_(FlatbufferVector<LogFileHeader>::Empty()) {
Austin Schuh05b70472020-01-01 17:11:17 -0800279 // Make sure we have enough to read the size.
Austin Schuh97789fc2020-08-01 14:42:45 -0700280 absl::Span<const uint8_t> header_data = span_reader_.ReadMessage();
Austin Schuh05b70472020-01-01 17:11:17 -0800281
282 // Make sure something was read.
Austin Schuh97789fc2020-08-01 14:42:45 -0700283 CHECK(header_data != absl::Span<const uint8_t>())
284 << ": Failed to read header from: " << filename;
Austin Schuh05b70472020-01-01 17:11:17 -0800285
Austin Schuh97789fc2020-08-01 14:42:45 -0700286 // And copy the header data so we have it forever.
287 std::vector<uint8_t> header_data_copy(
288 header_data.begin() + sizeof(flatbuffers::uoffset_t), header_data.end());
289 raw_log_file_header_ =
290 FlatbufferVector<LogFileHeader>(std::move(header_data_copy));
Austin Schuh05b70472020-01-01 17:11:17 -0800291
Austin Schuhcde938c2020-02-02 17:30:07 -0800292 max_out_of_order_duration_ =
293 std::chrono::nanoseconds(log_file_header()->max_out_of_order_duration());
294
295 VLOG(1) << "Opened " << filename << " as node "
296 << FlatbufferToJson(log_file_header()->node());
Austin Schuh05b70472020-01-01 17:11:17 -0800297}
298
299std::optional<FlatbufferVector<MessageHeader>> MessageReader::ReadMessage() {
300 absl::Span<const uint8_t> msg_data = span_reader_.ReadMessage();
301 if (msg_data == absl::Span<const uint8_t>()) {
302 return std::nullopt;
303 }
304
305 FlatbufferVector<MessageHeader> result{std::vector<uint8_t>(
306 msg_data.begin() + sizeof(flatbuffers::uoffset_t), msg_data.end())};
307
308 const monotonic_clock::time_point timestamp = monotonic_clock::time_point(
309 chrono::nanoseconds(result.message().monotonic_sent_time()));
310
311 newest_timestamp_ = std::max(newest_timestamp_, timestamp);
Austin Schuh8bd96322020-02-13 21:18:22 -0800312 VLOG(2) << "Read from " << filename() << " data " << FlatbufferToJson(result);
Austin Schuh6f3babe2020-01-26 20:34:50 -0800313 return std::move(result);
Austin Schuh05b70472020-01-01 17:11:17 -0800314}
315
Austin Schuh6f3babe2020-01-26 20:34:50 -0800316SplitMessageReader::SplitMessageReader(
Austin Schuhfa895892020-01-07 20:07:41 -0800317 const std::vector<std::string> &filenames)
318 : filenames_(filenames),
Austin Schuh97789fc2020-08-01 14:42:45 -0700319 log_file_header_(FlatbufferVector<LogFileHeader>::Empty()) {
Austin Schuhfa895892020-01-07 20:07:41 -0800320 CHECK(NextLogFile()) << ": filenames is empty. Need files to read.";
321
Austin Schuh6f3babe2020-01-26 20:34:50 -0800322 // Grab any log file header. They should all match (and we will check as we
323 // open more of them).
Austin Schuh97789fc2020-08-01 14:42:45 -0700324 log_file_header_ = message_reader_->raw_log_file_header();
Austin Schuhfa895892020-01-07 20:07:41 -0800325
Austin Schuh6f3babe2020-01-26 20:34:50 -0800326 // Setup per channel state.
Austin Schuh05b70472020-01-01 17:11:17 -0800327 channels_.resize(configuration()->channels()->size());
Austin Schuh6f3babe2020-01-26 20:34:50 -0800328 for (ChannelData &channel_data : channels_) {
329 channel_data.data.split_reader = this;
330 // Build up the timestamp list.
331 if (configuration::MultiNode(configuration())) {
332 channel_data.timestamps.resize(configuration()->nodes()->size());
333 for (MessageHeaderQueue &queue : channel_data.timestamps) {
334 queue.timestamps = true;
335 queue.split_reader = this;
336 }
337 }
338 }
Austin Schuh05b70472020-01-01 17:11:17 -0800339
Austin Schuh6f3babe2020-01-26 20:34:50 -0800340 // Build up channels_to_write_ as an optimization to make it fast to figure
341 // out which datastructure to place any new data from a channel on.
342 for (const Channel *channel : *configuration()->channels()) {
343 // This is the main case. We will only see data on this node.
344 if (configuration::ChannelIsSendableOnNode(channel, node())) {
345 channels_to_write_.emplace_back(
346 &channels_[channels_to_write_.size()].data);
347 } else
348 // If we can't send, but can receive, we should be able to see
349 // timestamps here.
350 if (configuration::ChannelIsReadableOnNode(channel, node())) {
351 channels_to_write_.emplace_back(
352 &(channels_[channels_to_write_.size()]
353 .timestamps[configuration::GetNodeIndex(configuration(),
354 node())]));
355 } else {
356 channels_to_write_.emplace_back(nullptr);
357 }
358 }
Austin Schuh05b70472020-01-01 17:11:17 -0800359}
360
Austin Schuh6f3babe2020-01-26 20:34:50 -0800361bool SplitMessageReader::NextLogFile() {
Austin Schuhfa895892020-01-07 20:07:41 -0800362 if (next_filename_index_ == filenames_.size()) {
363 return false;
364 }
365 message_reader_ =
366 std::make_unique<MessageReader>(filenames_[next_filename_index_]);
367
368 // We can't support the config diverging between two log file headers. See if
369 // they are the same.
370 if (next_filename_index_ != 0) {
Austin Schuh97789fc2020-08-01 14:42:45 -0700371 CHECK(CompareFlatBuffer(message_reader_->raw_log_file_header(),
372 log_file_header_))
Austin Schuhfa895892020-01-07 20:07:41 -0800373 << ": Header is different between log file chunks "
374 << filenames_[next_filename_index_] << " and "
375 << filenames_[next_filename_index_ - 1] << ", this is not supported.";
376 }
377
378 ++next_filename_index_;
379 return true;
380}
381
Austin Schuh6f3babe2020-01-26 20:34:50 -0800382bool SplitMessageReader::QueueMessages(
Austin Schuhcde938c2020-02-02 17:30:07 -0800383 monotonic_clock::time_point last_dequeued_time) {
Austin Schuh6f3babe2020-01-26 20:34:50 -0800384 // TODO(austin): Once we are happy that everything works, read a 256kb chunk
385 // to reduce the need to re-heap down below.
Austin Schuhcde938c2020-02-02 17:30:07 -0800386
387 // Special case no more data. Otherwise we blow up on the CHECK statement
388 // confirming that we have enough data queued.
389 if (at_end_) {
390 return false;
391 }
392
393 // If this isn't the first time around, confirm that we had enough data queued
394 // to follow the contract.
395 if (time_to_queue_ != monotonic_clock::min_time) {
396 CHECK_LE(last_dequeued_time,
397 newest_timestamp() - max_out_of_order_duration())
398 << " node " << FlatbufferToJson(node()) << " on " << this;
399
400 // Bail if there is enough data already queued.
401 if (last_dequeued_time < time_to_queue_) {
Austin Schuhee711052020-08-24 16:06:09 -0700402 VLOG(1) << MaybeNodeName(target_node_) << "All up to date on " << this
403 << ", dequeued " << last_dequeued_time << " queue time "
404 << time_to_queue_;
Austin Schuhcde938c2020-02-02 17:30:07 -0800405 return true;
406 }
407 } else {
408 // Startup takes a special dance. We want to queue up until the start time,
409 // but we then want to find the next message to read. The conservative
410 // answer is to immediately trigger a second requeue to get things moving.
411 time_to_queue_ = monotonic_start_time();
412 QueueMessages(time_to_queue_);
413 }
414
415 // If we are asked to queue, queue for at least max_out_of_order_duration past
416 // the last known time in the log file (ie the newest timestep read). As long
417 // as we requeue exactly when time_to_queue_ is dequeued and go no further, we
418 // are safe. And since we pop in order, that works.
419 //
420 // Special case the start of the log file. There should be at most 1 message
421 // from each channel at the start of the log file. So always force the start
422 // of the log file to just be read.
423 time_to_queue_ = std::max(time_to_queue_, newest_timestamp());
Austin Schuhee711052020-08-24 16:06:09 -0700424 VLOG(1) << MaybeNodeName(target_node_) << "Queueing, going until "
425 << time_to_queue_ << " " << filename();
Austin Schuhcde938c2020-02-02 17:30:07 -0800426
427 bool was_emplaced = false;
Austin Schuh6f3babe2020-01-26 20:34:50 -0800428 while (true) {
Austin Schuhcde938c2020-02-02 17:30:07 -0800429 // Stop if we have enough.
Brian Silverman98360e22020-04-28 16:51:20 -0700430 if (newest_timestamp() > time_to_queue_ + max_out_of_order_duration() &&
Austin Schuhcde938c2020-02-02 17:30:07 -0800431 was_emplaced) {
Austin Schuhee711052020-08-24 16:06:09 -0700432 VLOG(1) << MaybeNodeName(target_node_) << "Done queueing on " << this
433 << ", queued to " << newest_timestamp() << " with requeue time "
434 << time_to_queue_;
Austin Schuh6f3babe2020-01-26 20:34:50 -0800435 return true;
436 }
Austin Schuh05b70472020-01-01 17:11:17 -0800437
Austin Schuh6f3babe2020-01-26 20:34:50 -0800438 if (std::optional<FlatbufferVector<MessageHeader>> msg =
439 message_reader_->ReadMessage()) {
440 const MessageHeader &header = msg.value().message();
441
Austin Schuhcde938c2020-02-02 17:30:07 -0800442 const monotonic_clock::time_point timestamp = monotonic_clock::time_point(
443 chrono::nanoseconds(header.monotonic_sent_time()));
Austin Schuh6f3babe2020-01-26 20:34:50 -0800444
Austin Schuh0b5fd032020-03-28 17:36:49 -0700445 if (VLOG_IS_ON(2)) {
Austin Schuhee711052020-08-24 16:06:09 -0700446 LOG(INFO) << MaybeNodeName(target_node_) << "Queued " << this
447 << " " << filename() << " ttq: " << time_to_queue_ << " now "
448 << newest_timestamp() << " start time "
449 << monotonic_start_time() << " " << FlatbufferToJson(&header);
Austin Schuh0b5fd032020-03-28 17:36:49 -0700450 } else if (VLOG_IS_ON(1)) {
451 FlatbufferVector<MessageHeader> copy = msg.value();
452 copy.mutable_message()->clear_data();
Austin Schuhee711052020-08-24 16:06:09 -0700453 LOG(INFO) << MaybeNodeName(target_node_) << "Queued " << this << " "
454 << filename() << " ttq: " << time_to_queue_ << " now "
455 << newest_timestamp() << " start time "
456 << monotonic_start_time() << " " << FlatbufferToJson(copy);
Austin Schuh0b5fd032020-03-28 17:36:49 -0700457 }
Austin Schuhcde938c2020-02-02 17:30:07 -0800458
459 const int channel_index = header.channel_index();
460 was_emplaced = channels_to_write_[channel_index]->emplace_back(
461 std::move(msg.value()));
462 if (was_emplaced) {
463 newest_timestamp_ = std::max(newest_timestamp_, timestamp);
464 }
Austin Schuh6f3babe2020-01-26 20:34:50 -0800465 } else {
466 if (!NextLogFile()) {
Austin Schuhee711052020-08-24 16:06:09 -0700467 VLOG(1) << MaybeNodeName(target_node_) << "No more files, last was "
468 << filenames_.back();
Austin Schuhcde938c2020-02-02 17:30:07 -0800469 at_end_ = true;
Austin Schuh8bd96322020-02-13 21:18:22 -0800470 for (MessageHeaderQueue *queue : channels_to_write_) {
471 if (queue == nullptr || queue->timestamp_merger == nullptr) {
472 continue;
473 }
474 queue->timestamp_merger->NoticeAtEnd();
475 }
Austin Schuh6f3babe2020-01-26 20:34:50 -0800476 return false;
477 }
478 }
Austin Schuh05b70472020-01-01 17:11:17 -0800479 }
Austin Schuh6f3babe2020-01-26 20:34:50 -0800480}
481
482void SplitMessageReader::SetTimestampMerger(TimestampMerger *timestamp_merger,
483 int channel_index,
484 const Node *target_node) {
485 const Node *reinterpreted_target_node =
486 configuration::GetNodeOrDie(configuration(), target_node);
Austin Schuhee711052020-08-24 16:06:09 -0700487 target_node_ = reinterpreted_target_node;
488
Austin Schuh6f3babe2020-01-26 20:34:50 -0800489 const Channel *const channel =
490 configuration()->channels()->Get(channel_index);
491
Austin Schuhcde938c2020-02-02 17:30:07 -0800492 VLOG(1) << " Configuring merger " << this << " for channel " << channel_index
493 << " "
494 << configuration::CleanedChannelToString(
495 configuration()->channels()->Get(channel_index));
496
Austin Schuh6f3babe2020-01-26 20:34:50 -0800497 MessageHeaderQueue *message_header_queue = nullptr;
498
499 // Figure out if this log file is from our point of view, or the other node's
500 // point of view.
501 if (node() == reinterpreted_target_node) {
Austin Schuhcde938c2020-02-02 17:30:07 -0800502 VLOG(1) << " Replaying as logged node " << filename();
503
504 if (configuration::ChannelIsSendableOnNode(channel, node())) {
505 VLOG(1) << " Data on node";
506 message_header_queue = &(channels_[channel_index].data);
507 } else if (configuration::ChannelIsReadableOnNode(channel, node())) {
508 VLOG(1) << " Timestamps on node";
509 message_header_queue =
510 &(channels_[channel_index].timestamps[configuration::GetNodeIndex(
511 configuration(), node())]);
Austin Schuh6f3babe2020-01-26 20:34:50 -0800512 } else {
Austin Schuhcde938c2020-02-02 17:30:07 -0800513 VLOG(1) << " Dropping";
Austin Schuh6f3babe2020-01-26 20:34:50 -0800514 }
515 } else {
Austin Schuhcde938c2020-02-02 17:30:07 -0800516 VLOG(1) << " Replaying as other node " << filename();
Austin Schuh6f3babe2020-01-26 20:34:50 -0800517 // We are replaying from another node's point of view. The only interesting
Austin Schuhcde938c2020-02-02 17:30:07 -0800518 // data is data that is sent from our node and received on theirs.
519 if (configuration::ChannelIsReadableOnNode(channel,
520 reinterpreted_target_node) &&
521 configuration::ChannelIsSendableOnNode(channel, node())) {
522 VLOG(1) << " Readable on target node";
Austin Schuh6f3babe2020-01-26 20:34:50 -0800523 // Data from another node.
524 message_header_queue = &(channels_[channel_index].data);
525 } else {
Austin Schuhcde938c2020-02-02 17:30:07 -0800526 VLOG(1) << " Dropping";
Austin Schuh6f3babe2020-01-26 20:34:50 -0800527 // This is either not sendable on the other node, or is a timestamp and
528 // therefore not interesting.
529 }
530 }
531
532 // If we found one, write it down. This will be nullptr when there is nothing
533 // relevant on this channel on this node for the target node. In that case,
534 // we want to drop the message instead of queueing it.
535 if (message_header_queue != nullptr) {
536 message_header_queue->timestamp_merger = timestamp_merger;
537 }
538}
539
540std::tuple<monotonic_clock::time_point, uint32_t,
541 FlatbufferVector<MessageHeader>>
542SplitMessageReader::PopOldest(int channel_index) {
543 CHECK_GT(channels_[channel_index].data.size(), 0u);
Austin Schuhcde938c2020-02-02 17:30:07 -0800544 const std::tuple<monotonic_clock::time_point, uint32_t, const MessageHeader *>
545 timestamp = channels_[channel_index].data.front_timestamp();
Austin Schuh6f3babe2020-01-26 20:34:50 -0800546 FlatbufferVector<MessageHeader> front =
547 std::move(channels_[channel_index].data.front());
548 channels_[channel_index].data.pop_front();
Austin Schuhcde938c2020-02-02 17:30:07 -0800549
Austin Schuhee711052020-08-24 16:06:09 -0700550 VLOG(1) << MaybeNodeName(target_node_) << "Popped " << this << " "
551 << std::get<0>(timestamp) << " for " << channel_index;
Austin Schuhcde938c2020-02-02 17:30:07 -0800552
553 QueueMessages(std::get<0>(timestamp));
Austin Schuh6f3babe2020-01-26 20:34:50 -0800554
555 return std::make_tuple(std::get<0>(timestamp), std::get<1>(timestamp),
556 std::move(front));
557}
558
559std::tuple<monotonic_clock::time_point, uint32_t,
560 FlatbufferVector<MessageHeader>>
561SplitMessageReader::PopOldest(int channel, int node_index) {
562 CHECK_GT(channels_[channel].timestamps[node_index].size(), 0u);
Austin Schuhcde938c2020-02-02 17:30:07 -0800563 const std::tuple<monotonic_clock::time_point, uint32_t, const MessageHeader *>
564 timestamp = channels_[channel].timestamps[node_index].front_timestamp();
Austin Schuh6f3babe2020-01-26 20:34:50 -0800565 FlatbufferVector<MessageHeader> front =
566 std::move(channels_[channel].timestamps[node_index].front());
567 channels_[channel].timestamps[node_index].pop_front();
Austin Schuhcde938c2020-02-02 17:30:07 -0800568
Austin Schuhee711052020-08-24 16:06:09 -0700569 VLOG(1) << MaybeNodeName(target_node_) << "Popped " << this << " "
570 << std::get<0>(timestamp) << " for "
571 << configuration::StrippedChannelToString(
572 configuration()->channels()->Get(channel))
573 << " on " << node_index;
Austin Schuhcde938c2020-02-02 17:30:07 -0800574
575 QueueMessages(std::get<0>(timestamp));
Austin Schuh6f3babe2020-01-26 20:34:50 -0800576
577 return std::make_tuple(std::get<0>(timestamp), std::get<1>(timestamp),
578 std::move(front));
579}
580
Austin Schuhcde938c2020-02-02 17:30:07 -0800581bool SplitMessageReader::MessageHeaderQueue::emplace_back(
Austin Schuh6f3babe2020-01-26 20:34:50 -0800582 FlatbufferVector<MessageHeader> &&msg) {
583 CHECK(split_reader != nullptr);
584
585 // If there is no timestamp merger for this queue, nobody is listening. Drop
586 // the message. This happens when a log file from another node is replayed,
587 // and the timestamp mergers down stream just don't care.
588 if (timestamp_merger == nullptr) {
Austin Schuhcde938c2020-02-02 17:30:07 -0800589 return false;
Austin Schuh6f3babe2020-01-26 20:34:50 -0800590 }
591
592 CHECK(timestamps != msg.message().has_data())
593 << ": Got timestamps and data mixed up on a node. "
594 << FlatbufferToJson(msg);
595
596 data_.emplace_back(std::move(msg));
597
598 if (data_.size() == 1u) {
599 // Yup, new data. Notify.
600 if (timestamps) {
601 timestamp_merger->UpdateTimestamp(split_reader, front_timestamp());
602 } else {
603 timestamp_merger->Update(split_reader, front_timestamp());
604 }
605 }
Austin Schuhcde938c2020-02-02 17:30:07 -0800606
607 return true;
Austin Schuh6f3babe2020-01-26 20:34:50 -0800608}
609
610void SplitMessageReader::MessageHeaderQueue::pop_front() {
611 data_.pop_front();
612 if (data_.size() != 0u) {
613 // Yup, new data.
614 if (timestamps) {
615 timestamp_merger->UpdateTimestamp(split_reader, front_timestamp());
616 } else {
617 timestamp_merger->Update(split_reader, front_timestamp());
618 }
619 }
Austin Schuh05b70472020-01-01 17:11:17 -0800620}
621
622namespace {
623
Austin Schuh6f3babe2020-01-26 20:34:50 -0800624bool SplitMessageReaderHeapCompare(
625 const std::tuple<monotonic_clock::time_point, uint32_t,
626 SplitMessageReader *>
627 first,
628 const std::tuple<monotonic_clock::time_point, uint32_t,
629 SplitMessageReader *>
630 second) {
631 if (std::get<0>(first) > std::get<0>(second)) {
632 return true;
633 } else if (std::get<0>(first) == std::get<0>(second)) {
634 if (std::get<1>(first) > std::get<1>(second)) {
635 return true;
636 } else if (std::get<1>(first) == std::get<1>(second)) {
637 return std::get<2>(first) > std::get<2>(second);
638 } else {
639 return false;
640 }
641 } else {
642 return false;
643 }
644}
645
Austin Schuh05b70472020-01-01 17:11:17 -0800646bool ChannelHeapCompare(
647 const std::pair<monotonic_clock::time_point, int> first,
648 const std::pair<monotonic_clock::time_point, int> second) {
649 if (first.first > second.first) {
650 return true;
651 } else if (first.first == second.first) {
652 return first.second > second.second;
653 } else {
654 return false;
655 }
656}
657
658} // namespace
659
Austin Schuh6f3babe2020-01-26 20:34:50 -0800660TimestampMerger::TimestampMerger(
661 const Configuration *configuration,
662 std::vector<SplitMessageReader *> split_message_readers, int channel_index,
663 const Node *target_node, ChannelMerger *channel_merger)
664 : configuration_(configuration),
665 split_message_readers_(std::move(split_message_readers)),
666 channel_index_(channel_index),
667 node_index_(configuration::MultiNode(configuration)
668 ? configuration::GetNodeIndex(configuration, target_node)
669 : -1),
670 channel_merger_(channel_merger) {
671 // Tell the readers we care so they know who to notify.
Austin Schuhcde938c2020-02-02 17:30:07 -0800672 VLOG(1) << "Configuring channel " << channel_index << " target node "
673 << FlatbufferToJson(target_node);
Austin Schuh6f3babe2020-01-26 20:34:50 -0800674 for (SplitMessageReader *reader : split_message_readers_) {
675 reader->SetTimestampMerger(this, channel_index, target_node);
676 }
677
678 // And then determine if we need to track timestamps.
679 const Channel *channel = configuration->channels()->Get(channel_index);
680 if (!configuration::ChannelIsSendableOnNode(channel, target_node) &&
681 configuration::ChannelIsReadableOnNode(channel, target_node)) {
682 has_timestamps_ = true;
683 }
684}
685
686void TimestampMerger::PushMessageHeap(
Austin Schuhcde938c2020-02-02 17:30:07 -0800687 std::tuple<monotonic_clock::time_point, uint32_t, const MessageHeader *>
688 timestamp,
Austin Schuh6f3babe2020-01-26 20:34:50 -0800689 SplitMessageReader *split_message_reader) {
690 DCHECK(std::find_if(message_heap_.begin(), message_heap_.end(),
691 [split_message_reader](
692 const std::tuple<monotonic_clock::time_point,
693 uint32_t, SplitMessageReader *>
694 x) {
695 return std::get<2>(x) == split_message_reader;
696 }) == message_heap_.end())
697 << ": Pushing message when it is already in the heap.";
698
699 message_heap_.push_back(std::make_tuple(
700 std::get<0>(timestamp), std::get<1>(timestamp), split_message_reader));
701
702 std::push_heap(message_heap_.begin(), message_heap_.end(),
703 &SplitMessageReaderHeapCompare);
704
705 // If we are just a data merger, don't wait for timestamps.
706 if (!has_timestamps_) {
Brian Silverman8a32ce62020-08-12 12:02:38 -0700707 channel_merger_->Update(std::get<0>(message_heap_[0]), channel_index_);
Austin Schuh6f3babe2020-01-26 20:34:50 -0800708 pushed_ = true;
709 }
710}
711
Austin Schuhcde938c2020-02-02 17:30:07 -0800712std::tuple<monotonic_clock::time_point, uint32_t, const MessageHeader *>
713TimestampMerger::oldest_message() const {
714 CHECK_GT(message_heap_.size(), 0u);
715 std::tuple<monotonic_clock::time_point, uint32_t, SplitMessageReader *>
716 oldest_message_reader = message_heap_.front();
717 return std::get<2>(oldest_message_reader)->oldest_message(channel_index_);
718}
719
720std::tuple<monotonic_clock::time_point, uint32_t, const MessageHeader *>
721TimestampMerger::oldest_timestamp() const {
722 CHECK_GT(timestamp_heap_.size(), 0u);
723 std::tuple<monotonic_clock::time_point, uint32_t, SplitMessageReader *>
724 oldest_message_reader = timestamp_heap_.front();
725 return std::get<2>(oldest_message_reader)
726 ->oldest_message(channel_index_, node_index_);
727}
728
Austin Schuh6f3babe2020-01-26 20:34:50 -0800729void TimestampMerger::PushTimestampHeap(
Austin Schuhcde938c2020-02-02 17:30:07 -0800730 std::tuple<monotonic_clock::time_point, uint32_t, const MessageHeader *>
731 timestamp,
Austin Schuh6f3babe2020-01-26 20:34:50 -0800732 SplitMessageReader *split_message_reader) {
733 DCHECK(std::find_if(timestamp_heap_.begin(), timestamp_heap_.end(),
734 [split_message_reader](
735 const std::tuple<monotonic_clock::time_point,
736 uint32_t, SplitMessageReader *>
737 x) {
738 return std::get<2>(x) == split_message_reader;
739 }) == timestamp_heap_.end())
740 << ": Pushing timestamp when it is already in the heap.";
741
742 timestamp_heap_.push_back(std::make_tuple(
743 std::get<0>(timestamp), std::get<1>(timestamp), split_message_reader));
744
745 std::push_heap(timestamp_heap_.begin(), timestamp_heap_.end(),
746 SplitMessageReaderHeapCompare);
747
748 // If we are a timestamp merger, don't wait for data. Missing data will be
749 // caught at read time.
750 if (has_timestamps_) {
Brian Silverman8a32ce62020-08-12 12:02:38 -0700751 channel_merger_->Update(std::get<0>(timestamp_heap_[0]), channel_index_);
Austin Schuh6f3babe2020-01-26 20:34:50 -0800752 pushed_ = true;
753 }
754}
755
756std::tuple<monotonic_clock::time_point, uint32_t,
757 FlatbufferVector<MessageHeader>>
758TimestampMerger::PopMessageHeap() {
759 // Pop the oldest message reader pointer off the heap.
760 CHECK_GT(message_heap_.size(), 0u);
761 std::tuple<monotonic_clock::time_point, uint32_t, SplitMessageReader *>
762 oldest_message_reader = message_heap_.front();
763
764 std::pop_heap(message_heap_.begin(), message_heap_.end(),
765 &SplitMessageReaderHeapCompare);
766 message_heap_.pop_back();
767
768 // Pop the oldest message. This re-pushes any messages from the reader to the
769 // message heap.
770 std::tuple<monotonic_clock::time_point, uint32_t,
771 FlatbufferVector<MessageHeader>>
772 oldest_message =
773 std::get<2>(oldest_message_reader)->PopOldest(channel_index_);
774
775 // Confirm that the time and queue_index we have recorded matches.
776 CHECK_EQ(std::get<0>(oldest_message), std::get<0>(oldest_message_reader));
777 CHECK_EQ(std::get<1>(oldest_message), std::get<1>(oldest_message_reader));
778
779 // Now, keep reading until we have found all duplicates.
Brian Silverman8a32ce62020-08-12 12:02:38 -0700780 while (!message_heap_.empty()) {
Austin Schuh6f3babe2020-01-26 20:34:50 -0800781 // See if it is a duplicate.
782 std::tuple<monotonic_clock::time_point, uint32_t, SplitMessageReader *>
783 next_oldest_message_reader = message_heap_.front();
784
Austin Schuhcde938c2020-02-02 17:30:07 -0800785 std::tuple<monotonic_clock::time_point, uint32_t, const MessageHeader *>
786 next_oldest_message_time = std::get<2>(next_oldest_message_reader)
787 ->oldest_message(channel_index_);
Austin Schuh6f3babe2020-01-26 20:34:50 -0800788
789 if (std::get<0>(next_oldest_message_time) == std::get<0>(oldest_message) &&
790 std::get<1>(next_oldest_message_time) == std::get<1>(oldest_message)) {
791 // Pop the message reader pointer.
792 std::pop_heap(message_heap_.begin(), message_heap_.end(),
793 &SplitMessageReaderHeapCompare);
794 message_heap_.pop_back();
795
796 // Pop the next oldest message. This re-pushes any messages from the
797 // reader.
798 std::tuple<monotonic_clock::time_point, uint32_t,
799 FlatbufferVector<MessageHeader>>
800 next_oldest_message = std::get<2>(next_oldest_message_reader)
801 ->PopOldest(channel_index_);
802
803 // And make sure the message matches in it's entirety.
804 CHECK(std::get<2>(oldest_message).span() ==
805 std::get<2>(next_oldest_message).span())
806 << ": Data at the same timestamp doesn't match.";
807 } else {
808 break;
809 }
810 }
811
812 return oldest_message;
813}
814
815std::tuple<monotonic_clock::time_point, uint32_t,
816 FlatbufferVector<MessageHeader>>
817TimestampMerger::PopTimestampHeap() {
818 // Pop the oldest message reader pointer off the heap.
819 CHECK_GT(timestamp_heap_.size(), 0u);
820
821 std::tuple<monotonic_clock::time_point, uint32_t, SplitMessageReader *>
822 oldest_timestamp_reader = timestamp_heap_.front();
823
824 std::pop_heap(timestamp_heap_.begin(), timestamp_heap_.end(),
825 &SplitMessageReaderHeapCompare);
826 timestamp_heap_.pop_back();
827
828 CHECK(node_index_ != -1) << ": Timestamps in a single node environment";
829
830 // Pop the oldest message. This re-pushes any timestamps from the reader to
831 // the timestamp heap.
832 std::tuple<monotonic_clock::time_point, uint32_t,
833 FlatbufferVector<MessageHeader>>
834 oldest_timestamp = std::get<2>(oldest_timestamp_reader)
835 ->PopOldest(channel_index_, node_index_);
836
837 // Confirm that the time we have recorded matches.
838 CHECK_EQ(std::get<0>(oldest_timestamp), std::get<0>(oldest_timestamp_reader));
839 CHECK_EQ(std::get<1>(oldest_timestamp), std::get<1>(oldest_timestamp_reader));
840
841 // TODO(austin): What if we get duplicate timestamps?
842
843 return oldest_timestamp;
844}
845
Austin Schuh8bd96322020-02-13 21:18:22 -0800846TimestampMerger::DeliveryTimestamp TimestampMerger::OldestTimestamp() const {
847 if (!has_timestamps_ || timestamp_heap_.size() == 0u) {
848 return TimestampMerger::DeliveryTimestamp{};
849 }
850
851 std::tuple<monotonic_clock::time_point, uint32_t, SplitMessageReader *>
852 oldest_timestamp_reader = timestamp_heap_.front();
853
854 std::tuple<monotonic_clock::time_point, uint32_t, const MessageHeader *>
855 oldest_timestamp = std::get<2>(oldest_timestamp_reader)
856 ->oldest_message(channel_index_, node_index_);
857
858 TimestampMerger::DeliveryTimestamp timestamp;
859 timestamp.monotonic_event_time =
860 monotonic_clock::time_point(chrono::nanoseconds(
861 std::get<2>(oldest_timestamp)->monotonic_sent_time()));
862 timestamp.realtime_event_time = realtime_clock::time_point(
863 chrono::nanoseconds(std::get<2>(oldest_timestamp)->realtime_sent_time()));
864
865 timestamp.monotonic_remote_time =
866 monotonic_clock::time_point(chrono::nanoseconds(
867 std::get<2>(oldest_timestamp)->monotonic_remote_time()));
868 timestamp.realtime_remote_time =
869 realtime_clock::time_point(chrono::nanoseconds(
870 std::get<2>(oldest_timestamp)->realtime_remote_time()));
871
872 timestamp.remote_queue_index = std::get<2>(oldest_timestamp)->queue_index();
873 return timestamp;
874}
875
Austin Schuh6f3babe2020-01-26 20:34:50 -0800876std::tuple<TimestampMerger::DeliveryTimestamp, FlatbufferVector<MessageHeader>>
877TimestampMerger::PopOldest() {
878 if (has_timestamps_) {
Austin Schuh8bd96322020-02-13 21:18:22 -0800879 // Read the timestamps.
Austin Schuh6f3babe2020-01-26 20:34:50 -0800880 std::tuple<monotonic_clock::time_point, uint32_t,
881 FlatbufferVector<MessageHeader>>
882 oldest_timestamp = PopTimestampHeap();
883
884 TimestampMerger::DeliveryTimestamp timestamp;
885 timestamp.monotonic_event_time =
886 monotonic_clock::time_point(chrono::nanoseconds(
887 std::get<2>(oldest_timestamp).message().monotonic_sent_time()));
888 timestamp.realtime_event_time =
889 realtime_clock::time_point(chrono::nanoseconds(
890 std::get<2>(oldest_timestamp).message().realtime_sent_time()));
891
892 // Consistency check.
893 CHECK_EQ(timestamp.monotonic_event_time, std::get<0>(oldest_timestamp));
894 CHECK_EQ(std::get<2>(oldest_timestamp).message().queue_index(),
895 std::get<1>(oldest_timestamp));
896
897 monotonic_clock::time_point remote_timestamp_monotonic_time(
898 chrono::nanoseconds(
899 std::get<2>(oldest_timestamp).message().monotonic_remote_time()));
900
Austin Schuh8bd96322020-02-13 21:18:22 -0800901 // See if we have any data. If not, pass the problem up the chain.
Brian Silverman8a32ce62020-08-12 12:02:38 -0700902 if (message_heap_.empty()) {
Austin Schuhee711052020-08-24 16:06:09 -0700903 LOG(WARNING) << MaybeNodeName(configuration_->nodes()->Get(node_index_))
904 << "No data to match timestamp on "
905 << configuration::CleanedChannelToString(
906 configuration_->channels()->Get(channel_index_))
907 << " (" << channel_index_ << ")";
Austin Schuh8bd96322020-02-13 21:18:22 -0800908 return std::make_tuple(timestamp,
909 std::move(std::get<2>(oldest_timestamp)));
910 }
911
Austin Schuh6f3babe2020-01-26 20:34:50 -0800912 while (true) {
Austin Schuhcde938c2020-02-02 17:30:07 -0800913 {
914 // Ok, now try grabbing data until we find one which matches.
915 std::tuple<monotonic_clock::time_point, uint32_t, const MessageHeader *>
916 oldest_message_ref = oldest_message();
917
918 // Time at which the message was sent (this message is written from the
919 // sending node's perspective.
920 monotonic_clock::time_point remote_monotonic_time(chrono::nanoseconds(
921 std::get<2>(oldest_message_ref)->monotonic_sent_time()));
922
923 if (remote_monotonic_time < remote_timestamp_monotonic_time) {
Austin Schuhee711052020-08-24 16:06:09 -0700924 LOG(WARNING) << configuration_->nodes()
925 ->Get(node_index_)
926 ->name()
927 ->string_view()
928 << " Undelivered message, skipping. Remote time is "
929 << remote_monotonic_time << " timestamp is "
930 << remote_timestamp_monotonic_time << " on channel "
931 << configuration::StrippedChannelToString(
932 configuration_->channels()->Get(channel_index_))
933 << " (" << channel_index_ << ")";
Austin Schuhcde938c2020-02-02 17:30:07 -0800934 PopMessageHeap();
935 continue;
936 } else if (remote_monotonic_time > remote_timestamp_monotonic_time) {
Austin Schuhee711052020-08-24 16:06:09 -0700937 LOG(WARNING) << configuration_->nodes()
938 ->Get(node_index_)
939 ->name()
940 ->string_view()
941 << " Data not found. Remote time should be "
942 << remote_timestamp_monotonic_time
943 << ", message time is " << remote_monotonic_time
944 << " on channel "
945 << configuration::StrippedChannelToString(
946 configuration_->channels()->Get(channel_index_))
947 << " (" << channel_index_ << ")";
Austin Schuhcde938c2020-02-02 17:30:07 -0800948 return std::make_tuple(timestamp,
949 std::move(std::get<2>(oldest_timestamp)));
950 }
951
952 timestamp.monotonic_remote_time = remote_monotonic_time;
953 }
954
Austin Schuh6f3babe2020-01-26 20:34:50 -0800955 std::tuple<monotonic_clock::time_point, uint32_t,
956 FlatbufferVector<MessageHeader>>
957 oldest_message = PopMessageHeap();
958
Austin Schuh6f3babe2020-01-26 20:34:50 -0800959 timestamp.realtime_remote_time =
960 realtime_clock::time_point(chrono::nanoseconds(
961 std::get<2>(oldest_message).message().realtime_sent_time()));
962 timestamp.remote_queue_index =
963 std::get<2>(oldest_message).message().queue_index();
964
Austin Schuhcde938c2020-02-02 17:30:07 -0800965 CHECK_EQ(timestamp.monotonic_remote_time,
966 remote_timestamp_monotonic_time);
967
968 CHECK_EQ(timestamp.remote_queue_index,
969 std::get<2>(oldest_timestamp).message().remote_queue_index())
970 << ": " << FlatbufferToJson(&std::get<2>(oldest_timestamp).message())
971 << " data "
972 << FlatbufferToJson(&std::get<2>(oldest_message).message());
Austin Schuh6f3babe2020-01-26 20:34:50 -0800973
Austin Schuh30dd5c52020-08-01 14:43:44 -0700974 return std::make_tuple(timestamp, std::move(std::get<2>(oldest_message)));
Austin Schuh6f3babe2020-01-26 20:34:50 -0800975 }
976 } else {
977 std::tuple<monotonic_clock::time_point, uint32_t,
978 FlatbufferVector<MessageHeader>>
979 oldest_message = PopMessageHeap();
980
981 TimestampMerger::DeliveryTimestamp timestamp;
982 timestamp.monotonic_event_time =
983 monotonic_clock::time_point(chrono::nanoseconds(
984 std::get<2>(oldest_message).message().monotonic_sent_time()));
985 timestamp.realtime_event_time =
986 realtime_clock::time_point(chrono::nanoseconds(
987 std::get<2>(oldest_message).message().realtime_sent_time()));
988 timestamp.remote_queue_index = 0xffffffff;
989
990 CHECK_EQ(std::get<0>(oldest_message), timestamp.monotonic_event_time);
991 CHECK_EQ(std::get<1>(oldest_message),
992 std::get<2>(oldest_message).message().queue_index());
993
Austin Schuh30dd5c52020-08-01 14:43:44 -0700994 return std::make_tuple(timestamp, std::move(std::get<2>(oldest_message)));
Austin Schuh6f3babe2020-01-26 20:34:50 -0800995 }
996}
997
Austin Schuh8bd96322020-02-13 21:18:22 -0800998void TimestampMerger::NoticeAtEnd() { channel_merger_->NoticeAtEnd(); }
999
Austin Schuh6f3babe2020-01-26 20:34:50 -08001000namespace {
1001std::vector<std::unique_ptr<SplitMessageReader>> MakeSplitMessageReaders(
1002 const std::vector<std::vector<std::string>> &filenames) {
1003 CHECK_GT(filenames.size(), 0u);
1004 // Build up all the SplitMessageReaders.
1005 std::vector<std::unique_ptr<SplitMessageReader>> result;
1006 for (const std::vector<std::string> &filenames : filenames) {
1007 result.emplace_back(std::make_unique<SplitMessageReader>(filenames));
1008 }
1009 return result;
1010}
1011} // namespace
1012
1013ChannelMerger::ChannelMerger(
1014 const std::vector<std::vector<std::string>> &filenames)
1015 : split_message_readers_(MakeSplitMessageReaders(filenames)),
Austin Schuh97789fc2020-08-01 14:42:45 -07001016 log_file_header_(split_message_readers_[0]->raw_log_file_header()) {
Austin Schuh6f3babe2020-01-26 20:34:50 -08001017 // Now, confirm that the configuration matches for each and pick a start time.
1018 // Also return the list of possible nodes.
1019 for (const std::unique_ptr<SplitMessageReader> &reader :
1020 split_message_readers_) {
1021 CHECK(CompareFlatBuffer(log_file_header_.message().configuration(),
1022 reader->log_file_header()->configuration()))
1023 << ": Replaying log files with different configurations isn't "
1024 "supported";
1025 }
1026
1027 nodes_ = configuration::GetNodes(configuration());
1028}
1029
1030bool ChannelMerger::SetNode(const Node *target_node) {
1031 std::vector<SplitMessageReader *> split_message_readers;
1032 for (const std::unique_ptr<SplitMessageReader> &reader :
1033 split_message_readers_) {
1034 split_message_readers.emplace_back(reader.get());
1035 }
1036
1037 // Go find a log_file_header for this node.
1038 {
1039 bool found_node = false;
1040
1041 for (const std::unique_ptr<SplitMessageReader> &reader :
1042 split_message_readers_) {
James Kuszmaulfc273dc2020-05-09 17:56:19 -07001043 // In order to identify which logfile(s) map to the target node, do a
1044 // logical comparison of the nodes, by confirming that we are either in a
1045 // single-node setup (where the nodes will both be nullptr) or that the
1046 // node names match (but the other node fields--e.g., hostname lists--may
1047 // not).
1048 const bool both_null =
1049 reader->node() == nullptr && target_node == nullptr;
1050 const bool both_have_name =
1051 (reader->node() != nullptr) && (target_node != nullptr) &&
1052 (reader->node()->has_name() && target_node->has_name());
1053 const bool node_names_identical =
1054 both_have_name &&
1055 (reader->node()->name()->string_view() ==
1056 target_node->name()->string_view());
1057 if (both_null || node_names_identical) {
Austin Schuh6f3babe2020-01-26 20:34:50 -08001058 if (!found_node) {
1059 found_node = true;
1060 log_file_header_ = CopyFlatBuffer(reader->log_file_header());
Austin Schuhcde938c2020-02-02 17:30:07 -08001061 VLOG(1) << "Found log file " << reader->filename() << " with node "
1062 << FlatbufferToJson(reader->node()) << " start_time "
1063 << monotonic_start_time();
Austin Schuh6f3babe2020-01-26 20:34:50 -08001064 } else {
1065 // And then make sure all the other files have matching headers.
Austin Schuhcde938c2020-02-02 17:30:07 -08001066 CHECK(CompareFlatBuffer(log_file_header(), reader->log_file_header()))
1067 << ": " << FlatbufferToJson(log_file_header()) << " reader "
1068 << FlatbufferToJson(reader->log_file_header());
Austin Schuh6f3babe2020-01-26 20:34:50 -08001069 }
1070 }
1071 }
1072
1073 if (!found_node) {
1074 LOG(WARNING) << "Failed to find log file for node "
1075 << FlatbufferToJson(target_node);
1076 return false;
1077 }
1078 }
1079
1080 // Build up all the timestamp mergers. This connects up all the
1081 // SplitMessageReaders.
1082 timestamp_mergers_.reserve(configuration()->channels()->size());
1083 for (size_t channel_index = 0;
1084 channel_index < configuration()->channels()->size(); ++channel_index) {
1085 timestamp_mergers_.emplace_back(
1086 configuration(), split_message_readers, channel_index,
1087 configuration::GetNode(configuration(), target_node), this);
1088 }
1089
1090 // And prime everything.
Austin Schuh6f3babe2020-01-26 20:34:50 -08001091 for (std::unique_ptr<SplitMessageReader> &split_message_reader :
1092 split_message_readers_) {
Austin Schuhcde938c2020-02-02 17:30:07 -08001093 split_message_reader->QueueMessages(
1094 split_message_reader->monotonic_start_time());
Austin Schuh6f3babe2020-01-26 20:34:50 -08001095 }
1096
1097 node_ = configuration::GetNodeOrDie(configuration(), target_node);
1098 return true;
1099}
1100
Austin Schuh858c9f32020-08-31 16:56:12 -07001101monotonic_clock::time_point ChannelMerger::OldestMessageTime() const {
Brian Silverman8a32ce62020-08-12 12:02:38 -07001102 if (channel_heap_.empty()) {
Austin Schuh6f3babe2020-01-26 20:34:50 -08001103 return monotonic_clock::max_time;
1104 }
1105 return channel_heap_.front().first;
1106}
1107
Austin Schuh8bd96322020-02-13 21:18:22 -08001108TimestampMerger::DeliveryTimestamp ChannelMerger::OldestTimestamp() const {
Brian Silverman8a32ce62020-08-12 12:02:38 -07001109 if (timestamp_heap_.empty()) {
Austin Schuh8bd96322020-02-13 21:18:22 -08001110 return TimestampMerger::DeliveryTimestamp{};
1111 }
1112 return timestamp_mergers_[timestamp_heap_.front().second].OldestTimestamp();
1113}
1114
1115TimestampMerger::DeliveryTimestamp ChannelMerger::OldestTimestampForChannel(
1116 int channel) const {
Austin Schuh6aa77be2020-02-22 21:06:40 -08001117 // If we didn't find any data for this node, we won't have any mergers. Return
1118 // an invalid timestamp in that case.
1119 if (timestamp_mergers_.size() <= static_cast<size_t>(channel)) {
1120 TimestampMerger::DeliveryTimestamp result;
1121 return result;
1122 }
Austin Schuh8bd96322020-02-13 21:18:22 -08001123 return timestamp_mergers_[channel].OldestTimestamp();
1124}
1125
Austin Schuh6f3babe2020-01-26 20:34:50 -08001126void ChannelMerger::PushChannelHeap(monotonic_clock::time_point timestamp,
1127 int channel_index) {
1128 // Pop and recreate the heap if it has already been pushed. And since we are
1129 // pushing again, we don't need to clear pushed.
1130 if (timestamp_mergers_[channel_index].pushed()) {
Brian Silverman8a32ce62020-08-12 12:02:38 -07001131 const auto channel_iterator = std::find_if(
Austin Schuh6f3babe2020-01-26 20:34:50 -08001132 channel_heap_.begin(), channel_heap_.end(),
1133 [channel_index](const std::pair<monotonic_clock::time_point, int> x) {
1134 return x.second == channel_index;
Brian Silverman8a32ce62020-08-12 12:02:38 -07001135 });
1136 DCHECK(channel_iterator != channel_heap_.end());
1137 if (std::get<0>(*channel_iterator) == timestamp) {
1138 // It's already in the heap, in the correct spot, so nothing
1139 // more for us to do here.
1140 return;
1141 }
1142 channel_heap_.erase(channel_iterator);
Austin Schuh6f3babe2020-01-26 20:34:50 -08001143 std::make_heap(channel_heap_.begin(), channel_heap_.end(),
1144 ChannelHeapCompare);
Austin Schuh8bd96322020-02-13 21:18:22 -08001145
1146 if (timestamp_mergers_[channel_index].has_timestamps()) {
Brian Silverman8a32ce62020-08-12 12:02:38 -07001147 const auto timestamp_iterator = std::find_if(
Austin Schuh8bd96322020-02-13 21:18:22 -08001148 timestamp_heap_.begin(), timestamp_heap_.end(),
1149 [channel_index](const std::pair<monotonic_clock::time_point, int> x) {
1150 return x.second == channel_index;
Brian Silverman8a32ce62020-08-12 12:02:38 -07001151 });
1152 DCHECK(timestamp_iterator != timestamp_heap_.end());
1153 if (std::get<0>(*timestamp_iterator) == timestamp) {
1154 // It's already in the heap, in the correct spot, so nothing
1155 // more for us to do here.
1156 return;
1157 }
1158 timestamp_heap_.erase(timestamp_iterator);
Austin Schuh8bd96322020-02-13 21:18:22 -08001159 std::make_heap(timestamp_heap_.begin(), timestamp_heap_.end(),
1160 ChannelHeapCompare);
1161 }
Austin Schuh6f3babe2020-01-26 20:34:50 -08001162 }
1163
Austin Schuh05b70472020-01-01 17:11:17 -08001164 channel_heap_.push_back(std::make_pair(timestamp, channel_index));
1165
1166 // The default sort puts the newest message first. Use a custom comparator to
1167 // put the oldest message first.
1168 std::push_heap(channel_heap_.begin(), channel_heap_.end(),
1169 ChannelHeapCompare);
Austin Schuh8bd96322020-02-13 21:18:22 -08001170
1171 if (timestamp_mergers_[channel_index].has_timestamps()) {
1172 timestamp_heap_.push_back(std::make_pair(timestamp, channel_index));
1173 std::push_heap(timestamp_heap_.begin(), timestamp_heap_.end(),
1174 ChannelHeapCompare);
1175 }
Austin Schuh05b70472020-01-01 17:11:17 -08001176}
1177
Austin Schuh6f3babe2020-01-26 20:34:50 -08001178std::tuple<TimestampMerger::DeliveryTimestamp, int,
1179 FlatbufferVector<MessageHeader>>
1180ChannelMerger::PopOldest() {
Austin Schuh8bd96322020-02-13 21:18:22 -08001181 CHECK_GT(channel_heap_.size(), 0u);
Austin Schuh05b70472020-01-01 17:11:17 -08001182 std::pair<monotonic_clock::time_point, int> oldest_channel_data =
1183 channel_heap_.front();
Austin Schuh6f3babe2020-01-26 20:34:50 -08001184 int channel_index = oldest_channel_data.second;
Austin Schuh05b70472020-01-01 17:11:17 -08001185 std::pop_heap(channel_heap_.begin(), channel_heap_.end(),
1186 &ChannelHeapCompare);
1187 channel_heap_.pop_back();
Austin Schuh8bd96322020-02-13 21:18:22 -08001188
Austin Schuh6f3babe2020-01-26 20:34:50 -08001189 timestamp_mergers_[channel_index].set_pushed(false);
Austin Schuh05b70472020-01-01 17:11:17 -08001190
Austin Schuh6f3babe2020-01-26 20:34:50 -08001191 TimestampMerger *merger = &timestamp_mergers_[channel_index];
Austin Schuh05b70472020-01-01 17:11:17 -08001192
Austin Schuh8bd96322020-02-13 21:18:22 -08001193 if (merger->has_timestamps()) {
1194 CHECK_GT(timestamp_heap_.size(), 0u);
1195 std::pair<monotonic_clock::time_point, int> oldest_timestamp_data =
1196 timestamp_heap_.front();
1197 CHECK(oldest_timestamp_data == oldest_channel_data)
1198 << ": Timestamp heap out of sync.";
1199 std::pop_heap(timestamp_heap_.begin(), timestamp_heap_.end(),
1200 &ChannelHeapCompare);
1201 timestamp_heap_.pop_back();
1202 }
1203
Austin Schuhcde938c2020-02-02 17:30:07 -08001204 // Merger handles any queueing needed from here.
Austin Schuh6f3babe2020-01-26 20:34:50 -08001205 std::tuple<TimestampMerger::DeliveryTimestamp,
1206 FlatbufferVector<MessageHeader>>
1207 message = merger->PopOldest();
Brian Silverman8a32ce62020-08-12 12:02:38 -07001208 DCHECK_EQ(std::get<0>(message).monotonic_event_time,
1209 oldest_channel_data.first)
1210 << ": channel_heap_ was corrupted for " << channel_index << ": "
1211 << DebugString();
Austin Schuh05b70472020-01-01 17:11:17 -08001212
Austin Schuh6f3babe2020-01-26 20:34:50 -08001213 return std::make_tuple(std::get<0>(message), channel_index,
1214 std::move(std::get<1>(message)));
1215}
1216
Austin Schuhcde938c2020-02-02 17:30:07 -08001217std::string SplitMessageReader::MessageHeaderQueue::DebugString() const {
1218 std::stringstream ss;
1219 for (size_t i = 0; i < data_.size(); ++i) {
1220 if (timestamps) {
1221 ss << " msg: ";
1222 } else {
1223 ss << " timestamp: ";
Austin Schuh6f3babe2020-01-26 20:34:50 -08001224 }
Austin Schuhcde938c2020-02-02 17:30:07 -08001225 ss << monotonic_clock::time_point(std::chrono::nanoseconds(
1226 data_[i].message().monotonic_sent_time()))
1227 << " ("
1228 << realtime_clock::time_point(
1229 std::chrono::nanoseconds(data_[i].message().realtime_sent_time()))
1230 << ") " << data_[i].message().queue_index();
1231 if (timestamps) {
1232 ss << " <- remote "
1233 << monotonic_clock::time_point(std::chrono::nanoseconds(
1234 data_[i].message().monotonic_remote_time()))
1235 << " ("
1236 << realtime_clock::time_point(std::chrono::nanoseconds(
1237 data_[i].message().realtime_remote_time()))
1238 << ")";
Austin Schuh6f3babe2020-01-26 20:34:50 -08001239 }
Austin Schuhcde938c2020-02-02 17:30:07 -08001240 ss << "\n";
1241 }
Austin Schuh6f3babe2020-01-26 20:34:50 -08001242
Austin Schuhcde938c2020-02-02 17:30:07 -08001243 return ss.str();
1244}
Austin Schuh6f3babe2020-01-26 20:34:50 -08001245
Austin Schuhcde938c2020-02-02 17:30:07 -08001246std::string SplitMessageReader::DebugString(int channel) const {
1247 std::stringstream ss;
1248 ss << "[\n";
1249 ss << channels_[channel].data.DebugString();
1250 ss << " ]";
1251 return ss.str();
1252}
Austin Schuh6f3babe2020-01-26 20:34:50 -08001253
Austin Schuhcde938c2020-02-02 17:30:07 -08001254std::string SplitMessageReader::DebugString(int channel, int node_index) const {
1255 std::stringstream ss;
1256 ss << "[\n";
1257 ss << channels_[channel].timestamps[node_index].DebugString();
1258 ss << " ]";
1259 return ss.str();
1260}
1261
1262std::string TimestampMerger::DebugString() const {
1263 std::stringstream ss;
1264
1265 if (timestamp_heap_.size() > 0) {
1266 ss << " timestamp_heap {\n";
1267 std::vector<
1268 std::tuple<monotonic_clock::time_point, uint32_t, SplitMessageReader *>>
1269 timestamp_heap = timestamp_heap_;
1270 while (timestamp_heap.size() > 0u) {
1271 std::tuple<monotonic_clock::time_point, uint32_t, SplitMessageReader *>
1272 oldest_timestamp_reader = timestamp_heap.front();
1273
1274 ss << " " << std::get<2>(oldest_timestamp_reader) << " "
1275 << std::get<0>(oldest_timestamp_reader) << " queue_index ("
1276 << std::get<1>(oldest_timestamp_reader) << ") ttq "
1277 << std::get<2>(oldest_timestamp_reader)->time_to_queue() << " "
1278 << std::get<2>(oldest_timestamp_reader)->filename() << " -> "
1279 << std::get<2>(oldest_timestamp_reader)
1280 ->DebugString(channel_index_, node_index_)
1281 << "\n";
1282
1283 std::pop_heap(timestamp_heap.begin(), timestamp_heap.end(),
1284 &SplitMessageReaderHeapCompare);
1285 timestamp_heap.pop_back();
1286 }
1287 ss << " }\n";
1288 }
1289
1290 ss << " message_heap {\n";
1291 {
1292 std::vector<
1293 std::tuple<monotonic_clock::time_point, uint32_t, SplitMessageReader *>>
1294 message_heap = message_heap_;
Brian Silverman8a32ce62020-08-12 12:02:38 -07001295 while (!message_heap.empty()) {
Austin Schuhcde938c2020-02-02 17:30:07 -08001296 std::tuple<monotonic_clock::time_point, uint32_t, SplitMessageReader *>
1297 oldest_message_reader = message_heap.front();
1298
1299 ss << " " << std::get<2>(oldest_message_reader) << " "
1300 << std::get<0>(oldest_message_reader) << " queue_index ("
1301 << std::get<1>(oldest_message_reader) << ") ttq "
1302 << std::get<2>(oldest_message_reader)->time_to_queue() << " "
1303 << std::get<2>(oldest_message_reader)->filename() << " -> "
1304 << std::get<2>(oldest_message_reader)->DebugString(channel_index_)
1305 << "\n";
1306
1307 std::pop_heap(message_heap.begin(), message_heap.end(),
1308 &SplitMessageReaderHeapCompare);
1309 message_heap.pop_back();
Austin Schuh6f3babe2020-01-26 20:34:50 -08001310 }
Austin Schuh05b70472020-01-01 17:11:17 -08001311 }
Austin Schuhcde938c2020-02-02 17:30:07 -08001312 ss << " }";
1313
1314 return ss.str();
1315}
1316
1317std::string ChannelMerger::DebugString() const {
1318 std::stringstream ss;
1319 ss << "start_time " << realtime_start_time() << " " << monotonic_start_time()
1320 << "\n";
1321 ss << "channel_heap {\n";
1322 std::vector<std::pair<monotonic_clock::time_point, int>> channel_heap =
1323 channel_heap_;
Brian Silverman8a32ce62020-08-12 12:02:38 -07001324 while (!channel_heap.empty()) {
Austin Schuhcde938c2020-02-02 17:30:07 -08001325 std::tuple<monotonic_clock::time_point, int> channel = channel_heap.front();
1326 ss << " " << std::get<0>(channel) << " (" << std::get<1>(channel) << ") "
1327 << configuration::CleanedChannelToString(
1328 configuration()->channels()->Get(std::get<1>(channel)))
1329 << "\n";
1330
1331 ss << timestamp_mergers_[std::get<1>(channel)].DebugString() << "\n";
1332
1333 std::pop_heap(channel_heap.begin(), channel_heap.end(),
1334 &ChannelHeapCompare);
1335 channel_heap.pop_back();
1336 }
1337 ss << "}";
1338
1339 return ss.str();
Austin Schuh05b70472020-01-01 17:11:17 -08001340}
1341
Austin Schuhee711052020-08-24 16:06:09 -07001342std::string MaybeNodeName(const Node *node) {
1343 if (node != nullptr) {
1344 return node->name()->str() + " ";
1345 }
1346 return "";
1347}
1348
Austin Schuha36c8902019-12-30 18:07:15 -08001349} // namespace logger
1350} // namespace aos