blob: 9d8145dc4096c5ac277fe7b6f27b0281fc3d8fd5 [file] [log] [blame]
Austin Schuha36c8902019-12-30 18:07:15 -08001#include "aos/events/logging/logfile_utils.h"
2
3#include <fcntl.h>
Austin Schuha36c8902019-12-30 18:07:15 -08004#include <sys/stat.h>
5#include <sys/types.h>
6#include <sys/uio.h>
7
Brian Silvermanf51499a2020-09-21 12:49:08 -07008#include <algorithm>
9#include <climits>
Austin Schuha36c8902019-12-30 18:07:15 -080010
Austin Schuhe4fca832020-03-07 16:58:53 -080011#include "absl/strings/escaping.h"
Austin Schuh05b70472020-01-01 17:11:17 -080012#include "aos/configuration.h"
Austin Schuhfa895892020-01-07 20:07:41 -080013#include "aos/flatbuffer_merge.h"
Austin Schuh6f3babe2020-01-26 20:34:50 -080014#include "aos/util/file.h"
Austin Schuha36c8902019-12-30 18:07:15 -080015#include "flatbuffers/flatbuffers.h"
Austin Schuh05b70472020-01-01 17:11:17 -080016#include "gflags/gflags.h"
17#include "glog/logging.h"
Austin Schuha36c8902019-12-30 18:07:15 -080018
Austin Schuh7fbf5a72020-09-21 16:28:13 -070019DEFINE_int32(flush_size, 128000,
Austin Schuha36c8902019-12-30 18:07:15 -080020 "Number of outstanding bytes to allow before flushing to disk.");
21
Brian Silvermanf51499a2020-09-21 12:49:08 -070022namespace aos::logger {
Austin Schuha36c8902019-12-30 18:07:15 -080023
Austin Schuh05b70472020-01-01 17:11:17 -080024namespace chrono = std::chrono;
25
Brian Silvermanf51499a2020-09-21 12:49:08 -070026DetachedBufferWriter::DetachedBufferWriter(
27 std::string_view filename, std::unique_ptr<DetachedBufferEncoder> encoder)
28 : filename_(filename), encoder_(std::move(encoder)) {
Austin Schuh6f3babe2020-01-26 20:34:50 -080029 util::MkdirP(filename, 0777);
30 fd_ = open(std::string(filename).c_str(),
31 O_RDWR | O_CLOEXEC | O_CREAT | O_EXCL, 0774);
32 VLOG(1) << "Opened " << filename << " for writing";
33 PCHECK(fd_ != -1) << ": Failed to open " << filename << " for writing";
Austin Schuha36c8902019-12-30 18:07:15 -080034}
35
36DetachedBufferWriter::~DetachedBufferWriter() {
Brian Silvermanf51499a2020-09-21 12:49:08 -070037 encoder_->Finish();
38 while (encoder_->queue_size() > 0) {
39 Flush();
40 }
Austin Schuha36c8902019-12-30 18:07:15 -080041 PLOG_IF(ERROR, close(fd_) == -1) << " Failed to close logfile";
Austin Schuh2f8fd752020-09-01 22:38:28 -070042 VLOG(1) << "Closed " << filename_;
43}
44
Brian Silvermand90905f2020-09-23 14:42:56 -070045DetachedBufferWriter::DetachedBufferWriter(DetachedBufferWriter &&other) {
Austin Schuh2f8fd752020-09-01 22:38:28 -070046 *this = std::move(other);
47}
48
Brian Silverman87ac0402020-09-17 14:47:01 -070049// When other is destroyed "soon" (which it should be because we're getting an
50// rvalue reference to it), it will flush etc all the data we have queued up
51// (because that data will then be its data).
Austin Schuh2f8fd752020-09-01 22:38:28 -070052DetachedBufferWriter &DetachedBufferWriter::operator=(
53 DetachedBufferWriter &&other) {
Austin Schuh2f8fd752020-09-01 22:38:28 -070054 std::swap(filename_, other.filename_);
Brian Silvermanf51499a2020-09-21 12:49:08 -070055 std::swap(encoder_, other.encoder_);
Austin Schuh2f8fd752020-09-01 22:38:28 -070056 std::swap(fd_, other.fd_);
Austin Schuh2f8fd752020-09-01 22:38:28 -070057 std::swap(iovec_, other.iovec_);
Brian Silvermanf51499a2020-09-21 12:49:08 -070058 std::swap(max_write_time_, other.max_write_time_);
59 std::swap(max_write_time_bytes_, other.max_write_time_bytes_);
60 std::swap(max_write_time_messages_, other.max_write_time_messages_);
61 std::swap(total_write_time_, other.total_write_time_);
62 std::swap(total_write_count_, other.total_write_count_);
63 std::swap(total_write_messages_, other.total_write_messages_);
64 std::swap(total_write_bytes_, other.total_write_bytes_);
Austin Schuh2f8fd752020-09-01 22:38:28 -070065 return *this;
Austin Schuha36c8902019-12-30 18:07:15 -080066}
67
Brian Silvermanf51499a2020-09-21 12:49:08 -070068void DetachedBufferWriter::QueueSpan(absl::Span<const uint8_t> span) {
69 if (encoder_->may_bypass() && span.size() > 4096u) {
70 // Over this threshold, we'll assume it's cheaper to add an extra
71 // syscall to write the data immediately instead of copying it to
72 // enqueue.
Austin Schuha36c8902019-12-30 18:07:15 -080073
Brian Silvermanf51499a2020-09-21 12:49:08 -070074 // First, flush everything.
75 while (encoder_->queue_size() > 0u) {
76 Flush();
77 }
Austin Schuhde031b72020-01-10 19:34:41 -080078
Brian Silvermanf51499a2020-09-21 12:49:08 -070079 // Then, write it directly.
80 const auto start = aos::monotonic_clock::now();
81 const ssize_t written = write(fd_, span.data(), span.size());
82 const auto end = aos::monotonic_clock::now();
83 PCHECK(written >= 0) << ": write failed";
84 CHECK_EQ(written, static_cast<ssize_t>(span.size()))
85 << ": Wrote " << written << " expected " << span.size();
86 UpdateStatsForWrite(end - start, written, 1);
87 } else {
88 encoder_->Encode(CopySpanAsDetachedBuffer(span));
Austin Schuha36c8902019-12-30 18:07:15 -080089 }
Brian Silvermanf51499a2020-09-21 12:49:08 -070090
91 FlushAtThreshold();
Austin Schuha36c8902019-12-30 18:07:15 -080092}
93
94void DetachedBufferWriter::Flush() {
Brian Silvermanf51499a2020-09-21 12:49:08 -070095 const auto queue = encoder_->queue();
96 if (queue.empty()) {
Austin Schuha36c8902019-12-30 18:07:15 -080097 return;
98 }
Brian Silvermanf51499a2020-09-21 12:49:08 -070099
Austin Schuha36c8902019-12-30 18:07:15 -0800100 iovec_.clear();
Brian Silvermanf51499a2020-09-21 12:49:08 -0700101 const size_t iovec_size = std::min<size_t>(queue.size(), IOV_MAX);
102 iovec_.resize(iovec_size);
Austin Schuha36c8902019-12-30 18:07:15 -0800103 size_t counted_size = 0;
Brian Silvermanf51499a2020-09-21 12:49:08 -0700104 for (size_t i = 0; i < iovec_size; ++i) {
105 iovec_[i].iov_base = const_cast<uint8_t *>(queue[i].data());
106 iovec_[i].iov_len = queue[i].size();
107 counted_size += iovec_[i].iov_len;
Austin Schuha36c8902019-12-30 18:07:15 -0800108 }
Brian Silvermanf51499a2020-09-21 12:49:08 -0700109
110 const auto start = aos::monotonic_clock::now();
Austin Schuha36c8902019-12-30 18:07:15 -0800111 const ssize_t written = writev(fd_, iovec_.data(), iovec_.size());
Brian Silvermanf51499a2020-09-21 12:49:08 -0700112 const auto end = aos::monotonic_clock::now();
113 PCHECK(written >= 0) << ": writev failed";
Austin Schuha36c8902019-12-30 18:07:15 -0800114 // TODO(austin): Handle partial writes in some way other than crashing...
Brian Silvermanf51499a2020-09-21 12:49:08 -0700115 CHECK_EQ(written, static_cast<ssize_t>(counted_size))
116 << ": Wrote " << written << " expected " << counted_size;
117
118 encoder_->Clear(iovec_size);
119
120 UpdateStatsForWrite(end - start, written, iovec_size);
121}
122
123void DetachedBufferWriter::UpdateStatsForWrite(
124 aos::monotonic_clock::duration duration, ssize_t written, int iovec_size) {
125 if (duration > max_write_time_) {
126 max_write_time_ = duration;
127 max_write_time_bytes_ = written;
128 max_write_time_messages_ = iovec_size;
129 }
130 total_write_time_ += duration;
131 ++total_write_count_;
132 total_write_messages_ += iovec_size;
133 total_write_bytes_ += written;
134}
135
136void DetachedBufferWriter::FlushAtThreshold() {
137 // Flush if we are at the max number of iovs per writev, because there's no
138 // point queueing up any more data in memory. Also flush once we have enough
139 // data queued up.
140 while (encoder_->queued_bytes() > static_cast<size_t>(FLAGS_flush_size) ||
141 encoder_->queue_size() >= IOV_MAX) {
142 Flush();
143 }
Austin Schuha36c8902019-12-30 18:07:15 -0800144}
145
146flatbuffers::Offset<MessageHeader> PackMessage(
147 flatbuffers::FlatBufferBuilder *fbb, const Context &context,
148 int channel_index, LogType log_type) {
149 flatbuffers::Offset<flatbuffers::Vector<uint8_t>> data_offset;
150
151 switch (log_type) {
152 case LogType::kLogMessage:
153 case LogType::kLogMessageAndDeliveryTime:
Austin Schuh6f3babe2020-01-26 20:34:50 -0800154 case LogType::kLogRemoteMessage:
Brian Silvermaneaa41d62020-07-08 19:47:35 -0700155 data_offset = fbb->CreateVector(
156 static_cast<const uint8_t *>(context.data), context.size);
Austin Schuha36c8902019-12-30 18:07:15 -0800157 break;
158
159 case LogType::kLogDeliveryTimeOnly:
160 break;
161 }
162
163 MessageHeader::Builder message_header_builder(*fbb);
164 message_header_builder.add_channel_index(channel_index);
Austin Schuh6f3babe2020-01-26 20:34:50 -0800165
166 switch (log_type) {
167 case LogType::kLogRemoteMessage:
168 message_header_builder.add_queue_index(context.remote_queue_index);
169 message_header_builder.add_monotonic_sent_time(
170 context.monotonic_remote_time.time_since_epoch().count());
171 message_header_builder.add_realtime_sent_time(
172 context.realtime_remote_time.time_since_epoch().count());
173 break;
174
175 case LogType::kLogMessage:
176 case LogType::kLogMessageAndDeliveryTime:
177 case LogType::kLogDeliveryTimeOnly:
178 message_header_builder.add_queue_index(context.queue_index);
179 message_header_builder.add_monotonic_sent_time(
180 context.monotonic_event_time.time_since_epoch().count());
181 message_header_builder.add_realtime_sent_time(
182 context.realtime_event_time.time_since_epoch().count());
183 break;
184 }
Austin Schuha36c8902019-12-30 18:07:15 -0800185
186 switch (log_type) {
187 case LogType::kLogMessage:
Austin Schuh6f3babe2020-01-26 20:34:50 -0800188 case LogType::kLogRemoteMessage:
Austin Schuha36c8902019-12-30 18:07:15 -0800189 message_header_builder.add_data(data_offset);
190 break;
191
192 case LogType::kLogMessageAndDeliveryTime:
193 message_header_builder.add_data(data_offset);
194 [[fallthrough]];
195
196 case LogType::kLogDeliveryTimeOnly:
197 message_header_builder.add_monotonic_remote_time(
198 context.monotonic_remote_time.time_since_epoch().count());
199 message_header_builder.add_realtime_remote_time(
200 context.realtime_remote_time.time_since_epoch().count());
201 message_header_builder.add_remote_queue_index(context.remote_queue_index);
202 break;
203 }
204
205 return message_header_builder.Finish();
206}
207
Brian Silvermanf51499a2020-09-21 12:49:08 -0700208SpanReader::SpanReader(std::string_view filename) : filename_(filename) {
209 // Support for other kinds of decoders based on the filename should be added
210 // here.
211 decoder_ = std::make_unique<DummyDecoder>(filename);
Austin Schuh05b70472020-01-01 17:11:17 -0800212}
213
214absl::Span<const uint8_t> SpanReader::ReadMessage() {
215 // Make sure we have enough for the size.
216 if (data_.size() - consumed_data_ < sizeof(flatbuffers::uoffset_t)) {
217 if (!ReadBlock()) {
218 return absl::Span<const uint8_t>();
219 }
220 }
221
222 // Now make sure we have enough for the message.
223 const size_t data_size =
224 flatbuffers::GetPrefixedSize(data_.data() + consumed_data_) +
225 sizeof(flatbuffers::uoffset_t);
Austin Schuhe4fca832020-03-07 16:58:53 -0800226 if (data_size == sizeof(flatbuffers::uoffset_t)) {
227 LOG(ERROR) << "Size of data is zero. Log file end is corrupted, skipping.";
228 LOG(ERROR) << " Rest of log file is "
229 << absl::BytesToHexString(std::string_view(
230 reinterpret_cast<const char *>(data_.data() +
231 consumed_data_),
232 data_.size() - consumed_data_));
233 return absl::Span<const uint8_t>();
234 }
Austin Schuh05b70472020-01-01 17:11:17 -0800235 while (data_.size() < consumed_data_ + data_size) {
236 if (!ReadBlock()) {
237 return absl::Span<const uint8_t>();
238 }
239 }
240
241 // And return it, consuming the data.
242 const uint8_t *data_ptr = data_.data() + consumed_data_;
243
244 consumed_data_ += data_size;
245
246 return absl::Span<const uint8_t>(data_ptr, data_size);
247}
248
249bool SpanReader::MessageAvailable() {
250 // Are we big enough to read the size?
251 if (data_.size() - consumed_data_ < sizeof(flatbuffers::uoffset_t)) {
252 return false;
253 }
254
255 // Then, are we big enough to read the full message?
256 const size_t data_size =
257 flatbuffers::GetPrefixedSize(data_.data() + consumed_data_) +
258 sizeof(flatbuffers::uoffset_t);
259 if (data_.size() < consumed_data_ + data_size) {
260 return false;
261 }
262
263 return true;
264}
265
266bool SpanReader::ReadBlock() {
Brian Silvermanf51499a2020-09-21 12:49:08 -0700267 // This is the amount of data we grab at a time. Doing larger chunks minimizes
268 // syscalls and helps decompressors batch things more efficiently.
Austin Schuh05b70472020-01-01 17:11:17 -0800269 constexpr size_t kReadSize = 256 * 1024;
270
271 // Strip off any unused data at the front.
272 if (consumed_data_ != 0) {
Brian Silvermanf51499a2020-09-21 12:49:08 -0700273 data_.erase_front(consumed_data_);
Austin Schuh05b70472020-01-01 17:11:17 -0800274 consumed_data_ = 0;
275 }
276
277 const size_t starting_size = data_.size();
278
279 // This should automatically grow the backing store. It won't shrink if we
280 // get a small chunk later. This reduces allocations when we want to append
281 // more data.
Brian Silvermanf51499a2020-09-21 12:49:08 -0700282 data_.resize(starting_size + kReadSize);
Austin Schuh05b70472020-01-01 17:11:17 -0800283
Brian Silvermanf51499a2020-09-21 12:49:08 -0700284 const size_t count =
285 decoder_->Read(data_.begin() + starting_size, data_.end());
286 data_.resize(starting_size + count);
Austin Schuh05b70472020-01-01 17:11:17 -0800287 if (count == 0) {
Austin Schuh05b70472020-01-01 17:11:17 -0800288 return false;
289 }
Austin Schuh05b70472020-01-01 17:11:17 -0800290
291 return true;
292}
293
Austin Schuh6f3babe2020-01-26 20:34:50 -0800294FlatbufferVector<LogFileHeader> ReadHeader(std::string_view filename) {
295 SpanReader span_reader(filename);
Austin Schuh6f3babe2020-01-26 20:34:50 -0800296 absl::Span<const uint8_t> config_data = span_reader.ReadMessage();
297
298 // Make sure something was read.
Austin Schuh97789fc2020-08-01 14:42:45 -0700299 CHECK(config_data != absl::Span<const uint8_t>())
300 << ": Failed to read header from: " << filename;
Austin Schuh6f3babe2020-01-26 20:34:50 -0800301
Austin Schuh5212cad2020-09-09 23:12:09 -0700302 // And copy the config so we have it forever, removing the size prefix.
Brian Silverman354697a2020-09-22 21:06:32 -0700303 ResizeableBuffer data;
304 data.resize(config_data.size() - sizeof(flatbuffers::uoffset_t));
305 memcpy(data.data(), config_data.begin() + sizeof(flatbuffers::uoffset_t),
306 data.size());
Austin Schuh6f3babe2020-01-26 20:34:50 -0800307 return FlatbufferVector<LogFileHeader>(std::move(data));
308}
309
Austin Schuh5212cad2020-09-09 23:12:09 -0700310FlatbufferVector<MessageHeader> ReadNthMessage(std::string_view filename,
311 size_t n) {
312 SpanReader span_reader(filename);
313 absl::Span<const uint8_t> data_span = span_reader.ReadMessage();
314 for (size_t i = 0; i < n + 1; ++i) {
315 data_span = span_reader.ReadMessage();
316
317 // Make sure something was read.
318 CHECK(data_span != absl::Span<const uint8_t>())
319 << ": Failed to read data from: " << filename;
320 }
321
Brian Silverman354697a2020-09-22 21:06:32 -0700322 // And copy the config so we have it forever, removing the size prefix.
323 ResizeableBuffer data;
324 data.resize(data_span.size() - sizeof(flatbuffers::uoffset_t));
325 memcpy(data.data(), data_span.begin() + sizeof(flatbuffers::uoffset_t),
326 data.size());
Austin Schuh5212cad2020-09-09 23:12:09 -0700327 return FlatbufferVector<MessageHeader>(std::move(data));
328}
329
Austin Schuh05b70472020-01-01 17:11:17 -0800330MessageReader::MessageReader(std::string_view filename)
Austin Schuh97789fc2020-08-01 14:42:45 -0700331 : span_reader_(filename),
332 raw_log_file_header_(FlatbufferVector<LogFileHeader>::Empty()) {
Austin Schuh05b70472020-01-01 17:11:17 -0800333 // Make sure we have enough to read the size.
Austin Schuh97789fc2020-08-01 14:42:45 -0700334 absl::Span<const uint8_t> header_data = span_reader_.ReadMessage();
Austin Schuh05b70472020-01-01 17:11:17 -0800335
336 // Make sure something was read.
Austin Schuh97789fc2020-08-01 14:42:45 -0700337 CHECK(header_data != absl::Span<const uint8_t>())
338 << ": Failed to read header from: " << filename;
Austin Schuh05b70472020-01-01 17:11:17 -0800339
Austin Schuh97789fc2020-08-01 14:42:45 -0700340 // And copy the header data so we have it forever.
Brian Silverman354697a2020-09-22 21:06:32 -0700341 ResizeableBuffer header_data_copy;
342 header_data_copy.resize(header_data.size() - sizeof(flatbuffers::uoffset_t));
343 memcpy(header_data_copy.data(),
344 header_data.begin() + sizeof(flatbuffers::uoffset_t),
345 header_data_copy.size());
Austin Schuh97789fc2020-08-01 14:42:45 -0700346 raw_log_file_header_ =
347 FlatbufferVector<LogFileHeader>(std::move(header_data_copy));
Austin Schuh05b70472020-01-01 17:11:17 -0800348
Austin Schuhcde938c2020-02-02 17:30:07 -0800349 max_out_of_order_duration_ =
Austin Schuh2f8fd752020-09-01 22:38:28 -0700350 chrono::nanoseconds(log_file_header()->max_out_of_order_duration());
Austin Schuhcde938c2020-02-02 17:30:07 -0800351
352 VLOG(1) << "Opened " << filename << " as node "
353 << FlatbufferToJson(log_file_header()->node());
Austin Schuh05b70472020-01-01 17:11:17 -0800354}
355
356std::optional<FlatbufferVector<MessageHeader>> MessageReader::ReadMessage() {
357 absl::Span<const uint8_t> msg_data = span_reader_.ReadMessage();
358 if (msg_data == absl::Span<const uint8_t>()) {
359 return std::nullopt;
360 }
361
Brian Silverman354697a2020-09-22 21:06:32 -0700362 ResizeableBuffer result_buffer;
363 result_buffer.resize(msg_data.size() - sizeof(flatbuffers::uoffset_t));
364 memcpy(result_buffer.data(),
365 msg_data.begin() + sizeof(flatbuffers::uoffset_t),
366 result_buffer.size());
367 FlatbufferVector<MessageHeader> result(std::move(result_buffer));
Austin Schuh05b70472020-01-01 17:11:17 -0800368
369 const monotonic_clock::time_point timestamp = monotonic_clock::time_point(
370 chrono::nanoseconds(result.message().monotonic_sent_time()));
371
372 newest_timestamp_ = std::max(newest_timestamp_, timestamp);
Austin Schuh8bd96322020-02-13 21:18:22 -0800373 VLOG(2) << "Read from " << filename() << " data " << FlatbufferToJson(result);
Austin Schuh6f3babe2020-01-26 20:34:50 -0800374 return std::move(result);
Austin Schuh05b70472020-01-01 17:11:17 -0800375}
376
Austin Schuh6f3babe2020-01-26 20:34:50 -0800377SplitMessageReader::SplitMessageReader(
Austin Schuhfa895892020-01-07 20:07:41 -0800378 const std::vector<std::string> &filenames)
379 : filenames_(filenames),
Austin Schuh97789fc2020-08-01 14:42:45 -0700380 log_file_header_(FlatbufferVector<LogFileHeader>::Empty()) {
Austin Schuhfa895892020-01-07 20:07:41 -0800381 CHECK(NextLogFile()) << ": filenames is empty. Need files to read.";
382
Austin Schuh6f3babe2020-01-26 20:34:50 -0800383 // Grab any log file header. They should all match (and we will check as we
384 // open more of them).
Austin Schuh97789fc2020-08-01 14:42:45 -0700385 log_file_header_ = message_reader_->raw_log_file_header();
Austin Schuhfa895892020-01-07 20:07:41 -0800386
Austin Schuh2f8fd752020-09-01 22:38:28 -0700387 for (size_t i = 1; i < filenames_.size(); ++i) {
388 MessageReader message_reader(filenames_[i]);
389
390 const monotonic_clock::time_point new_monotonic_start_time(
391 chrono::nanoseconds(
392 message_reader.log_file_header()->monotonic_start_time()));
393 const realtime_clock::time_point new_realtime_start_time(
394 chrono::nanoseconds(
395 message_reader.log_file_header()->realtime_start_time()));
396
397 // There are 2 types of part files. Part files from before time estimation
398 // has started, and part files after. We don't declare a log file "started"
399 // until time estimation is up. And once a log file starts, it should never
400 // stop again, and should remain constant.
401 // To compare both types of headers, we mutate our saved copy of the header
402 // to match the next chunk by updating time if we detect a stopped ->
403 // started transition.
404 if (monotonic_start_time() == monotonic_clock::min_time) {
405 CHECK_EQ(realtime_start_time(), realtime_clock::min_time);
406 // We should only be missing the monotonic start time when logging data
Brian Silverman87ac0402020-09-17 14:47:01 -0700407 // for remote nodes. We don't have a good way to determine the remote
Austin Schuh2f8fd752020-09-01 22:38:28 -0700408 // realtime offset, so it shouldn't be filled out.
409 // TODO(austin): If we have a good way, feel free to fill it out. It
410 // probably won't be better than we could do in post though with the same
411 // data.
412 CHECK(!log_file_header_.mutable_message()->has_realtime_start_time());
413 if (new_monotonic_start_time != monotonic_clock::min_time) {
414 // If we finally found our start time, update the header. Do this once
415 // because it should never change again.
416 log_file_header_.mutable_message()->mutate_monotonic_start_time(
417 new_monotonic_start_time.time_since_epoch().count());
418 log_file_header_.mutable_message()->mutate_realtime_start_time(
419 new_realtime_start_time.time_since_epoch().count());
420 }
421 }
422
Austin Schuh64fab802020-09-09 22:47:47 -0700423 // We don't have a good way to set the realtime start time on remote nodes.
424 // Confirm it remains consistent.
425 CHECK_EQ(log_file_header_.mutable_message()->has_realtime_start_time(),
426 message_reader.log_file_header()->has_realtime_start_time());
427
428 // Parts index will *not* match unless we set them to match. We only want
429 // to accept the start time and parts mismatching, so set them.
430 log_file_header_.mutable_message()->mutate_parts_index(
431 message_reader.log_file_header()->parts_index());
432
Austin Schuh2f8fd752020-09-01 22:38:28 -0700433 // Now compare that the headers match.
Austin Schuh64fab802020-09-09 22:47:47 -0700434 if (!CompareFlatBuffer(message_reader.raw_log_file_header(),
435 log_file_header_)) {
436 if (message_reader.log_file_header()->has_logger_uuid() &&
437 log_file_header_.message().has_logger_uuid() &&
438 message_reader.log_file_header()->logger_uuid()->string_view() !=
439 log_file_header_.message().logger_uuid()->string_view()) {
440 LOG(FATAL) << "Logger UUIDs don't match between log file chunks "
441 << filenames_[0] << " and " << filenames_[i]
442 << ", this is not supported.";
443 }
444 if (message_reader.log_file_header()->has_parts_uuid() &&
445 log_file_header_.message().has_parts_uuid() &&
446 message_reader.log_file_header()->parts_uuid()->string_view() !=
447 log_file_header_.message().parts_uuid()->string_view()) {
448 LOG(FATAL) << "Parts UUIDs don't match between log file chunks "
449 << filenames_[0] << " and " << filenames_[i]
450 << ", this is not supported.";
451 }
452
453 LOG(FATAL) << "Header is different between log file chunks "
454 << filenames_[0] << " and " << filenames_[i]
455 << ", this is not supported.";
456 }
Austin Schuh2f8fd752020-09-01 22:38:28 -0700457 }
Austin Schuh64fab802020-09-09 22:47:47 -0700458 // Put the parts index back to the first log file chunk.
459 log_file_header_.mutable_message()->mutate_parts_index(
460 message_reader_->log_file_header()->parts_index());
Austin Schuh2f8fd752020-09-01 22:38:28 -0700461
Austin Schuh6f3babe2020-01-26 20:34:50 -0800462 // Setup per channel state.
Austin Schuh05b70472020-01-01 17:11:17 -0800463 channels_.resize(configuration()->channels()->size());
Austin Schuh6f3babe2020-01-26 20:34:50 -0800464 for (ChannelData &channel_data : channels_) {
465 channel_data.data.split_reader = this;
466 // Build up the timestamp list.
467 if (configuration::MultiNode(configuration())) {
468 channel_data.timestamps.resize(configuration()->nodes()->size());
469 for (MessageHeaderQueue &queue : channel_data.timestamps) {
470 queue.timestamps = true;
471 queue.split_reader = this;
472 }
473 }
474 }
Austin Schuh05b70472020-01-01 17:11:17 -0800475
Austin Schuh6f3babe2020-01-26 20:34:50 -0800476 // Build up channels_to_write_ as an optimization to make it fast to figure
477 // out which datastructure to place any new data from a channel on.
478 for (const Channel *channel : *configuration()->channels()) {
479 // This is the main case. We will only see data on this node.
480 if (configuration::ChannelIsSendableOnNode(channel, node())) {
481 channels_to_write_.emplace_back(
482 &channels_[channels_to_write_.size()].data);
483 } else
484 // If we can't send, but can receive, we should be able to see
485 // timestamps here.
486 if (configuration::ChannelIsReadableOnNode(channel, node())) {
487 channels_to_write_.emplace_back(
488 &(channels_[channels_to_write_.size()]
489 .timestamps[configuration::GetNodeIndex(configuration(),
490 node())]));
491 } else {
492 channels_to_write_.emplace_back(nullptr);
493 }
494 }
Austin Schuh05b70472020-01-01 17:11:17 -0800495}
496
Austin Schuh6f3babe2020-01-26 20:34:50 -0800497bool SplitMessageReader::NextLogFile() {
Austin Schuhfa895892020-01-07 20:07:41 -0800498 if (next_filename_index_ == filenames_.size()) {
499 return false;
500 }
501 message_reader_ =
502 std::make_unique<MessageReader>(filenames_[next_filename_index_]);
503
504 // We can't support the config diverging between two log file headers. See if
505 // they are the same.
506 if (next_filename_index_ != 0) {
Austin Schuh64fab802020-09-09 22:47:47 -0700507 // In order for the headers to identically compare, they need to have the
508 // same parts_index. Rewrite the saved header with the new parts_index,
509 // compare, and then restore.
510 const int32_t original_parts_index =
511 log_file_header_.message().parts_index();
512 log_file_header_.mutable_message()->mutate_parts_index(
513 message_reader_->log_file_header()->parts_index());
514
Austin Schuh97789fc2020-08-01 14:42:45 -0700515 CHECK(CompareFlatBuffer(message_reader_->raw_log_file_header(),
516 log_file_header_))
Austin Schuhfa895892020-01-07 20:07:41 -0800517 << ": Header is different between log file chunks "
518 << filenames_[next_filename_index_] << " and "
519 << filenames_[next_filename_index_ - 1] << ", this is not supported.";
Austin Schuh64fab802020-09-09 22:47:47 -0700520
521 log_file_header_.mutable_message()->mutate_parts_index(
522 original_parts_index);
Austin Schuhfa895892020-01-07 20:07:41 -0800523 }
524
525 ++next_filename_index_;
526 return true;
527}
528
Austin Schuh6f3babe2020-01-26 20:34:50 -0800529bool SplitMessageReader::QueueMessages(
Austin Schuhcde938c2020-02-02 17:30:07 -0800530 monotonic_clock::time_point last_dequeued_time) {
Austin Schuh6f3babe2020-01-26 20:34:50 -0800531 // TODO(austin): Once we are happy that everything works, read a 256kb chunk
532 // to reduce the need to re-heap down below.
Austin Schuhcde938c2020-02-02 17:30:07 -0800533
534 // Special case no more data. Otherwise we blow up on the CHECK statement
535 // confirming that we have enough data queued.
536 if (at_end_) {
537 return false;
538 }
539
540 // If this isn't the first time around, confirm that we had enough data queued
541 // to follow the contract.
542 if (time_to_queue_ != monotonic_clock::min_time) {
543 CHECK_LE(last_dequeued_time,
544 newest_timestamp() - max_out_of_order_duration())
545 << " node " << FlatbufferToJson(node()) << " on " << this;
546
547 // Bail if there is enough data already queued.
548 if (last_dequeued_time < time_to_queue_) {
Austin Schuhee711052020-08-24 16:06:09 -0700549 VLOG(1) << MaybeNodeName(target_node_) << "All up to date on " << this
550 << ", dequeued " << last_dequeued_time << " queue time "
551 << time_to_queue_;
Austin Schuhcde938c2020-02-02 17:30:07 -0800552 return true;
553 }
554 } else {
555 // Startup takes a special dance. We want to queue up until the start time,
556 // but we then want to find the next message to read. The conservative
557 // answer is to immediately trigger a second requeue to get things moving.
558 time_to_queue_ = monotonic_start_time();
559 QueueMessages(time_to_queue_);
560 }
561
562 // If we are asked to queue, queue for at least max_out_of_order_duration past
563 // the last known time in the log file (ie the newest timestep read). As long
564 // as we requeue exactly when time_to_queue_ is dequeued and go no further, we
565 // are safe. And since we pop in order, that works.
566 //
567 // Special case the start of the log file. There should be at most 1 message
568 // from each channel at the start of the log file. So always force the start
569 // of the log file to just be read.
570 time_to_queue_ = std::max(time_to_queue_, newest_timestamp());
Austin Schuhee711052020-08-24 16:06:09 -0700571 VLOG(1) << MaybeNodeName(target_node_) << "Queueing, going until "
572 << time_to_queue_ << " " << filename();
Austin Schuhcde938c2020-02-02 17:30:07 -0800573
574 bool was_emplaced = false;
Austin Schuh6f3babe2020-01-26 20:34:50 -0800575 while (true) {
Austin Schuhcde938c2020-02-02 17:30:07 -0800576 // Stop if we have enough.
Brian Silverman98360e22020-04-28 16:51:20 -0700577 if (newest_timestamp() > time_to_queue_ + max_out_of_order_duration() &&
Austin Schuhcde938c2020-02-02 17:30:07 -0800578 was_emplaced) {
Austin Schuhee711052020-08-24 16:06:09 -0700579 VLOG(1) << MaybeNodeName(target_node_) << "Done queueing on " << this
580 << ", queued to " << newest_timestamp() << " with requeue time "
581 << time_to_queue_;
Austin Schuh6f3babe2020-01-26 20:34:50 -0800582 return true;
583 }
Austin Schuh05b70472020-01-01 17:11:17 -0800584
Austin Schuh6f3babe2020-01-26 20:34:50 -0800585 if (std::optional<FlatbufferVector<MessageHeader>> msg =
586 message_reader_->ReadMessage()) {
587 const MessageHeader &header = msg.value().message();
588
Austin Schuhcde938c2020-02-02 17:30:07 -0800589 const monotonic_clock::time_point timestamp = monotonic_clock::time_point(
590 chrono::nanoseconds(header.monotonic_sent_time()));
Austin Schuh6f3babe2020-01-26 20:34:50 -0800591
Austin Schuh0b5fd032020-03-28 17:36:49 -0700592 if (VLOG_IS_ON(2)) {
Brian Silvermand90905f2020-09-23 14:42:56 -0700593 LOG(INFO) << MaybeNodeName(target_node_) << "Queued " << this << " "
594 << filename() << " ttq: " << time_to_queue_ << " now "
Austin Schuhee711052020-08-24 16:06:09 -0700595 << newest_timestamp() << " start time "
596 << monotonic_start_time() << " " << FlatbufferToJson(&header);
Austin Schuh0b5fd032020-03-28 17:36:49 -0700597 } else if (VLOG_IS_ON(1)) {
598 FlatbufferVector<MessageHeader> copy = msg.value();
599 copy.mutable_message()->clear_data();
Austin Schuhee711052020-08-24 16:06:09 -0700600 LOG(INFO) << MaybeNodeName(target_node_) << "Queued " << this << " "
601 << filename() << " ttq: " << time_to_queue_ << " now "
602 << newest_timestamp() << " start time "
603 << monotonic_start_time() << " " << FlatbufferToJson(copy);
Austin Schuh0b5fd032020-03-28 17:36:49 -0700604 }
Austin Schuhcde938c2020-02-02 17:30:07 -0800605
606 const int channel_index = header.channel_index();
607 was_emplaced = channels_to_write_[channel_index]->emplace_back(
608 std::move(msg.value()));
609 if (was_emplaced) {
610 newest_timestamp_ = std::max(newest_timestamp_, timestamp);
611 }
Austin Schuh6f3babe2020-01-26 20:34:50 -0800612 } else {
613 if (!NextLogFile()) {
Austin Schuhee711052020-08-24 16:06:09 -0700614 VLOG(1) << MaybeNodeName(target_node_) << "No more files, last was "
615 << filenames_.back();
Austin Schuhcde938c2020-02-02 17:30:07 -0800616 at_end_ = true;
Austin Schuh8bd96322020-02-13 21:18:22 -0800617 for (MessageHeaderQueue *queue : channels_to_write_) {
618 if (queue == nullptr || queue->timestamp_merger == nullptr) {
619 continue;
620 }
621 queue->timestamp_merger->NoticeAtEnd();
622 }
Austin Schuh6f3babe2020-01-26 20:34:50 -0800623 return false;
624 }
625 }
Austin Schuh05b70472020-01-01 17:11:17 -0800626 }
Austin Schuh6f3babe2020-01-26 20:34:50 -0800627}
628
629void SplitMessageReader::SetTimestampMerger(TimestampMerger *timestamp_merger,
630 int channel_index,
631 const Node *target_node) {
632 const Node *reinterpreted_target_node =
633 configuration::GetNodeOrDie(configuration(), target_node);
Austin Schuhee711052020-08-24 16:06:09 -0700634 target_node_ = reinterpreted_target_node;
635
Austin Schuh6f3babe2020-01-26 20:34:50 -0800636 const Channel *const channel =
637 configuration()->channels()->Get(channel_index);
638
Austin Schuhcde938c2020-02-02 17:30:07 -0800639 VLOG(1) << " Configuring merger " << this << " for channel " << channel_index
640 << " "
641 << configuration::CleanedChannelToString(
642 configuration()->channels()->Get(channel_index));
643
Austin Schuh6f3babe2020-01-26 20:34:50 -0800644 MessageHeaderQueue *message_header_queue = nullptr;
645
646 // Figure out if this log file is from our point of view, or the other node's
647 // point of view.
648 if (node() == reinterpreted_target_node) {
Austin Schuhcde938c2020-02-02 17:30:07 -0800649 VLOG(1) << " Replaying as logged node " << filename();
650
651 if (configuration::ChannelIsSendableOnNode(channel, node())) {
652 VLOG(1) << " Data on node";
653 message_header_queue = &(channels_[channel_index].data);
654 } else if (configuration::ChannelIsReadableOnNode(channel, node())) {
655 VLOG(1) << " Timestamps on node";
656 message_header_queue =
657 &(channels_[channel_index].timestamps[configuration::GetNodeIndex(
658 configuration(), node())]);
Austin Schuh6f3babe2020-01-26 20:34:50 -0800659 } else {
Austin Schuhcde938c2020-02-02 17:30:07 -0800660 VLOG(1) << " Dropping";
Austin Schuh6f3babe2020-01-26 20:34:50 -0800661 }
662 } else {
Austin Schuhcde938c2020-02-02 17:30:07 -0800663 VLOG(1) << " Replaying as other node " << filename();
Austin Schuh6f3babe2020-01-26 20:34:50 -0800664 // We are replaying from another node's point of view. The only interesting
Austin Schuhcde938c2020-02-02 17:30:07 -0800665 // data is data that is sent from our node and received on theirs.
666 if (configuration::ChannelIsReadableOnNode(channel,
667 reinterpreted_target_node) &&
668 configuration::ChannelIsSendableOnNode(channel, node())) {
669 VLOG(1) << " Readable on target node";
Austin Schuh6f3babe2020-01-26 20:34:50 -0800670 // Data from another node.
671 message_header_queue = &(channels_[channel_index].data);
672 } else {
Austin Schuhcde938c2020-02-02 17:30:07 -0800673 VLOG(1) << " Dropping";
Austin Schuh6f3babe2020-01-26 20:34:50 -0800674 // This is either not sendable on the other node, or is a timestamp and
675 // therefore not interesting.
676 }
677 }
678
679 // If we found one, write it down. This will be nullptr when there is nothing
680 // relevant on this channel on this node for the target node. In that case,
681 // we want to drop the message instead of queueing it.
682 if (message_header_queue != nullptr) {
683 message_header_queue->timestamp_merger = timestamp_merger;
684 }
685}
686
687std::tuple<monotonic_clock::time_point, uint32_t,
688 FlatbufferVector<MessageHeader>>
689SplitMessageReader::PopOldest(int channel_index) {
690 CHECK_GT(channels_[channel_index].data.size(), 0u);
Austin Schuhcde938c2020-02-02 17:30:07 -0800691 const std::tuple<monotonic_clock::time_point, uint32_t, const MessageHeader *>
692 timestamp = channels_[channel_index].data.front_timestamp();
Austin Schuh6f3babe2020-01-26 20:34:50 -0800693 FlatbufferVector<MessageHeader> front =
694 std::move(channels_[channel_index].data.front());
Austin Schuh2f8fd752020-09-01 22:38:28 -0700695 channels_[channel_index].data.PopFront();
Austin Schuhcde938c2020-02-02 17:30:07 -0800696
Austin Schuh2f8fd752020-09-01 22:38:28 -0700697 VLOG(1) << MaybeNodeName(target_node_) << "Popped Data " << this << " "
698 << std::get<0>(timestamp) << " for "
699 << configuration::StrippedChannelToString(
700 configuration()->channels()->Get(channel_index))
701 << " (" << channel_index << ")";
Austin Schuhcde938c2020-02-02 17:30:07 -0800702
703 QueueMessages(std::get<0>(timestamp));
Austin Schuh6f3babe2020-01-26 20:34:50 -0800704
705 return std::make_tuple(std::get<0>(timestamp), std::get<1>(timestamp),
706 std::move(front));
707}
708
709std::tuple<monotonic_clock::time_point, uint32_t,
710 FlatbufferVector<MessageHeader>>
Austin Schuh2f8fd752020-09-01 22:38:28 -0700711SplitMessageReader::PopOldestTimestamp(int channel, int node_index) {
Austin Schuh6f3babe2020-01-26 20:34:50 -0800712 CHECK_GT(channels_[channel].timestamps[node_index].size(), 0u);
Austin Schuhcde938c2020-02-02 17:30:07 -0800713 const std::tuple<monotonic_clock::time_point, uint32_t, const MessageHeader *>
714 timestamp = channels_[channel].timestamps[node_index].front_timestamp();
Austin Schuh6f3babe2020-01-26 20:34:50 -0800715 FlatbufferVector<MessageHeader> front =
716 std::move(channels_[channel].timestamps[node_index].front());
Austin Schuh2f8fd752020-09-01 22:38:28 -0700717 channels_[channel].timestamps[node_index].PopFront();
Austin Schuhcde938c2020-02-02 17:30:07 -0800718
Austin Schuh2f8fd752020-09-01 22:38:28 -0700719 VLOG(1) << MaybeNodeName(target_node_) << "Popped timestamp " << this << " "
Austin Schuhee711052020-08-24 16:06:09 -0700720 << std::get<0>(timestamp) << " for "
721 << configuration::StrippedChannelToString(
722 configuration()->channels()->Get(channel))
Austin Schuh2f8fd752020-09-01 22:38:28 -0700723 << " on "
724 << configuration()->nodes()->Get(node_index)->name()->string_view()
725 << " (" << node_index << ")";
Austin Schuhcde938c2020-02-02 17:30:07 -0800726
727 QueueMessages(std::get<0>(timestamp));
Austin Schuh6f3babe2020-01-26 20:34:50 -0800728
729 return std::make_tuple(std::get<0>(timestamp), std::get<1>(timestamp),
730 std::move(front));
731}
732
Austin Schuhcde938c2020-02-02 17:30:07 -0800733bool SplitMessageReader::MessageHeaderQueue::emplace_back(
Austin Schuh6f3babe2020-01-26 20:34:50 -0800734 FlatbufferVector<MessageHeader> &&msg) {
735 CHECK(split_reader != nullptr);
736
737 // If there is no timestamp merger for this queue, nobody is listening. Drop
738 // the message. This happens when a log file from another node is replayed,
739 // and the timestamp mergers down stream just don't care.
740 if (timestamp_merger == nullptr) {
Austin Schuhcde938c2020-02-02 17:30:07 -0800741 return false;
Austin Schuh6f3babe2020-01-26 20:34:50 -0800742 }
743
744 CHECK(timestamps != msg.message().has_data())
745 << ": Got timestamps and data mixed up on a node. "
746 << FlatbufferToJson(msg);
747
748 data_.emplace_back(std::move(msg));
749
750 if (data_.size() == 1u) {
751 // Yup, new data. Notify.
752 if (timestamps) {
753 timestamp_merger->UpdateTimestamp(split_reader, front_timestamp());
754 } else {
755 timestamp_merger->Update(split_reader, front_timestamp());
756 }
757 }
Austin Schuhcde938c2020-02-02 17:30:07 -0800758
759 return true;
Austin Schuh6f3babe2020-01-26 20:34:50 -0800760}
761
Austin Schuh2f8fd752020-09-01 22:38:28 -0700762void SplitMessageReader::MessageHeaderQueue::PopFront() {
Austin Schuh6f3babe2020-01-26 20:34:50 -0800763 data_.pop_front();
764 if (data_.size() != 0u) {
765 // Yup, new data.
766 if (timestamps) {
767 timestamp_merger->UpdateTimestamp(split_reader, front_timestamp());
768 } else {
769 timestamp_merger->Update(split_reader, front_timestamp());
770 }
Austin Schuh2f8fd752020-09-01 22:38:28 -0700771 } else {
772 // Poke anyways to update the heap.
773 if (timestamps) {
774 timestamp_merger->UpdateTimestamp(
775 nullptr, std::make_tuple(monotonic_clock::min_time, 0, nullptr));
776 } else {
777 timestamp_merger->Update(
778 nullptr, std::make_tuple(monotonic_clock::min_time, 0, nullptr));
779 }
Austin Schuh6f3babe2020-01-26 20:34:50 -0800780 }
Austin Schuh05b70472020-01-01 17:11:17 -0800781}
782
783namespace {
784
Austin Schuh6f3babe2020-01-26 20:34:50 -0800785bool SplitMessageReaderHeapCompare(
786 const std::tuple<monotonic_clock::time_point, uint32_t,
787 SplitMessageReader *>
788 first,
789 const std::tuple<monotonic_clock::time_point, uint32_t,
790 SplitMessageReader *>
791 second) {
792 if (std::get<0>(first) > std::get<0>(second)) {
793 return true;
794 } else if (std::get<0>(first) == std::get<0>(second)) {
795 if (std::get<1>(first) > std::get<1>(second)) {
796 return true;
797 } else if (std::get<1>(first) == std::get<1>(second)) {
798 return std::get<2>(first) > std::get<2>(second);
799 } else {
800 return false;
801 }
802 } else {
803 return false;
804 }
805}
806
Austin Schuh05b70472020-01-01 17:11:17 -0800807bool ChannelHeapCompare(
808 const std::pair<monotonic_clock::time_point, int> first,
809 const std::pair<monotonic_clock::time_point, int> second) {
810 if (first.first > second.first) {
811 return true;
812 } else if (first.first == second.first) {
813 return first.second > second.second;
814 } else {
815 return false;
816 }
817}
818
819} // namespace
820
Austin Schuh6f3babe2020-01-26 20:34:50 -0800821TimestampMerger::TimestampMerger(
822 const Configuration *configuration,
823 std::vector<SplitMessageReader *> split_message_readers, int channel_index,
824 const Node *target_node, ChannelMerger *channel_merger)
825 : configuration_(configuration),
826 split_message_readers_(std::move(split_message_readers)),
827 channel_index_(channel_index),
828 node_index_(configuration::MultiNode(configuration)
829 ? configuration::GetNodeIndex(configuration, target_node)
830 : -1),
831 channel_merger_(channel_merger) {
832 // Tell the readers we care so they know who to notify.
Austin Schuhcde938c2020-02-02 17:30:07 -0800833 VLOG(1) << "Configuring channel " << channel_index << " target node "
834 << FlatbufferToJson(target_node);
Austin Schuh6f3babe2020-01-26 20:34:50 -0800835 for (SplitMessageReader *reader : split_message_readers_) {
836 reader->SetTimestampMerger(this, channel_index, target_node);
837 }
838
839 // And then determine if we need to track timestamps.
840 const Channel *channel = configuration->channels()->Get(channel_index);
841 if (!configuration::ChannelIsSendableOnNode(channel, target_node) &&
842 configuration::ChannelIsReadableOnNode(channel, target_node)) {
843 has_timestamps_ = true;
844 }
845}
846
847void TimestampMerger::PushMessageHeap(
Austin Schuhcde938c2020-02-02 17:30:07 -0800848 std::tuple<monotonic_clock::time_point, uint32_t, const MessageHeader *>
849 timestamp,
Austin Schuh6f3babe2020-01-26 20:34:50 -0800850 SplitMessageReader *split_message_reader) {
Austin Schuh2f8fd752020-09-01 22:38:28 -0700851 if (split_message_reader != nullptr) {
852 DCHECK(std::find_if(message_heap_.begin(), message_heap_.end(),
853 [split_message_reader](
854 const std::tuple<monotonic_clock::time_point,
855 uint32_t, SplitMessageReader *>
856 x) {
857 return std::get<2>(x) == split_message_reader;
858 }) == message_heap_.end())
859 << ": Pushing message when it is already in the heap.";
Austin Schuh6f3babe2020-01-26 20:34:50 -0800860
Austin Schuh2f8fd752020-09-01 22:38:28 -0700861 message_heap_.push_back(std::make_tuple(
862 std::get<0>(timestamp), std::get<1>(timestamp), split_message_reader));
Austin Schuh6f3babe2020-01-26 20:34:50 -0800863
Austin Schuh2f8fd752020-09-01 22:38:28 -0700864 std::push_heap(message_heap_.begin(), message_heap_.end(),
865 &SplitMessageReaderHeapCompare);
866 }
Austin Schuh6f3babe2020-01-26 20:34:50 -0800867
868 // If we are just a data merger, don't wait for timestamps.
869 if (!has_timestamps_) {
Austin Schuh2f8fd752020-09-01 22:38:28 -0700870 if (!message_heap_.empty()) {
871 channel_merger_->Update(std::get<0>(message_heap_[0]), channel_index_);
872 pushed_ = true;
873 } else {
874 // Remove ourselves if we are empty.
875 channel_merger_->Update(monotonic_clock::min_time, channel_index_);
876 }
Austin Schuh6f3babe2020-01-26 20:34:50 -0800877 }
878}
879
Austin Schuhcde938c2020-02-02 17:30:07 -0800880std::tuple<monotonic_clock::time_point, uint32_t, const MessageHeader *>
881TimestampMerger::oldest_message() const {
882 CHECK_GT(message_heap_.size(), 0u);
883 std::tuple<monotonic_clock::time_point, uint32_t, SplitMessageReader *>
884 oldest_message_reader = message_heap_.front();
885 return std::get<2>(oldest_message_reader)->oldest_message(channel_index_);
886}
887
888std::tuple<monotonic_clock::time_point, uint32_t, const MessageHeader *>
889TimestampMerger::oldest_timestamp() const {
890 CHECK_GT(timestamp_heap_.size(), 0u);
891 std::tuple<monotonic_clock::time_point, uint32_t, SplitMessageReader *>
892 oldest_message_reader = timestamp_heap_.front();
893 return std::get<2>(oldest_message_reader)
894 ->oldest_message(channel_index_, node_index_);
895}
896
Austin Schuh6f3babe2020-01-26 20:34:50 -0800897void TimestampMerger::PushTimestampHeap(
Austin Schuhcde938c2020-02-02 17:30:07 -0800898 std::tuple<monotonic_clock::time_point, uint32_t, const MessageHeader *>
899 timestamp,
Austin Schuh6f3babe2020-01-26 20:34:50 -0800900 SplitMessageReader *split_message_reader) {
Austin Schuh2f8fd752020-09-01 22:38:28 -0700901 if (split_message_reader != nullptr) {
902 DCHECK(std::find_if(timestamp_heap_.begin(), timestamp_heap_.end(),
903 [split_message_reader](
904 const std::tuple<monotonic_clock::time_point,
905 uint32_t, SplitMessageReader *>
906 x) {
907 return std::get<2>(x) == split_message_reader;
908 }) == timestamp_heap_.end())
909 << ": Pushing timestamp when it is already in the heap.";
Austin Schuh6f3babe2020-01-26 20:34:50 -0800910
Austin Schuh2f8fd752020-09-01 22:38:28 -0700911 timestamp_heap_.push_back(std::make_tuple(
912 std::get<0>(timestamp), std::get<1>(timestamp), split_message_reader));
Austin Schuh6f3babe2020-01-26 20:34:50 -0800913
Austin Schuh2f8fd752020-09-01 22:38:28 -0700914 std::push_heap(timestamp_heap_.begin(), timestamp_heap_.end(),
915 SplitMessageReaderHeapCompare);
916 }
Austin Schuh6f3babe2020-01-26 20:34:50 -0800917
918 // If we are a timestamp merger, don't wait for data. Missing data will be
919 // caught at read time.
920 if (has_timestamps_) {
Austin Schuh2f8fd752020-09-01 22:38:28 -0700921 if (!timestamp_heap_.empty()) {
922 channel_merger_->Update(std::get<0>(timestamp_heap_[0]), channel_index_);
923 pushed_ = true;
924 } else {
925 // Remove ourselves if we are empty.
926 channel_merger_->Update(monotonic_clock::min_time, channel_index_);
927 }
Austin Schuh6f3babe2020-01-26 20:34:50 -0800928 }
929}
930
931std::tuple<monotonic_clock::time_point, uint32_t,
932 FlatbufferVector<MessageHeader>>
933TimestampMerger::PopMessageHeap() {
934 // Pop the oldest message reader pointer off the heap.
935 CHECK_GT(message_heap_.size(), 0u);
936 std::tuple<monotonic_clock::time_point, uint32_t, SplitMessageReader *>
937 oldest_message_reader = message_heap_.front();
938
939 std::pop_heap(message_heap_.begin(), message_heap_.end(),
940 &SplitMessageReaderHeapCompare);
941 message_heap_.pop_back();
942
943 // Pop the oldest message. This re-pushes any messages from the reader to the
944 // message heap.
945 std::tuple<monotonic_clock::time_point, uint32_t,
946 FlatbufferVector<MessageHeader>>
947 oldest_message =
948 std::get<2>(oldest_message_reader)->PopOldest(channel_index_);
949
950 // Confirm that the time and queue_index we have recorded matches.
951 CHECK_EQ(std::get<0>(oldest_message), std::get<0>(oldest_message_reader));
952 CHECK_EQ(std::get<1>(oldest_message), std::get<1>(oldest_message_reader));
953
954 // Now, keep reading until we have found all duplicates.
Brian Silverman8a32ce62020-08-12 12:02:38 -0700955 while (!message_heap_.empty()) {
Austin Schuh6f3babe2020-01-26 20:34:50 -0800956 // See if it is a duplicate.
957 std::tuple<monotonic_clock::time_point, uint32_t, SplitMessageReader *>
958 next_oldest_message_reader = message_heap_.front();
959
Austin Schuhcde938c2020-02-02 17:30:07 -0800960 std::tuple<monotonic_clock::time_point, uint32_t, const MessageHeader *>
961 next_oldest_message_time = std::get<2>(next_oldest_message_reader)
962 ->oldest_message(channel_index_);
Austin Schuh6f3babe2020-01-26 20:34:50 -0800963
964 if (std::get<0>(next_oldest_message_time) == std::get<0>(oldest_message) &&
965 std::get<1>(next_oldest_message_time) == std::get<1>(oldest_message)) {
966 // Pop the message reader pointer.
967 std::pop_heap(message_heap_.begin(), message_heap_.end(),
968 &SplitMessageReaderHeapCompare);
969 message_heap_.pop_back();
970
971 // Pop the next oldest message. This re-pushes any messages from the
972 // reader.
973 std::tuple<monotonic_clock::time_point, uint32_t,
974 FlatbufferVector<MessageHeader>>
975 next_oldest_message = std::get<2>(next_oldest_message_reader)
976 ->PopOldest(channel_index_);
977
978 // And make sure the message matches in it's entirety.
979 CHECK(std::get<2>(oldest_message).span() ==
980 std::get<2>(next_oldest_message).span())
981 << ": Data at the same timestamp doesn't match.";
982 } else {
983 break;
984 }
985 }
986
987 return oldest_message;
988}
989
990std::tuple<monotonic_clock::time_point, uint32_t,
991 FlatbufferVector<MessageHeader>>
992TimestampMerger::PopTimestampHeap() {
993 // Pop the oldest message reader pointer off the heap.
994 CHECK_GT(timestamp_heap_.size(), 0u);
995
996 std::tuple<monotonic_clock::time_point, uint32_t, SplitMessageReader *>
997 oldest_timestamp_reader = timestamp_heap_.front();
998
999 std::pop_heap(timestamp_heap_.begin(), timestamp_heap_.end(),
1000 &SplitMessageReaderHeapCompare);
1001 timestamp_heap_.pop_back();
1002
1003 CHECK(node_index_ != -1) << ": Timestamps in a single node environment";
1004
1005 // Pop the oldest message. This re-pushes any timestamps from the reader to
1006 // the timestamp heap.
1007 std::tuple<monotonic_clock::time_point, uint32_t,
1008 FlatbufferVector<MessageHeader>>
1009 oldest_timestamp = std::get<2>(oldest_timestamp_reader)
Austin Schuh2f8fd752020-09-01 22:38:28 -07001010 ->PopOldestTimestamp(channel_index_, node_index_);
Austin Schuh6f3babe2020-01-26 20:34:50 -08001011
1012 // Confirm that the time we have recorded matches.
1013 CHECK_EQ(std::get<0>(oldest_timestamp), std::get<0>(oldest_timestamp_reader));
1014 CHECK_EQ(std::get<1>(oldest_timestamp), std::get<1>(oldest_timestamp_reader));
1015
Austin Schuh2f8fd752020-09-01 22:38:28 -07001016 // Now, keep reading until we have found all duplicates.
1017 while (!timestamp_heap_.empty()) {
1018 // See if it is a duplicate.
1019 std::tuple<monotonic_clock::time_point, uint32_t, SplitMessageReader *>
1020 next_oldest_timestamp_reader = timestamp_heap_.front();
Austin Schuh6f3babe2020-01-26 20:34:50 -08001021
Austin Schuh2f8fd752020-09-01 22:38:28 -07001022 std::tuple<monotonic_clock::time_point, uint32_t, const MessageHeader *>
1023 next_oldest_timestamp_time =
1024 std::get<2>(next_oldest_timestamp_reader)
1025 ->oldest_message(channel_index_, node_index_);
Austin Schuh6f3babe2020-01-26 20:34:50 -08001026
Austin Schuh2f8fd752020-09-01 22:38:28 -07001027 if (std::get<0>(next_oldest_timestamp_time) ==
1028 std::get<0>(oldest_timestamp) &&
1029 std::get<1>(next_oldest_timestamp_time) ==
1030 std::get<1>(oldest_timestamp)) {
1031 // Pop the timestamp reader pointer.
1032 std::pop_heap(timestamp_heap_.begin(), timestamp_heap_.end(),
1033 &SplitMessageReaderHeapCompare);
1034 timestamp_heap_.pop_back();
1035
1036 // Pop the next oldest timestamp. This re-pushes any messages from the
1037 // reader.
1038 std::tuple<monotonic_clock::time_point, uint32_t,
1039 FlatbufferVector<MessageHeader>>
1040 next_oldest_timestamp =
1041 std::get<2>(next_oldest_timestamp_reader)
1042 ->PopOldestTimestamp(channel_index_, node_index_);
1043
1044 // And make sure the contents matches in it's entirety.
1045 CHECK(std::get<2>(oldest_timestamp).span() ==
1046 std::get<2>(next_oldest_timestamp).span())
1047 << ": Data at the same timestamp doesn't match, "
1048 << aos::FlatbufferToJson(std::get<2>(oldest_timestamp)) << " vs "
1049 << aos::FlatbufferToJson(std::get<2>(next_oldest_timestamp)) << " "
1050 << absl::BytesToHexString(std::string_view(
1051 reinterpret_cast<const char *>(
1052 std::get<2>(oldest_timestamp).span().data()),
1053 std::get<2>(oldest_timestamp).span().size()))
1054 << " vs "
1055 << absl::BytesToHexString(std::string_view(
1056 reinterpret_cast<const char *>(
1057 std::get<2>(next_oldest_timestamp).span().data()),
1058 std::get<2>(next_oldest_timestamp).span().size()));
1059
1060 } else {
1061 break;
1062 }
Austin Schuh8bd96322020-02-13 21:18:22 -08001063 }
1064
Austin Schuh2f8fd752020-09-01 22:38:28 -07001065 return oldest_timestamp;
Austin Schuh8bd96322020-02-13 21:18:22 -08001066}
1067
Austin Schuh6f3babe2020-01-26 20:34:50 -08001068std::tuple<TimestampMerger::DeliveryTimestamp, FlatbufferVector<MessageHeader>>
1069TimestampMerger::PopOldest() {
1070 if (has_timestamps_) {
Austin Schuh2f8fd752020-09-01 22:38:28 -07001071 VLOG(1) << "Looking for matching timestamp for "
1072 << configuration::StrippedChannelToString(
1073 configuration_->channels()->Get(channel_index_))
1074 << " (" << channel_index_ << ") "
1075 << " at " << std::get<0>(oldest_timestamp());
1076
Austin Schuh8bd96322020-02-13 21:18:22 -08001077 // Read the timestamps.
Austin Schuh6f3babe2020-01-26 20:34:50 -08001078 std::tuple<monotonic_clock::time_point, uint32_t,
1079 FlatbufferVector<MessageHeader>>
1080 oldest_timestamp = PopTimestampHeap();
1081
1082 TimestampMerger::DeliveryTimestamp timestamp;
1083 timestamp.monotonic_event_time =
1084 monotonic_clock::time_point(chrono::nanoseconds(
1085 std::get<2>(oldest_timestamp).message().monotonic_sent_time()));
1086 timestamp.realtime_event_time =
1087 realtime_clock::time_point(chrono::nanoseconds(
1088 std::get<2>(oldest_timestamp).message().realtime_sent_time()));
1089
1090 // Consistency check.
1091 CHECK_EQ(timestamp.monotonic_event_time, std::get<0>(oldest_timestamp));
1092 CHECK_EQ(std::get<2>(oldest_timestamp).message().queue_index(),
1093 std::get<1>(oldest_timestamp));
1094
1095 monotonic_clock::time_point remote_timestamp_monotonic_time(
1096 chrono::nanoseconds(
1097 std::get<2>(oldest_timestamp).message().monotonic_remote_time()));
1098
Austin Schuh8bd96322020-02-13 21:18:22 -08001099 // See if we have any data. If not, pass the problem up the chain.
Brian Silverman8a32ce62020-08-12 12:02:38 -07001100 if (message_heap_.empty()) {
Austin Schuhee711052020-08-24 16:06:09 -07001101 LOG(WARNING) << MaybeNodeName(configuration_->nodes()->Get(node_index_))
1102 << "No data to match timestamp on "
1103 << configuration::CleanedChannelToString(
1104 configuration_->channels()->Get(channel_index_))
1105 << " (" << channel_index_ << ")";
Austin Schuh8bd96322020-02-13 21:18:22 -08001106 return std::make_tuple(timestamp,
1107 std::move(std::get<2>(oldest_timestamp)));
1108 }
1109
Austin Schuh6f3babe2020-01-26 20:34:50 -08001110 while (true) {
Austin Schuhcde938c2020-02-02 17:30:07 -08001111 {
1112 // Ok, now try grabbing data until we find one which matches.
1113 std::tuple<monotonic_clock::time_point, uint32_t, const MessageHeader *>
1114 oldest_message_ref = oldest_message();
1115
1116 // Time at which the message was sent (this message is written from the
1117 // sending node's perspective.
1118 monotonic_clock::time_point remote_monotonic_time(chrono::nanoseconds(
1119 std::get<2>(oldest_message_ref)->monotonic_sent_time()));
1120
1121 if (remote_monotonic_time < remote_timestamp_monotonic_time) {
Austin Schuhee711052020-08-24 16:06:09 -07001122 LOG(WARNING) << configuration_->nodes()
1123 ->Get(node_index_)
1124 ->name()
1125 ->string_view()
1126 << " Undelivered message, skipping. Remote time is "
1127 << remote_monotonic_time << " timestamp is "
1128 << remote_timestamp_monotonic_time << " on channel "
1129 << configuration::StrippedChannelToString(
1130 configuration_->channels()->Get(channel_index_))
1131 << " (" << channel_index_ << ")";
Austin Schuhcde938c2020-02-02 17:30:07 -08001132 PopMessageHeap();
1133 continue;
1134 } else if (remote_monotonic_time > remote_timestamp_monotonic_time) {
Austin Schuhee711052020-08-24 16:06:09 -07001135 LOG(WARNING) << configuration_->nodes()
1136 ->Get(node_index_)
1137 ->name()
1138 ->string_view()
1139 << " Data not found. Remote time should be "
1140 << remote_timestamp_monotonic_time
1141 << ", message time is " << remote_monotonic_time
1142 << " on channel "
1143 << configuration::StrippedChannelToString(
1144 configuration_->channels()->Get(channel_index_))
Austin Schuh2f8fd752020-09-01 22:38:28 -07001145 << " (" << channel_index_ << ")"
1146 << (VLOG_IS_ON(1) ? DebugString() : "");
Austin Schuhcde938c2020-02-02 17:30:07 -08001147 return std::make_tuple(timestamp,
1148 std::move(std::get<2>(oldest_timestamp)));
1149 }
1150
1151 timestamp.monotonic_remote_time = remote_monotonic_time;
1152 }
1153
Austin Schuh2f8fd752020-09-01 22:38:28 -07001154 VLOG(1) << "Found matching data "
1155 << configuration::StrippedChannelToString(
1156 configuration_->channels()->Get(channel_index_))
1157 << " (" << channel_index_ << ")";
Austin Schuh6f3babe2020-01-26 20:34:50 -08001158 std::tuple<monotonic_clock::time_point, uint32_t,
1159 FlatbufferVector<MessageHeader>>
1160 oldest_message = PopMessageHeap();
1161
Austin Schuh6f3babe2020-01-26 20:34:50 -08001162 timestamp.realtime_remote_time =
1163 realtime_clock::time_point(chrono::nanoseconds(
1164 std::get<2>(oldest_message).message().realtime_sent_time()));
1165 timestamp.remote_queue_index =
1166 std::get<2>(oldest_message).message().queue_index();
1167
Austin Schuhcde938c2020-02-02 17:30:07 -08001168 CHECK_EQ(timestamp.monotonic_remote_time,
1169 remote_timestamp_monotonic_time);
1170
1171 CHECK_EQ(timestamp.remote_queue_index,
1172 std::get<2>(oldest_timestamp).message().remote_queue_index())
1173 << ": " << FlatbufferToJson(&std::get<2>(oldest_timestamp).message())
1174 << " data "
1175 << FlatbufferToJson(&std::get<2>(oldest_message).message());
Austin Schuh6f3babe2020-01-26 20:34:50 -08001176
Austin Schuh30dd5c52020-08-01 14:43:44 -07001177 return std::make_tuple(timestamp, std::move(std::get<2>(oldest_message)));
Austin Schuh6f3babe2020-01-26 20:34:50 -08001178 }
1179 } else {
1180 std::tuple<monotonic_clock::time_point, uint32_t,
1181 FlatbufferVector<MessageHeader>>
1182 oldest_message = PopMessageHeap();
1183
1184 TimestampMerger::DeliveryTimestamp timestamp;
1185 timestamp.monotonic_event_time =
1186 monotonic_clock::time_point(chrono::nanoseconds(
1187 std::get<2>(oldest_message).message().monotonic_sent_time()));
1188 timestamp.realtime_event_time =
1189 realtime_clock::time_point(chrono::nanoseconds(
1190 std::get<2>(oldest_message).message().realtime_sent_time()));
1191 timestamp.remote_queue_index = 0xffffffff;
1192
1193 CHECK_EQ(std::get<0>(oldest_message), timestamp.monotonic_event_time);
1194 CHECK_EQ(std::get<1>(oldest_message),
1195 std::get<2>(oldest_message).message().queue_index());
1196
Austin Schuh30dd5c52020-08-01 14:43:44 -07001197 return std::make_tuple(timestamp, std::move(std::get<2>(oldest_message)));
Austin Schuh6f3babe2020-01-26 20:34:50 -08001198 }
1199}
1200
Austin Schuh8bd96322020-02-13 21:18:22 -08001201void TimestampMerger::NoticeAtEnd() { channel_merger_->NoticeAtEnd(); }
1202
Austin Schuh6f3babe2020-01-26 20:34:50 -08001203namespace {
1204std::vector<std::unique_ptr<SplitMessageReader>> MakeSplitMessageReaders(
1205 const std::vector<std::vector<std::string>> &filenames) {
1206 CHECK_GT(filenames.size(), 0u);
1207 // Build up all the SplitMessageReaders.
1208 std::vector<std::unique_ptr<SplitMessageReader>> result;
1209 for (const std::vector<std::string> &filenames : filenames) {
1210 result.emplace_back(std::make_unique<SplitMessageReader>(filenames));
1211 }
1212 return result;
1213}
1214} // namespace
1215
1216ChannelMerger::ChannelMerger(
1217 const std::vector<std::vector<std::string>> &filenames)
1218 : split_message_readers_(MakeSplitMessageReaders(filenames)),
Austin Schuh97789fc2020-08-01 14:42:45 -07001219 log_file_header_(split_message_readers_[0]->raw_log_file_header()) {
Austin Schuh6f3babe2020-01-26 20:34:50 -08001220 // Now, confirm that the configuration matches for each and pick a start time.
1221 // Also return the list of possible nodes.
1222 for (const std::unique_ptr<SplitMessageReader> &reader :
1223 split_message_readers_) {
1224 CHECK(CompareFlatBuffer(log_file_header_.message().configuration(),
1225 reader->log_file_header()->configuration()))
1226 << ": Replaying log files with different configurations isn't "
1227 "supported";
1228 }
1229
1230 nodes_ = configuration::GetNodes(configuration());
1231}
1232
1233bool ChannelMerger::SetNode(const Node *target_node) {
1234 std::vector<SplitMessageReader *> split_message_readers;
1235 for (const std::unique_ptr<SplitMessageReader> &reader :
1236 split_message_readers_) {
1237 split_message_readers.emplace_back(reader.get());
1238 }
1239
1240 // Go find a log_file_header for this node.
1241 {
1242 bool found_node = false;
1243
1244 for (const std::unique_ptr<SplitMessageReader> &reader :
1245 split_message_readers_) {
James Kuszmaulfc273dc2020-05-09 17:56:19 -07001246 // In order to identify which logfile(s) map to the target node, do a
1247 // logical comparison of the nodes, by confirming that we are either in a
1248 // single-node setup (where the nodes will both be nullptr) or that the
1249 // node names match (but the other node fields--e.g., hostname lists--may
1250 // not).
1251 const bool both_null =
1252 reader->node() == nullptr && target_node == nullptr;
1253 const bool both_have_name =
1254 (reader->node() != nullptr) && (target_node != nullptr) &&
1255 (reader->node()->has_name() && target_node->has_name());
1256 const bool node_names_identical =
Brian Silvermand90905f2020-09-23 14:42:56 -07001257 both_have_name && (reader->node()->name()->string_view() ==
1258 target_node->name()->string_view());
James Kuszmaulfc273dc2020-05-09 17:56:19 -07001259 if (both_null || node_names_identical) {
Austin Schuh6f3babe2020-01-26 20:34:50 -08001260 if (!found_node) {
1261 found_node = true;
1262 log_file_header_ = CopyFlatBuffer(reader->log_file_header());
Austin Schuhcde938c2020-02-02 17:30:07 -08001263 VLOG(1) << "Found log file " << reader->filename() << " with node "
1264 << FlatbufferToJson(reader->node()) << " start_time "
1265 << monotonic_start_time();
Austin Schuh6f3babe2020-01-26 20:34:50 -08001266 } else {
Austin Schuh2f8fd752020-09-01 22:38:28 -07001267 // Find the earliest start time. That way, if we get a full log file
1268 // directly from the node, and a partial later, we start with the
1269 // full. Update our header to match that.
1270 const monotonic_clock::time_point new_monotonic_start_time(
1271 chrono::nanoseconds(
1272 reader->log_file_header()->monotonic_start_time()));
1273 const realtime_clock::time_point new_realtime_start_time(
1274 chrono::nanoseconds(
1275 reader->log_file_header()->realtime_start_time()));
1276
1277 if (monotonic_start_time() == monotonic_clock::min_time ||
1278 (new_monotonic_start_time != monotonic_clock::min_time &&
1279 new_monotonic_start_time < monotonic_start_time())) {
1280 log_file_header_.mutable_message()->mutate_monotonic_start_time(
1281 new_monotonic_start_time.time_since_epoch().count());
1282 log_file_header_.mutable_message()->mutate_realtime_start_time(
1283 new_realtime_start_time.time_since_epoch().count());
1284 VLOG(1) << "Updated log file " << reader->filename()
1285 << " with node " << FlatbufferToJson(reader->node())
1286 << " start_time " << new_monotonic_start_time;
1287 }
Austin Schuh6f3babe2020-01-26 20:34:50 -08001288 }
1289 }
1290 }
1291
1292 if (!found_node) {
1293 LOG(WARNING) << "Failed to find log file for node "
1294 << FlatbufferToJson(target_node);
1295 return false;
1296 }
1297 }
1298
1299 // Build up all the timestamp mergers. This connects up all the
1300 // SplitMessageReaders.
1301 timestamp_mergers_.reserve(configuration()->channels()->size());
1302 for (size_t channel_index = 0;
1303 channel_index < configuration()->channels()->size(); ++channel_index) {
1304 timestamp_mergers_.emplace_back(
1305 configuration(), split_message_readers, channel_index,
1306 configuration::GetNode(configuration(), target_node), this);
1307 }
1308
1309 // And prime everything.
Austin Schuh6f3babe2020-01-26 20:34:50 -08001310 for (std::unique_ptr<SplitMessageReader> &split_message_reader :
1311 split_message_readers_) {
Austin Schuhcde938c2020-02-02 17:30:07 -08001312 split_message_reader->QueueMessages(
1313 split_message_reader->monotonic_start_time());
Austin Schuh6f3babe2020-01-26 20:34:50 -08001314 }
1315
1316 node_ = configuration::GetNodeOrDie(configuration(), target_node);
1317 return true;
1318}
1319
Austin Schuh858c9f32020-08-31 16:56:12 -07001320monotonic_clock::time_point ChannelMerger::OldestMessageTime() const {
Brian Silverman8a32ce62020-08-12 12:02:38 -07001321 if (channel_heap_.empty()) {
Austin Schuh6f3babe2020-01-26 20:34:50 -08001322 return monotonic_clock::max_time;
1323 }
1324 return channel_heap_.front().first;
1325}
1326
1327void ChannelMerger::PushChannelHeap(monotonic_clock::time_point timestamp,
1328 int channel_index) {
1329 // Pop and recreate the heap if it has already been pushed. And since we are
1330 // pushing again, we don't need to clear pushed.
1331 if (timestamp_mergers_[channel_index].pushed()) {
Brian Silverman8a32ce62020-08-12 12:02:38 -07001332 const auto channel_iterator = std::find_if(
Austin Schuh6f3babe2020-01-26 20:34:50 -08001333 channel_heap_.begin(), channel_heap_.end(),
1334 [channel_index](const std::pair<monotonic_clock::time_point, int> x) {
1335 return x.second == channel_index;
Brian Silverman8a32ce62020-08-12 12:02:38 -07001336 });
1337 DCHECK(channel_iterator != channel_heap_.end());
1338 if (std::get<0>(*channel_iterator) == timestamp) {
1339 // It's already in the heap, in the correct spot, so nothing
1340 // more for us to do here.
1341 return;
1342 }
1343 channel_heap_.erase(channel_iterator);
Austin Schuh6f3babe2020-01-26 20:34:50 -08001344 std::make_heap(channel_heap_.begin(), channel_heap_.end(),
1345 ChannelHeapCompare);
1346 }
1347
Austin Schuh2f8fd752020-09-01 22:38:28 -07001348 if (timestamp == monotonic_clock::min_time) {
1349 timestamp_mergers_[channel_index].set_pushed(false);
1350 return;
1351 }
1352
Austin Schuh05b70472020-01-01 17:11:17 -08001353 channel_heap_.push_back(std::make_pair(timestamp, channel_index));
1354
1355 // The default sort puts the newest message first. Use a custom comparator to
1356 // put the oldest message first.
1357 std::push_heap(channel_heap_.begin(), channel_heap_.end(),
1358 ChannelHeapCompare);
1359}
1360
Austin Schuh2f8fd752020-09-01 22:38:28 -07001361void ChannelMerger::VerifyHeaps() {
Austin Schuh661a8d82020-09-13 17:25:56 -07001362 std::vector<std::pair<monotonic_clock::time_point, int>> channel_heap =
1363 channel_heap_;
1364 std::make_heap(channel_heap.begin(), channel_heap.end(), &ChannelHeapCompare);
Austin Schuh2f8fd752020-09-01 22:38:28 -07001365
Austin Schuh661a8d82020-09-13 17:25:56 -07001366 for (size_t i = 0; i < channel_heap_.size(); ++i) {
1367 CHECK(channel_heap_[i] == channel_heap[i]) << ": Heaps diverged...";
1368 CHECK_EQ(
1369 std::get<0>(channel_heap[i]),
1370 timestamp_mergers_[std::get<1>(channel_heap[i])].channel_merger_time());
Austin Schuh2f8fd752020-09-01 22:38:28 -07001371 }
1372}
1373
Austin Schuh6f3babe2020-01-26 20:34:50 -08001374std::tuple<TimestampMerger::DeliveryTimestamp, int,
1375 FlatbufferVector<MessageHeader>>
1376ChannelMerger::PopOldest() {
Austin Schuh8bd96322020-02-13 21:18:22 -08001377 CHECK_GT(channel_heap_.size(), 0u);
Austin Schuh05b70472020-01-01 17:11:17 -08001378 std::pair<monotonic_clock::time_point, int> oldest_channel_data =
1379 channel_heap_.front();
Austin Schuh6f3babe2020-01-26 20:34:50 -08001380 int channel_index = oldest_channel_data.second;
Austin Schuh05b70472020-01-01 17:11:17 -08001381 std::pop_heap(channel_heap_.begin(), channel_heap_.end(),
1382 &ChannelHeapCompare);
1383 channel_heap_.pop_back();
Austin Schuh8bd96322020-02-13 21:18:22 -08001384
Austin Schuh6f3babe2020-01-26 20:34:50 -08001385 timestamp_mergers_[channel_index].set_pushed(false);
Austin Schuh05b70472020-01-01 17:11:17 -08001386
Austin Schuh6f3babe2020-01-26 20:34:50 -08001387 TimestampMerger *merger = &timestamp_mergers_[channel_index];
Austin Schuh05b70472020-01-01 17:11:17 -08001388
Austin Schuhcde938c2020-02-02 17:30:07 -08001389 // Merger handles any queueing needed from here.
Austin Schuh6f3babe2020-01-26 20:34:50 -08001390 std::tuple<TimestampMerger::DeliveryTimestamp,
1391 FlatbufferVector<MessageHeader>>
1392 message = merger->PopOldest();
Brian Silverman8a32ce62020-08-12 12:02:38 -07001393 DCHECK_EQ(std::get<0>(message).monotonic_event_time,
1394 oldest_channel_data.first)
1395 << ": channel_heap_ was corrupted for " << channel_index << ": "
1396 << DebugString();
Austin Schuh05b70472020-01-01 17:11:17 -08001397
Austin Schuh2f8fd752020-09-01 22:38:28 -07001398 CHECK_GE(std::get<0>(message).monotonic_event_time, last_popped_time_)
1399 << ": " << MaybeNodeName(log_file_header()->node())
1400 << "Messages came off the queue out of order. " << DebugString();
1401 last_popped_time_ = std::get<0>(message).monotonic_event_time;
1402
1403 VLOG(1) << "Popped " << last_popped_time_ << " "
1404 << configuration::StrippedChannelToString(
1405 configuration()->channels()->Get(channel_index))
1406 << " (" << channel_index << ")";
1407
Austin Schuh6f3babe2020-01-26 20:34:50 -08001408 return std::make_tuple(std::get<0>(message), channel_index,
1409 std::move(std::get<1>(message)));
1410}
1411
Austin Schuhcde938c2020-02-02 17:30:07 -08001412std::string SplitMessageReader::MessageHeaderQueue::DebugString() const {
1413 std::stringstream ss;
1414 for (size_t i = 0; i < data_.size(); ++i) {
Austin Schuh2f8fd752020-09-01 22:38:28 -07001415 if (i < 5 || i + 5 > data_.size()) {
1416 if (timestamps) {
1417 ss << " msg: ";
1418 } else {
1419 ss << " timestamp: ";
1420 }
1421 ss << monotonic_clock::time_point(
1422 chrono::nanoseconds(data_[i].message().monotonic_sent_time()))
Austin Schuhcde938c2020-02-02 17:30:07 -08001423 << " ("
Austin Schuh2f8fd752020-09-01 22:38:28 -07001424 << realtime_clock::time_point(
1425 chrono::nanoseconds(data_[i].message().realtime_sent_time()))
1426 << ") " << data_[i].message().queue_index();
1427 if (timestamps) {
1428 ss << " <- remote "
1429 << monotonic_clock::time_point(chrono::nanoseconds(
1430 data_[i].message().monotonic_remote_time()))
1431 << " ("
1432 << realtime_clock::time_point(chrono::nanoseconds(
1433 data_[i].message().realtime_remote_time()))
1434 << ")";
1435 }
1436 ss << "\n";
1437 } else if (i == 5) {
1438 ss << " ...\n";
Austin Schuh6f3babe2020-01-26 20:34:50 -08001439 }
Austin Schuhcde938c2020-02-02 17:30:07 -08001440 }
Austin Schuh6f3babe2020-01-26 20:34:50 -08001441
Austin Schuhcde938c2020-02-02 17:30:07 -08001442 return ss.str();
1443}
Austin Schuh6f3babe2020-01-26 20:34:50 -08001444
Austin Schuhcde938c2020-02-02 17:30:07 -08001445std::string SplitMessageReader::DebugString(int channel) const {
1446 std::stringstream ss;
1447 ss << "[\n";
1448 ss << channels_[channel].data.DebugString();
1449 ss << " ]";
1450 return ss.str();
1451}
Austin Schuh6f3babe2020-01-26 20:34:50 -08001452
Austin Schuhcde938c2020-02-02 17:30:07 -08001453std::string SplitMessageReader::DebugString(int channel, int node_index) const {
1454 std::stringstream ss;
1455 ss << "[\n";
1456 ss << channels_[channel].timestamps[node_index].DebugString();
1457 ss << " ]";
1458 return ss.str();
1459}
1460
1461std::string TimestampMerger::DebugString() const {
1462 std::stringstream ss;
1463
1464 if (timestamp_heap_.size() > 0) {
1465 ss << " timestamp_heap {\n";
1466 std::vector<
1467 std::tuple<monotonic_clock::time_point, uint32_t, SplitMessageReader *>>
1468 timestamp_heap = timestamp_heap_;
1469 while (timestamp_heap.size() > 0u) {
1470 std::tuple<monotonic_clock::time_point, uint32_t, SplitMessageReader *>
1471 oldest_timestamp_reader = timestamp_heap.front();
1472
1473 ss << " " << std::get<2>(oldest_timestamp_reader) << " "
1474 << std::get<0>(oldest_timestamp_reader) << " queue_index ("
1475 << std::get<1>(oldest_timestamp_reader) << ") ttq "
1476 << std::get<2>(oldest_timestamp_reader)->time_to_queue() << " "
1477 << std::get<2>(oldest_timestamp_reader)->filename() << " -> "
1478 << std::get<2>(oldest_timestamp_reader)
1479 ->DebugString(channel_index_, node_index_)
1480 << "\n";
1481
1482 std::pop_heap(timestamp_heap.begin(), timestamp_heap.end(),
1483 &SplitMessageReaderHeapCompare);
1484 timestamp_heap.pop_back();
1485 }
1486 ss << " }\n";
1487 }
1488
1489 ss << " message_heap {\n";
1490 {
1491 std::vector<
1492 std::tuple<monotonic_clock::time_point, uint32_t, SplitMessageReader *>>
1493 message_heap = message_heap_;
Brian Silverman8a32ce62020-08-12 12:02:38 -07001494 while (!message_heap.empty()) {
Austin Schuhcde938c2020-02-02 17:30:07 -08001495 std::tuple<monotonic_clock::time_point, uint32_t, SplitMessageReader *>
1496 oldest_message_reader = message_heap.front();
1497
1498 ss << " " << std::get<2>(oldest_message_reader) << " "
1499 << std::get<0>(oldest_message_reader) << " queue_index ("
1500 << std::get<1>(oldest_message_reader) << ") ttq "
1501 << std::get<2>(oldest_message_reader)->time_to_queue() << " "
1502 << std::get<2>(oldest_message_reader)->filename() << " -> "
1503 << std::get<2>(oldest_message_reader)->DebugString(channel_index_)
1504 << "\n";
1505
1506 std::pop_heap(message_heap.begin(), message_heap.end(),
1507 &SplitMessageReaderHeapCompare);
1508 message_heap.pop_back();
Austin Schuh6f3babe2020-01-26 20:34:50 -08001509 }
Austin Schuh05b70472020-01-01 17:11:17 -08001510 }
Austin Schuhcde938c2020-02-02 17:30:07 -08001511 ss << " }";
1512
1513 return ss.str();
1514}
1515
1516std::string ChannelMerger::DebugString() const {
1517 std::stringstream ss;
1518 ss << "start_time " << realtime_start_time() << " " << monotonic_start_time()
1519 << "\n";
1520 ss << "channel_heap {\n";
1521 std::vector<std::pair<monotonic_clock::time_point, int>> channel_heap =
1522 channel_heap_;
Brian Silverman8a32ce62020-08-12 12:02:38 -07001523 while (!channel_heap.empty()) {
Austin Schuhcde938c2020-02-02 17:30:07 -08001524 std::tuple<monotonic_clock::time_point, int> channel = channel_heap.front();
1525 ss << " " << std::get<0>(channel) << " (" << std::get<1>(channel) << ") "
1526 << configuration::CleanedChannelToString(
1527 configuration()->channels()->Get(std::get<1>(channel)))
1528 << "\n";
1529
1530 ss << timestamp_mergers_[std::get<1>(channel)].DebugString() << "\n";
1531
1532 std::pop_heap(channel_heap.begin(), channel_heap.end(),
1533 &ChannelHeapCompare);
1534 channel_heap.pop_back();
1535 }
1536 ss << "}";
1537
1538 return ss.str();
Austin Schuh05b70472020-01-01 17:11:17 -08001539}
1540
Austin Schuhee711052020-08-24 16:06:09 -07001541std::string MaybeNodeName(const Node *node) {
1542 if (node != nullptr) {
1543 return node->name()->str() + " ";
1544 }
1545 return "";
1546}
1547
Brian Silvermanf51499a2020-09-21 12:49:08 -07001548} // namespace aos::logger