blob: 7ddc658c4198e487bab0145345e9488f4e4aa84e [file] [log] [blame]
Austin Schuha36c8902019-12-30 18:07:15 -08001#include "aos/events/logging/logfile_utils.h"
2
3#include <fcntl.h>
Austin Schuha36c8902019-12-30 18:07:15 -08004#include <sys/stat.h>
5#include <sys/types.h>
6#include <sys/uio.h>
7
Brian Silvermanf51499a2020-09-21 12:49:08 -07008#include <algorithm>
9#include <climits>
Austin Schuha36c8902019-12-30 18:07:15 -080010
Austin Schuhe4fca832020-03-07 16:58:53 -080011#include "absl/strings/escaping.h"
Austin Schuh05b70472020-01-01 17:11:17 -080012#include "aos/configuration.h"
Austin Schuhfa895892020-01-07 20:07:41 -080013#include "aos/flatbuffer_merge.h"
Austin Schuh6f3babe2020-01-26 20:34:50 -080014#include "aos/util/file.h"
Austin Schuha36c8902019-12-30 18:07:15 -080015#include "flatbuffers/flatbuffers.h"
Austin Schuh05b70472020-01-01 17:11:17 -080016#include "gflags/gflags.h"
17#include "glog/logging.h"
Austin Schuha36c8902019-12-30 18:07:15 -080018
// xz (LZMA) support is only compiled in for 64-bit x86 and 64-bit ARM targets;
// every other target gets ENABLE_LZMA defined to 0.
#if defined(__x86_64__) || defined(__aarch64__)
#define ENABLE_LZMA 1
#else
#define ENABLE_LZMA 0
#endif
26
27#if ENABLE_LZMA
28#include "aos/events/logging/lzma_encoder.h"
29#endif
30
Austin Schuh7fbf5a72020-09-21 16:28:13 -070031DEFINE_int32(flush_size, 128000,
Austin Schuha36c8902019-12-30 18:07:15 -080032 "Number of outstanding bytes to allow before flushing to disk.");
33
Brian Silvermanf51499a2020-09-21 12:49:08 -070034namespace aos::logger {
Austin Schuha36c8902019-12-30 18:07:15 -080035
Austin Schuh05b70472020-01-01 17:11:17 -080036namespace chrono = std::chrono;
37
Brian Silvermanf51499a2020-09-21 12:49:08 -070038DetachedBufferWriter::DetachedBufferWriter(
39 std::string_view filename, std::unique_ptr<DetachedBufferEncoder> encoder)
40 : filename_(filename), encoder_(std::move(encoder)) {
Austin Schuh6f3babe2020-01-26 20:34:50 -080041 util::MkdirP(filename, 0777);
42 fd_ = open(std::string(filename).c_str(),
43 O_RDWR | O_CLOEXEC | O_CREAT | O_EXCL, 0774);
44 VLOG(1) << "Opened " << filename << " for writing";
45 PCHECK(fd_ != -1) << ": Failed to open " << filename << " for writing";
Austin Schuha36c8902019-12-30 18:07:15 -080046}
47
48DetachedBufferWriter::~DetachedBufferWriter() {
Brian Silvermanf51499a2020-09-21 12:49:08 -070049 encoder_->Finish();
50 while (encoder_->queue_size() > 0) {
51 Flush();
52 }
Austin Schuha36c8902019-12-30 18:07:15 -080053 PLOG_IF(ERROR, close(fd_) == -1) << " Failed to close logfile";
Austin Schuh2f8fd752020-09-01 22:38:28 -070054 VLOG(1) << "Closed " << filename_;
55}
56
Brian Silvermand90905f2020-09-23 14:42:56 -070057DetachedBufferWriter::DetachedBufferWriter(DetachedBufferWriter &&other) {
Austin Schuh2f8fd752020-09-01 22:38:28 -070058 *this = std::move(other);
59}
60
Brian Silverman87ac0402020-09-17 14:47:01 -070061// When other is destroyed "soon" (which it should be because we're getting an
62// rvalue reference to it), it will flush etc all the data we have queued up
63// (because that data will then be its data).
Austin Schuh2f8fd752020-09-01 22:38:28 -070064DetachedBufferWriter &DetachedBufferWriter::operator=(
65 DetachedBufferWriter &&other) {
Austin Schuh2f8fd752020-09-01 22:38:28 -070066 std::swap(filename_, other.filename_);
Brian Silvermanf51499a2020-09-21 12:49:08 -070067 std::swap(encoder_, other.encoder_);
Austin Schuh2f8fd752020-09-01 22:38:28 -070068 std::swap(fd_, other.fd_);
Austin Schuh2f8fd752020-09-01 22:38:28 -070069 std::swap(iovec_, other.iovec_);
Brian Silvermanf51499a2020-09-21 12:49:08 -070070 std::swap(max_write_time_, other.max_write_time_);
71 std::swap(max_write_time_bytes_, other.max_write_time_bytes_);
72 std::swap(max_write_time_messages_, other.max_write_time_messages_);
73 std::swap(total_write_time_, other.total_write_time_);
74 std::swap(total_write_count_, other.total_write_count_);
75 std::swap(total_write_messages_, other.total_write_messages_);
76 std::swap(total_write_bytes_, other.total_write_bytes_);
Austin Schuh2f8fd752020-09-01 22:38:28 -070077 return *this;
Austin Schuha36c8902019-12-30 18:07:15 -080078}
79
Brian Silvermanf51499a2020-09-21 12:49:08 -070080void DetachedBufferWriter::QueueSpan(absl::Span<const uint8_t> span) {
81 if (encoder_->may_bypass() && span.size() > 4096u) {
82 // Over this threshold, we'll assume it's cheaper to add an extra
83 // syscall to write the data immediately instead of copying it to
84 // enqueue.
Austin Schuha36c8902019-12-30 18:07:15 -080085
Brian Silvermanf51499a2020-09-21 12:49:08 -070086 // First, flush everything.
87 while (encoder_->queue_size() > 0u) {
88 Flush();
89 }
Austin Schuhde031b72020-01-10 19:34:41 -080090
Brian Silvermanf51499a2020-09-21 12:49:08 -070091 // Then, write it directly.
92 const auto start = aos::monotonic_clock::now();
93 const ssize_t written = write(fd_, span.data(), span.size());
94 const auto end = aos::monotonic_clock::now();
95 PCHECK(written >= 0) << ": write failed";
96 CHECK_EQ(written, static_cast<ssize_t>(span.size()))
97 << ": Wrote " << written << " expected " << span.size();
98 UpdateStatsForWrite(end - start, written, 1);
99 } else {
100 encoder_->Encode(CopySpanAsDetachedBuffer(span));
Austin Schuha36c8902019-12-30 18:07:15 -0800101 }
Brian Silvermanf51499a2020-09-21 12:49:08 -0700102
103 FlushAtThreshold();
Austin Schuha36c8902019-12-30 18:07:15 -0800104}
105
106void DetachedBufferWriter::Flush() {
Brian Silvermanf51499a2020-09-21 12:49:08 -0700107 const auto queue = encoder_->queue();
108 if (queue.empty()) {
Austin Schuha36c8902019-12-30 18:07:15 -0800109 return;
110 }
Brian Silvermanf51499a2020-09-21 12:49:08 -0700111
Austin Schuha36c8902019-12-30 18:07:15 -0800112 iovec_.clear();
Brian Silvermanf51499a2020-09-21 12:49:08 -0700113 const size_t iovec_size = std::min<size_t>(queue.size(), IOV_MAX);
114 iovec_.resize(iovec_size);
Austin Schuha36c8902019-12-30 18:07:15 -0800115 size_t counted_size = 0;
Brian Silvermanf51499a2020-09-21 12:49:08 -0700116 for (size_t i = 0; i < iovec_size; ++i) {
117 iovec_[i].iov_base = const_cast<uint8_t *>(queue[i].data());
118 iovec_[i].iov_len = queue[i].size();
119 counted_size += iovec_[i].iov_len;
Austin Schuha36c8902019-12-30 18:07:15 -0800120 }
Brian Silvermanf51499a2020-09-21 12:49:08 -0700121
122 const auto start = aos::monotonic_clock::now();
Austin Schuha36c8902019-12-30 18:07:15 -0800123 const ssize_t written = writev(fd_, iovec_.data(), iovec_.size());
Brian Silvermanf51499a2020-09-21 12:49:08 -0700124 const auto end = aos::monotonic_clock::now();
125 PCHECK(written >= 0) << ": writev failed";
Austin Schuha36c8902019-12-30 18:07:15 -0800126 // TODO(austin): Handle partial writes in some way other than crashing...
Brian Silvermanf51499a2020-09-21 12:49:08 -0700127 CHECK_EQ(written, static_cast<ssize_t>(counted_size))
128 << ": Wrote " << written << " expected " << counted_size;
129
130 encoder_->Clear(iovec_size);
131
132 UpdateStatsForWrite(end - start, written, iovec_size);
133}
134
135void DetachedBufferWriter::UpdateStatsForWrite(
136 aos::monotonic_clock::duration duration, ssize_t written, int iovec_size) {
137 if (duration > max_write_time_) {
138 max_write_time_ = duration;
139 max_write_time_bytes_ = written;
140 max_write_time_messages_ = iovec_size;
141 }
142 total_write_time_ += duration;
143 ++total_write_count_;
144 total_write_messages_ += iovec_size;
145 total_write_bytes_ += written;
146}
147
148void DetachedBufferWriter::FlushAtThreshold() {
149 // Flush if we are at the max number of iovs per writev, because there's no
150 // point queueing up any more data in memory. Also flush once we have enough
151 // data queued up.
152 while (encoder_->queued_bytes() > static_cast<size_t>(FLAGS_flush_size) ||
153 encoder_->queue_size() >= IOV_MAX) {
154 Flush();
155 }
Austin Schuha36c8902019-12-30 18:07:15 -0800156}
157
158flatbuffers::Offset<MessageHeader> PackMessage(
159 flatbuffers::FlatBufferBuilder *fbb, const Context &context,
160 int channel_index, LogType log_type) {
161 flatbuffers::Offset<flatbuffers::Vector<uint8_t>> data_offset;
162
163 switch (log_type) {
164 case LogType::kLogMessage:
165 case LogType::kLogMessageAndDeliveryTime:
Austin Schuh6f3babe2020-01-26 20:34:50 -0800166 case LogType::kLogRemoteMessage:
Brian Silvermaneaa41d62020-07-08 19:47:35 -0700167 data_offset = fbb->CreateVector(
168 static_cast<const uint8_t *>(context.data), context.size);
Austin Schuha36c8902019-12-30 18:07:15 -0800169 break;
170
171 case LogType::kLogDeliveryTimeOnly:
172 break;
173 }
174
175 MessageHeader::Builder message_header_builder(*fbb);
176 message_header_builder.add_channel_index(channel_index);
Austin Schuh6f3babe2020-01-26 20:34:50 -0800177
178 switch (log_type) {
179 case LogType::kLogRemoteMessage:
180 message_header_builder.add_queue_index(context.remote_queue_index);
181 message_header_builder.add_monotonic_sent_time(
182 context.monotonic_remote_time.time_since_epoch().count());
183 message_header_builder.add_realtime_sent_time(
184 context.realtime_remote_time.time_since_epoch().count());
185 break;
186
187 case LogType::kLogMessage:
188 case LogType::kLogMessageAndDeliveryTime:
189 case LogType::kLogDeliveryTimeOnly:
190 message_header_builder.add_queue_index(context.queue_index);
191 message_header_builder.add_monotonic_sent_time(
192 context.monotonic_event_time.time_since_epoch().count());
193 message_header_builder.add_realtime_sent_time(
194 context.realtime_event_time.time_since_epoch().count());
195 break;
196 }
Austin Schuha36c8902019-12-30 18:07:15 -0800197
198 switch (log_type) {
199 case LogType::kLogMessage:
Austin Schuh6f3babe2020-01-26 20:34:50 -0800200 case LogType::kLogRemoteMessage:
Austin Schuha36c8902019-12-30 18:07:15 -0800201 message_header_builder.add_data(data_offset);
202 break;
203
204 case LogType::kLogMessageAndDeliveryTime:
205 message_header_builder.add_data(data_offset);
206 [[fallthrough]];
207
208 case LogType::kLogDeliveryTimeOnly:
209 message_header_builder.add_monotonic_remote_time(
210 context.monotonic_remote_time.time_since_epoch().count());
211 message_header_builder.add_realtime_remote_time(
212 context.realtime_remote_time.time_since_epoch().count());
213 message_header_builder.add_remote_queue_index(context.remote_queue_index);
214 break;
215 }
216
217 return message_header_builder.Finish();
218}
219
Brian Silvermanf51499a2020-09-21 12:49:08 -0700220SpanReader::SpanReader(std::string_view filename) : filename_(filename) {
Brian Silvermanf59fe3f2020-09-22 21:04:09 -0700221 static const std::string_view kXz = ".xz";
222 if (filename.substr(filename.size() - kXz.size()) == kXz) {
223#if ENABLE_LZMA
224 decoder_ = std::make_unique<LzmaDecoder>(filename);
225#else
226 LOG(FATAL) << "Reading xz-compressed files not supported on this platform";
227#endif
228 } else {
229 decoder_ = std::make_unique<DummyDecoder>(filename);
230 }
Austin Schuh05b70472020-01-01 17:11:17 -0800231}
232
233absl::Span<const uint8_t> SpanReader::ReadMessage() {
234 // Make sure we have enough for the size.
235 if (data_.size() - consumed_data_ < sizeof(flatbuffers::uoffset_t)) {
236 if (!ReadBlock()) {
237 return absl::Span<const uint8_t>();
238 }
239 }
240
241 // Now make sure we have enough for the message.
242 const size_t data_size =
243 flatbuffers::GetPrefixedSize(data_.data() + consumed_data_) +
244 sizeof(flatbuffers::uoffset_t);
Austin Schuhe4fca832020-03-07 16:58:53 -0800245 if (data_size == sizeof(flatbuffers::uoffset_t)) {
246 LOG(ERROR) << "Size of data is zero. Log file end is corrupted, skipping.";
247 LOG(ERROR) << " Rest of log file is "
248 << absl::BytesToHexString(std::string_view(
249 reinterpret_cast<const char *>(data_.data() +
250 consumed_data_),
251 data_.size() - consumed_data_));
252 return absl::Span<const uint8_t>();
253 }
Austin Schuh05b70472020-01-01 17:11:17 -0800254 while (data_.size() < consumed_data_ + data_size) {
255 if (!ReadBlock()) {
256 return absl::Span<const uint8_t>();
257 }
258 }
259
260 // And return it, consuming the data.
261 const uint8_t *data_ptr = data_.data() + consumed_data_;
262
263 consumed_data_ += data_size;
264
265 return absl::Span<const uint8_t>(data_ptr, data_size);
266}
267
268bool SpanReader::MessageAvailable() {
269 // Are we big enough to read the size?
270 if (data_.size() - consumed_data_ < sizeof(flatbuffers::uoffset_t)) {
271 return false;
272 }
273
274 // Then, are we big enough to read the full message?
275 const size_t data_size =
276 flatbuffers::GetPrefixedSize(data_.data() + consumed_data_) +
277 sizeof(flatbuffers::uoffset_t);
278 if (data_.size() < consumed_data_ + data_size) {
279 return false;
280 }
281
282 return true;
283}
284
285bool SpanReader::ReadBlock() {
Brian Silvermanf51499a2020-09-21 12:49:08 -0700286 // This is the amount of data we grab at a time. Doing larger chunks minimizes
287 // syscalls and helps decompressors batch things more efficiently.
Austin Schuh05b70472020-01-01 17:11:17 -0800288 constexpr size_t kReadSize = 256 * 1024;
289
290 // Strip off any unused data at the front.
291 if (consumed_data_ != 0) {
Brian Silvermanf51499a2020-09-21 12:49:08 -0700292 data_.erase_front(consumed_data_);
Austin Schuh05b70472020-01-01 17:11:17 -0800293 consumed_data_ = 0;
294 }
295
296 const size_t starting_size = data_.size();
297
298 // This should automatically grow the backing store. It won't shrink if we
299 // get a small chunk later. This reduces allocations when we want to append
300 // more data.
Brian Silvermanf51499a2020-09-21 12:49:08 -0700301 data_.resize(starting_size + kReadSize);
Austin Schuh05b70472020-01-01 17:11:17 -0800302
Brian Silvermanf51499a2020-09-21 12:49:08 -0700303 const size_t count =
304 decoder_->Read(data_.begin() + starting_size, data_.end());
305 data_.resize(starting_size + count);
Austin Schuh05b70472020-01-01 17:11:17 -0800306 if (count == 0) {
Austin Schuh05b70472020-01-01 17:11:17 -0800307 return false;
308 }
Austin Schuh05b70472020-01-01 17:11:17 -0800309
310 return true;
311}
312
Austin Schuh6f3babe2020-01-26 20:34:50 -0800313FlatbufferVector<LogFileHeader> ReadHeader(std::string_view filename) {
314 SpanReader span_reader(filename);
Austin Schuh6f3babe2020-01-26 20:34:50 -0800315 absl::Span<const uint8_t> config_data = span_reader.ReadMessage();
316
317 // Make sure something was read.
Austin Schuh97789fc2020-08-01 14:42:45 -0700318 CHECK(config_data != absl::Span<const uint8_t>())
319 << ": Failed to read header from: " << filename;
Austin Schuh6f3babe2020-01-26 20:34:50 -0800320
Austin Schuh5212cad2020-09-09 23:12:09 -0700321 // And copy the config so we have it forever, removing the size prefix.
Brian Silverman354697a2020-09-22 21:06:32 -0700322 ResizeableBuffer data;
323 data.resize(config_data.size() - sizeof(flatbuffers::uoffset_t));
324 memcpy(data.data(), config_data.begin() + sizeof(flatbuffers::uoffset_t),
325 data.size());
Austin Schuh6f3babe2020-01-26 20:34:50 -0800326 return FlatbufferVector<LogFileHeader>(std::move(data));
327}
328
Austin Schuh5212cad2020-09-09 23:12:09 -0700329FlatbufferVector<MessageHeader> ReadNthMessage(std::string_view filename,
330 size_t n) {
331 SpanReader span_reader(filename);
332 absl::Span<const uint8_t> data_span = span_reader.ReadMessage();
333 for (size_t i = 0; i < n + 1; ++i) {
334 data_span = span_reader.ReadMessage();
335
336 // Make sure something was read.
337 CHECK(data_span != absl::Span<const uint8_t>())
338 << ": Failed to read data from: " << filename;
339 }
340
Brian Silverman354697a2020-09-22 21:06:32 -0700341 // And copy the config so we have it forever, removing the size prefix.
342 ResizeableBuffer data;
343 data.resize(data_span.size() - sizeof(flatbuffers::uoffset_t));
344 memcpy(data.data(), data_span.begin() + sizeof(flatbuffers::uoffset_t),
345 data.size());
Austin Schuh5212cad2020-09-09 23:12:09 -0700346 return FlatbufferVector<MessageHeader>(std::move(data));
347}
348
Austin Schuh05b70472020-01-01 17:11:17 -0800349MessageReader::MessageReader(std::string_view filename)
Austin Schuh97789fc2020-08-01 14:42:45 -0700350 : span_reader_(filename),
351 raw_log_file_header_(FlatbufferVector<LogFileHeader>::Empty()) {
Austin Schuh05b70472020-01-01 17:11:17 -0800352 // Make sure we have enough to read the size.
Austin Schuh97789fc2020-08-01 14:42:45 -0700353 absl::Span<const uint8_t> header_data = span_reader_.ReadMessage();
Austin Schuh05b70472020-01-01 17:11:17 -0800354
355 // Make sure something was read.
Austin Schuh97789fc2020-08-01 14:42:45 -0700356 CHECK(header_data != absl::Span<const uint8_t>())
357 << ": Failed to read header from: " << filename;
Austin Schuh05b70472020-01-01 17:11:17 -0800358
Austin Schuh97789fc2020-08-01 14:42:45 -0700359 // And copy the header data so we have it forever.
Brian Silverman354697a2020-09-22 21:06:32 -0700360 ResizeableBuffer header_data_copy;
361 header_data_copy.resize(header_data.size() - sizeof(flatbuffers::uoffset_t));
362 memcpy(header_data_copy.data(),
363 header_data.begin() + sizeof(flatbuffers::uoffset_t),
364 header_data_copy.size());
Austin Schuh97789fc2020-08-01 14:42:45 -0700365 raw_log_file_header_ =
366 FlatbufferVector<LogFileHeader>(std::move(header_data_copy));
Austin Schuh05b70472020-01-01 17:11:17 -0800367
Austin Schuhcde938c2020-02-02 17:30:07 -0800368 max_out_of_order_duration_ =
Austin Schuh2f8fd752020-09-01 22:38:28 -0700369 chrono::nanoseconds(log_file_header()->max_out_of_order_duration());
Austin Schuhcde938c2020-02-02 17:30:07 -0800370
371 VLOG(1) << "Opened " << filename << " as node "
372 << FlatbufferToJson(log_file_header()->node());
Austin Schuh05b70472020-01-01 17:11:17 -0800373}
374
375std::optional<FlatbufferVector<MessageHeader>> MessageReader::ReadMessage() {
376 absl::Span<const uint8_t> msg_data = span_reader_.ReadMessage();
377 if (msg_data == absl::Span<const uint8_t>()) {
378 return std::nullopt;
379 }
380
Brian Silverman354697a2020-09-22 21:06:32 -0700381 ResizeableBuffer result_buffer;
382 result_buffer.resize(msg_data.size() - sizeof(flatbuffers::uoffset_t));
383 memcpy(result_buffer.data(),
384 msg_data.begin() + sizeof(flatbuffers::uoffset_t),
385 result_buffer.size());
386 FlatbufferVector<MessageHeader> result(std::move(result_buffer));
Austin Schuh05b70472020-01-01 17:11:17 -0800387
388 const monotonic_clock::time_point timestamp = monotonic_clock::time_point(
389 chrono::nanoseconds(result.message().monotonic_sent_time()));
390
391 newest_timestamp_ = std::max(newest_timestamp_, timestamp);
Austin Schuh8bd96322020-02-13 21:18:22 -0800392 VLOG(2) << "Read from " << filename() << " data " << FlatbufferToJson(result);
Austin Schuh6f3babe2020-01-26 20:34:50 -0800393 return std::move(result);
Austin Schuh05b70472020-01-01 17:11:17 -0800394}
395
Austin Schuh6f3babe2020-01-26 20:34:50 -0800396SplitMessageReader::SplitMessageReader(
Austin Schuhfa895892020-01-07 20:07:41 -0800397 const std::vector<std::string> &filenames)
398 : filenames_(filenames),
Austin Schuh97789fc2020-08-01 14:42:45 -0700399 log_file_header_(FlatbufferVector<LogFileHeader>::Empty()) {
Austin Schuhfa895892020-01-07 20:07:41 -0800400 CHECK(NextLogFile()) << ": filenames is empty. Need files to read.";
401
Austin Schuh6f3babe2020-01-26 20:34:50 -0800402 // Grab any log file header. They should all match (and we will check as we
403 // open more of them).
Austin Schuh97789fc2020-08-01 14:42:45 -0700404 log_file_header_ = message_reader_->raw_log_file_header();
Austin Schuhfa895892020-01-07 20:07:41 -0800405
Austin Schuh2f8fd752020-09-01 22:38:28 -0700406 for (size_t i = 1; i < filenames_.size(); ++i) {
407 MessageReader message_reader(filenames_[i]);
408
409 const monotonic_clock::time_point new_monotonic_start_time(
410 chrono::nanoseconds(
411 message_reader.log_file_header()->monotonic_start_time()));
412 const realtime_clock::time_point new_realtime_start_time(
413 chrono::nanoseconds(
414 message_reader.log_file_header()->realtime_start_time()));
415
416 // There are 2 types of part files. Part files from before time estimation
417 // has started, and part files after. We don't declare a log file "started"
418 // until time estimation is up. And once a log file starts, it should never
419 // stop again, and should remain constant.
420 // To compare both types of headers, we mutate our saved copy of the header
421 // to match the next chunk by updating time if we detect a stopped ->
422 // started transition.
423 if (monotonic_start_time() == monotonic_clock::min_time) {
424 CHECK_EQ(realtime_start_time(), realtime_clock::min_time);
425 // We should only be missing the monotonic start time when logging data
Brian Silverman87ac0402020-09-17 14:47:01 -0700426 // for remote nodes. We don't have a good way to determine the remote
Austin Schuh2f8fd752020-09-01 22:38:28 -0700427 // realtime offset, so it shouldn't be filled out.
428 // TODO(austin): If we have a good way, feel free to fill it out. It
429 // probably won't be better than we could do in post though with the same
430 // data.
431 CHECK(!log_file_header_.mutable_message()->has_realtime_start_time());
432 if (new_monotonic_start_time != monotonic_clock::min_time) {
433 // If we finally found our start time, update the header. Do this once
434 // because it should never change again.
435 log_file_header_.mutable_message()->mutate_monotonic_start_time(
436 new_monotonic_start_time.time_since_epoch().count());
437 log_file_header_.mutable_message()->mutate_realtime_start_time(
438 new_realtime_start_time.time_since_epoch().count());
439 }
440 }
441
Austin Schuh64fab802020-09-09 22:47:47 -0700442 // We don't have a good way to set the realtime start time on remote nodes.
443 // Confirm it remains consistent.
444 CHECK_EQ(log_file_header_.mutable_message()->has_realtime_start_time(),
445 message_reader.log_file_header()->has_realtime_start_time());
446
447 // Parts index will *not* match unless we set them to match. We only want
448 // to accept the start time and parts mismatching, so set them.
449 log_file_header_.mutable_message()->mutate_parts_index(
450 message_reader.log_file_header()->parts_index());
451
Austin Schuh2f8fd752020-09-01 22:38:28 -0700452 // Now compare that the headers match.
Austin Schuh64fab802020-09-09 22:47:47 -0700453 if (!CompareFlatBuffer(message_reader.raw_log_file_header(),
454 log_file_header_)) {
455 if (message_reader.log_file_header()->has_logger_uuid() &&
456 log_file_header_.message().has_logger_uuid() &&
457 message_reader.log_file_header()->logger_uuid()->string_view() !=
458 log_file_header_.message().logger_uuid()->string_view()) {
459 LOG(FATAL) << "Logger UUIDs don't match between log file chunks "
460 << filenames_[0] << " and " << filenames_[i]
461 << ", this is not supported.";
462 }
463 if (message_reader.log_file_header()->has_parts_uuid() &&
464 log_file_header_.message().has_parts_uuid() &&
465 message_reader.log_file_header()->parts_uuid()->string_view() !=
466 log_file_header_.message().parts_uuid()->string_view()) {
467 LOG(FATAL) << "Parts UUIDs don't match between log file chunks "
468 << filenames_[0] << " and " << filenames_[i]
469 << ", this is not supported.";
470 }
471
472 LOG(FATAL) << "Header is different between log file chunks "
473 << filenames_[0] << " and " << filenames_[i]
474 << ", this is not supported.";
475 }
Austin Schuh2f8fd752020-09-01 22:38:28 -0700476 }
Austin Schuh64fab802020-09-09 22:47:47 -0700477 // Put the parts index back to the first log file chunk.
478 log_file_header_.mutable_message()->mutate_parts_index(
479 message_reader_->log_file_header()->parts_index());
Austin Schuh2f8fd752020-09-01 22:38:28 -0700480
Austin Schuh6f3babe2020-01-26 20:34:50 -0800481 // Setup per channel state.
Austin Schuh05b70472020-01-01 17:11:17 -0800482 channels_.resize(configuration()->channels()->size());
Austin Schuh6f3babe2020-01-26 20:34:50 -0800483 for (ChannelData &channel_data : channels_) {
484 channel_data.data.split_reader = this;
485 // Build up the timestamp list.
486 if (configuration::MultiNode(configuration())) {
487 channel_data.timestamps.resize(configuration()->nodes()->size());
488 for (MessageHeaderQueue &queue : channel_data.timestamps) {
489 queue.timestamps = true;
490 queue.split_reader = this;
491 }
492 }
493 }
Austin Schuh05b70472020-01-01 17:11:17 -0800494
Austin Schuh6f3babe2020-01-26 20:34:50 -0800495 // Build up channels_to_write_ as an optimization to make it fast to figure
496 // out which datastructure to place any new data from a channel on.
497 for (const Channel *channel : *configuration()->channels()) {
498 // This is the main case. We will only see data on this node.
499 if (configuration::ChannelIsSendableOnNode(channel, node())) {
500 channels_to_write_.emplace_back(
501 &channels_[channels_to_write_.size()].data);
502 } else
503 // If we can't send, but can receive, we should be able to see
504 // timestamps here.
505 if (configuration::ChannelIsReadableOnNode(channel, node())) {
506 channels_to_write_.emplace_back(
507 &(channels_[channels_to_write_.size()]
508 .timestamps[configuration::GetNodeIndex(configuration(),
509 node())]));
510 } else {
511 channels_to_write_.emplace_back(nullptr);
512 }
513 }
Austin Schuh05b70472020-01-01 17:11:17 -0800514}
515
Austin Schuh6f3babe2020-01-26 20:34:50 -0800516bool SplitMessageReader::NextLogFile() {
Austin Schuhfa895892020-01-07 20:07:41 -0800517 if (next_filename_index_ == filenames_.size()) {
518 return false;
519 }
520 message_reader_ =
521 std::make_unique<MessageReader>(filenames_[next_filename_index_]);
522
523 // We can't support the config diverging between two log file headers. See if
524 // they are the same.
525 if (next_filename_index_ != 0) {
Austin Schuh64fab802020-09-09 22:47:47 -0700526 // In order for the headers to identically compare, they need to have the
527 // same parts_index. Rewrite the saved header with the new parts_index,
528 // compare, and then restore.
529 const int32_t original_parts_index =
530 log_file_header_.message().parts_index();
531 log_file_header_.mutable_message()->mutate_parts_index(
532 message_reader_->log_file_header()->parts_index());
533
Austin Schuh97789fc2020-08-01 14:42:45 -0700534 CHECK(CompareFlatBuffer(message_reader_->raw_log_file_header(),
535 log_file_header_))
Austin Schuhfa895892020-01-07 20:07:41 -0800536 << ": Header is different between log file chunks "
537 << filenames_[next_filename_index_] << " and "
538 << filenames_[next_filename_index_ - 1] << ", this is not supported.";
Austin Schuh64fab802020-09-09 22:47:47 -0700539
540 log_file_header_.mutable_message()->mutate_parts_index(
541 original_parts_index);
Austin Schuhfa895892020-01-07 20:07:41 -0800542 }
543
544 ++next_filename_index_;
545 return true;
546}
547
Austin Schuh6f3babe2020-01-26 20:34:50 -0800548bool SplitMessageReader::QueueMessages(
Austin Schuhcde938c2020-02-02 17:30:07 -0800549 monotonic_clock::time_point last_dequeued_time) {
Austin Schuh6f3babe2020-01-26 20:34:50 -0800550 // TODO(austin): Once we are happy that everything works, read a 256kb chunk
551 // to reduce the need to re-heap down below.
Austin Schuhcde938c2020-02-02 17:30:07 -0800552
553 // Special case no more data. Otherwise we blow up on the CHECK statement
554 // confirming that we have enough data queued.
555 if (at_end_) {
556 return false;
557 }
558
559 // If this isn't the first time around, confirm that we had enough data queued
560 // to follow the contract.
561 if (time_to_queue_ != monotonic_clock::min_time) {
562 CHECK_LE(last_dequeued_time,
563 newest_timestamp() - max_out_of_order_duration())
564 << " node " << FlatbufferToJson(node()) << " on " << this;
565
566 // Bail if there is enough data already queued.
567 if (last_dequeued_time < time_to_queue_) {
Austin Schuhee711052020-08-24 16:06:09 -0700568 VLOG(1) << MaybeNodeName(target_node_) << "All up to date on " << this
569 << ", dequeued " << last_dequeued_time << " queue time "
570 << time_to_queue_;
Austin Schuhcde938c2020-02-02 17:30:07 -0800571 return true;
572 }
573 } else {
574 // Startup takes a special dance. We want to queue up until the start time,
575 // but we then want to find the next message to read. The conservative
576 // answer is to immediately trigger a second requeue to get things moving.
577 time_to_queue_ = monotonic_start_time();
578 QueueMessages(time_to_queue_);
579 }
580
581 // If we are asked to queue, queue for at least max_out_of_order_duration past
582 // the last known time in the log file (ie the newest timestep read). As long
583 // as we requeue exactly when time_to_queue_ is dequeued and go no further, we
584 // are safe. And since we pop in order, that works.
585 //
586 // Special case the start of the log file. There should be at most 1 message
587 // from each channel at the start of the log file. So always force the start
588 // of the log file to just be read.
589 time_to_queue_ = std::max(time_to_queue_, newest_timestamp());
Austin Schuhee711052020-08-24 16:06:09 -0700590 VLOG(1) << MaybeNodeName(target_node_) << "Queueing, going until "
591 << time_to_queue_ << " " << filename();
Austin Schuhcde938c2020-02-02 17:30:07 -0800592
593 bool was_emplaced = false;
Austin Schuh6f3babe2020-01-26 20:34:50 -0800594 while (true) {
Austin Schuhcde938c2020-02-02 17:30:07 -0800595 // Stop if we have enough.
Brian Silverman98360e22020-04-28 16:51:20 -0700596 if (newest_timestamp() > time_to_queue_ + max_out_of_order_duration() &&
Austin Schuhcde938c2020-02-02 17:30:07 -0800597 was_emplaced) {
Austin Schuhee711052020-08-24 16:06:09 -0700598 VLOG(1) << MaybeNodeName(target_node_) << "Done queueing on " << this
599 << ", queued to " << newest_timestamp() << " with requeue time "
600 << time_to_queue_;
Austin Schuh6f3babe2020-01-26 20:34:50 -0800601 return true;
602 }
Austin Schuh05b70472020-01-01 17:11:17 -0800603
Austin Schuh6f3babe2020-01-26 20:34:50 -0800604 if (std::optional<FlatbufferVector<MessageHeader>> msg =
605 message_reader_->ReadMessage()) {
606 const MessageHeader &header = msg.value().message();
607
Austin Schuhcde938c2020-02-02 17:30:07 -0800608 const monotonic_clock::time_point timestamp = monotonic_clock::time_point(
609 chrono::nanoseconds(header.monotonic_sent_time()));
Austin Schuh6f3babe2020-01-26 20:34:50 -0800610
Austin Schuh0b5fd032020-03-28 17:36:49 -0700611 if (VLOG_IS_ON(2)) {
Brian Silvermand90905f2020-09-23 14:42:56 -0700612 LOG(INFO) << MaybeNodeName(target_node_) << "Queued " << this << " "
613 << filename() << " ttq: " << time_to_queue_ << " now "
Austin Schuhee711052020-08-24 16:06:09 -0700614 << newest_timestamp() << " start time "
615 << monotonic_start_time() << " " << FlatbufferToJson(&header);
Austin Schuh0b5fd032020-03-28 17:36:49 -0700616 } else if (VLOG_IS_ON(1)) {
617 FlatbufferVector<MessageHeader> copy = msg.value();
618 copy.mutable_message()->clear_data();
Austin Schuhee711052020-08-24 16:06:09 -0700619 LOG(INFO) << MaybeNodeName(target_node_) << "Queued " << this << " "
620 << filename() << " ttq: " << time_to_queue_ << " now "
621 << newest_timestamp() << " start time "
622 << monotonic_start_time() << " " << FlatbufferToJson(copy);
Austin Schuh0b5fd032020-03-28 17:36:49 -0700623 }
Austin Schuhcde938c2020-02-02 17:30:07 -0800624
625 const int channel_index = header.channel_index();
626 was_emplaced = channels_to_write_[channel_index]->emplace_back(
627 std::move(msg.value()));
628 if (was_emplaced) {
629 newest_timestamp_ = std::max(newest_timestamp_, timestamp);
630 }
Austin Schuh6f3babe2020-01-26 20:34:50 -0800631 } else {
632 if (!NextLogFile()) {
Austin Schuhee711052020-08-24 16:06:09 -0700633 VLOG(1) << MaybeNodeName(target_node_) << "No more files, last was "
634 << filenames_.back();
Austin Schuhcde938c2020-02-02 17:30:07 -0800635 at_end_ = true;
Austin Schuh8bd96322020-02-13 21:18:22 -0800636 for (MessageHeaderQueue *queue : channels_to_write_) {
637 if (queue == nullptr || queue->timestamp_merger == nullptr) {
638 continue;
639 }
640 queue->timestamp_merger->NoticeAtEnd();
641 }
Austin Schuh6f3babe2020-01-26 20:34:50 -0800642 return false;
643 }
644 }
Austin Schuh05b70472020-01-01 17:11:17 -0800645 }
Austin Schuh6f3babe2020-01-26 20:34:50 -0800646}
647
648void SplitMessageReader::SetTimestampMerger(TimestampMerger *timestamp_merger,
649 int channel_index,
650 const Node *target_node) {
651 const Node *reinterpreted_target_node =
652 configuration::GetNodeOrDie(configuration(), target_node);
Austin Schuhee711052020-08-24 16:06:09 -0700653 target_node_ = reinterpreted_target_node;
654
Austin Schuh6f3babe2020-01-26 20:34:50 -0800655 const Channel *const channel =
656 configuration()->channels()->Get(channel_index);
657
Austin Schuhcde938c2020-02-02 17:30:07 -0800658 VLOG(1) << " Configuring merger " << this << " for channel " << channel_index
659 << " "
660 << configuration::CleanedChannelToString(
661 configuration()->channels()->Get(channel_index));
662
Austin Schuh6f3babe2020-01-26 20:34:50 -0800663 MessageHeaderQueue *message_header_queue = nullptr;
664
665 // Figure out if this log file is from our point of view, or the other node's
666 // point of view.
667 if (node() == reinterpreted_target_node) {
Austin Schuhcde938c2020-02-02 17:30:07 -0800668 VLOG(1) << " Replaying as logged node " << filename();
669
670 if (configuration::ChannelIsSendableOnNode(channel, node())) {
671 VLOG(1) << " Data on node";
672 message_header_queue = &(channels_[channel_index].data);
673 } else if (configuration::ChannelIsReadableOnNode(channel, node())) {
674 VLOG(1) << " Timestamps on node";
675 message_header_queue =
676 &(channels_[channel_index].timestamps[configuration::GetNodeIndex(
677 configuration(), node())]);
Austin Schuh6f3babe2020-01-26 20:34:50 -0800678 } else {
Austin Schuhcde938c2020-02-02 17:30:07 -0800679 VLOG(1) << " Dropping";
Austin Schuh6f3babe2020-01-26 20:34:50 -0800680 }
681 } else {
Austin Schuhcde938c2020-02-02 17:30:07 -0800682 VLOG(1) << " Replaying as other node " << filename();
Austin Schuh6f3babe2020-01-26 20:34:50 -0800683 // We are replaying from another node's point of view. The only interesting
Austin Schuhcde938c2020-02-02 17:30:07 -0800684 // data is data that is sent from our node and received on theirs.
685 if (configuration::ChannelIsReadableOnNode(channel,
686 reinterpreted_target_node) &&
687 configuration::ChannelIsSendableOnNode(channel, node())) {
688 VLOG(1) << " Readable on target node";
Austin Schuh6f3babe2020-01-26 20:34:50 -0800689 // Data from another node.
690 message_header_queue = &(channels_[channel_index].data);
691 } else {
Austin Schuhcde938c2020-02-02 17:30:07 -0800692 VLOG(1) << " Dropping";
Austin Schuh6f3babe2020-01-26 20:34:50 -0800693 // This is either not sendable on the other node, or is a timestamp and
694 // therefore not interesting.
695 }
696 }
697
698 // If we found one, write it down. This will be nullptr when there is nothing
699 // relevant on this channel on this node for the target node. In that case,
700 // we want to drop the message instead of queueing it.
701 if (message_header_queue != nullptr) {
702 message_header_queue->timestamp_merger = timestamp_merger;
703 }
704}
705
706std::tuple<monotonic_clock::time_point, uint32_t,
707 FlatbufferVector<MessageHeader>>
708SplitMessageReader::PopOldest(int channel_index) {
709 CHECK_GT(channels_[channel_index].data.size(), 0u);
Austin Schuhcde938c2020-02-02 17:30:07 -0800710 const std::tuple<monotonic_clock::time_point, uint32_t, const MessageHeader *>
711 timestamp = channels_[channel_index].data.front_timestamp();
Austin Schuh6f3babe2020-01-26 20:34:50 -0800712 FlatbufferVector<MessageHeader> front =
713 std::move(channels_[channel_index].data.front());
Austin Schuh2f8fd752020-09-01 22:38:28 -0700714 channels_[channel_index].data.PopFront();
Austin Schuhcde938c2020-02-02 17:30:07 -0800715
Austin Schuh2f8fd752020-09-01 22:38:28 -0700716 VLOG(1) << MaybeNodeName(target_node_) << "Popped Data " << this << " "
717 << std::get<0>(timestamp) << " for "
718 << configuration::StrippedChannelToString(
719 configuration()->channels()->Get(channel_index))
720 << " (" << channel_index << ")";
Austin Schuhcde938c2020-02-02 17:30:07 -0800721
722 QueueMessages(std::get<0>(timestamp));
Austin Schuh6f3babe2020-01-26 20:34:50 -0800723
724 return std::make_tuple(std::get<0>(timestamp), std::get<1>(timestamp),
725 std::move(front));
726}
727
728std::tuple<monotonic_clock::time_point, uint32_t,
729 FlatbufferVector<MessageHeader>>
Austin Schuh2f8fd752020-09-01 22:38:28 -0700730SplitMessageReader::PopOldestTimestamp(int channel, int node_index) {
Austin Schuh6f3babe2020-01-26 20:34:50 -0800731 CHECK_GT(channels_[channel].timestamps[node_index].size(), 0u);
Austin Schuhcde938c2020-02-02 17:30:07 -0800732 const std::tuple<monotonic_clock::time_point, uint32_t, const MessageHeader *>
733 timestamp = channels_[channel].timestamps[node_index].front_timestamp();
Austin Schuh6f3babe2020-01-26 20:34:50 -0800734 FlatbufferVector<MessageHeader> front =
735 std::move(channels_[channel].timestamps[node_index].front());
Austin Schuh2f8fd752020-09-01 22:38:28 -0700736 channels_[channel].timestamps[node_index].PopFront();
Austin Schuhcde938c2020-02-02 17:30:07 -0800737
Austin Schuh2f8fd752020-09-01 22:38:28 -0700738 VLOG(1) << MaybeNodeName(target_node_) << "Popped timestamp " << this << " "
Austin Schuhee711052020-08-24 16:06:09 -0700739 << std::get<0>(timestamp) << " for "
740 << configuration::StrippedChannelToString(
741 configuration()->channels()->Get(channel))
Austin Schuh2f8fd752020-09-01 22:38:28 -0700742 << " on "
743 << configuration()->nodes()->Get(node_index)->name()->string_view()
744 << " (" << node_index << ")";
Austin Schuhcde938c2020-02-02 17:30:07 -0800745
746 QueueMessages(std::get<0>(timestamp));
Austin Schuh6f3babe2020-01-26 20:34:50 -0800747
748 return std::make_tuple(std::get<0>(timestamp), std::get<1>(timestamp),
749 std::move(front));
750}
751
Austin Schuhcde938c2020-02-02 17:30:07 -0800752bool SplitMessageReader::MessageHeaderQueue::emplace_back(
Austin Schuh6f3babe2020-01-26 20:34:50 -0800753 FlatbufferVector<MessageHeader> &&msg) {
754 CHECK(split_reader != nullptr);
755
756 // If there is no timestamp merger for this queue, nobody is listening. Drop
757 // the message. This happens when a log file from another node is replayed,
758 // and the timestamp mergers down stream just don't care.
759 if (timestamp_merger == nullptr) {
Austin Schuhcde938c2020-02-02 17:30:07 -0800760 return false;
Austin Schuh6f3babe2020-01-26 20:34:50 -0800761 }
762
763 CHECK(timestamps != msg.message().has_data())
764 << ": Got timestamps and data mixed up on a node. "
765 << FlatbufferToJson(msg);
766
767 data_.emplace_back(std::move(msg));
768
769 if (data_.size() == 1u) {
770 // Yup, new data. Notify.
771 if (timestamps) {
772 timestamp_merger->UpdateTimestamp(split_reader, front_timestamp());
773 } else {
774 timestamp_merger->Update(split_reader, front_timestamp());
775 }
776 }
Austin Schuhcde938c2020-02-02 17:30:07 -0800777
778 return true;
Austin Schuh6f3babe2020-01-26 20:34:50 -0800779}
780
Austin Schuh2f8fd752020-09-01 22:38:28 -0700781void SplitMessageReader::MessageHeaderQueue::PopFront() {
Austin Schuh6f3babe2020-01-26 20:34:50 -0800782 data_.pop_front();
783 if (data_.size() != 0u) {
784 // Yup, new data.
785 if (timestamps) {
786 timestamp_merger->UpdateTimestamp(split_reader, front_timestamp());
787 } else {
788 timestamp_merger->Update(split_reader, front_timestamp());
789 }
Austin Schuh2f8fd752020-09-01 22:38:28 -0700790 } else {
791 // Poke anyways to update the heap.
792 if (timestamps) {
793 timestamp_merger->UpdateTimestamp(
794 nullptr, std::make_tuple(monotonic_clock::min_time, 0, nullptr));
795 } else {
796 timestamp_merger->Update(
797 nullptr, std::make_tuple(monotonic_clock::min_time, 0, nullptr));
798 }
Austin Schuh6f3babe2020-01-26 20:34:50 -0800799 }
Austin Schuh05b70472020-01-01 17:11:17 -0800800}
801
802namespace {
803
Austin Schuh6f3babe2020-01-26 20:34:50 -0800804bool SplitMessageReaderHeapCompare(
805 const std::tuple<monotonic_clock::time_point, uint32_t,
806 SplitMessageReader *>
807 first,
808 const std::tuple<monotonic_clock::time_point, uint32_t,
809 SplitMessageReader *>
810 second) {
811 if (std::get<0>(first) > std::get<0>(second)) {
812 return true;
813 } else if (std::get<0>(first) == std::get<0>(second)) {
814 if (std::get<1>(first) > std::get<1>(second)) {
815 return true;
816 } else if (std::get<1>(first) == std::get<1>(second)) {
817 return std::get<2>(first) > std::get<2>(second);
818 } else {
819 return false;
820 }
821 } else {
822 return false;
823 }
824}
825
Austin Schuh05b70472020-01-01 17:11:17 -0800826bool ChannelHeapCompare(
827 const std::pair<monotonic_clock::time_point, int> first,
828 const std::pair<monotonic_clock::time_point, int> second) {
829 if (first.first > second.first) {
830 return true;
831 } else if (first.first == second.first) {
832 return first.second > second.second;
833 } else {
834 return false;
835 }
836}
837
838} // namespace
839
Austin Schuh6f3babe2020-01-26 20:34:50 -0800840TimestampMerger::TimestampMerger(
841 const Configuration *configuration,
842 std::vector<SplitMessageReader *> split_message_readers, int channel_index,
843 const Node *target_node, ChannelMerger *channel_merger)
844 : configuration_(configuration),
845 split_message_readers_(std::move(split_message_readers)),
846 channel_index_(channel_index),
847 node_index_(configuration::MultiNode(configuration)
848 ? configuration::GetNodeIndex(configuration, target_node)
849 : -1),
850 channel_merger_(channel_merger) {
851 // Tell the readers we care so they know who to notify.
Austin Schuhcde938c2020-02-02 17:30:07 -0800852 VLOG(1) << "Configuring channel " << channel_index << " target node "
853 << FlatbufferToJson(target_node);
Austin Schuh6f3babe2020-01-26 20:34:50 -0800854 for (SplitMessageReader *reader : split_message_readers_) {
855 reader->SetTimestampMerger(this, channel_index, target_node);
856 }
857
858 // And then determine if we need to track timestamps.
859 const Channel *channel = configuration->channels()->Get(channel_index);
860 if (!configuration::ChannelIsSendableOnNode(channel, target_node) &&
861 configuration::ChannelIsReadableOnNode(channel, target_node)) {
862 has_timestamps_ = true;
863 }
864}
865
866void TimestampMerger::PushMessageHeap(
Austin Schuhcde938c2020-02-02 17:30:07 -0800867 std::tuple<monotonic_clock::time_point, uint32_t, const MessageHeader *>
868 timestamp,
Austin Schuh6f3babe2020-01-26 20:34:50 -0800869 SplitMessageReader *split_message_reader) {
Austin Schuh2f8fd752020-09-01 22:38:28 -0700870 if (split_message_reader != nullptr) {
871 DCHECK(std::find_if(message_heap_.begin(), message_heap_.end(),
872 [split_message_reader](
873 const std::tuple<monotonic_clock::time_point,
874 uint32_t, SplitMessageReader *>
875 x) {
876 return std::get<2>(x) == split_message_reader;
877 }) == message_heap_.end())
878 << ": Pushing message when it is already in the heap.";
Austin Schuh6f3babe2020-01-26 20:34:50 -0800879
Austin Schuh2f8fd752020-09-01 22:38:28 -0700880 message_heap_.push_back(std::make_tuple(
881 std::get<0>(timestamp), std::get<1>(timestamp), split_message_reader));
Austin Schuh6f3babe2020-01-26 20:34:50 -0800882
Austin Schuh2f8fd752020-09-01 22:38:28 -0700883 std::push_heap(message_heap_.begin(), message_heap_.end(),
884 &SplitMessageReaderHeapCompare);
885 }
Austin Schuh6f3babe2020-01-26 20:34:50 -0800886
887 // If we are just a data merger, don't wait for timestamps.
888 if (!has_timestamps_) {
Austin Schuh2f8fd752020-09-01 22:38:28 -0700889 if (!message_heap_.empty()) {
890 channel_merger_->Update(std::get<0>(message_heap_[0]), channel_index_);
891 pushed_ = true;
892 } else {
893 // Remove ourselves if we are empty.
894 channel_merger_->Update(monotonic_clock::min_time, channel_index_);
895 }
Austin Schuh6f3babe2020-01-26 20:34:50 -0800896 }
897}
898
Austin Schuhcde938c2020-02-02 17:30:07 -0800899std::tuple<monotonic_clock::time_point, uint32_t, const MessageHeader *>
900TimestampMerger::oldest_message() const {
901 CHECK_GT(message_heap_.size(), 0u);
902 std::tuple<monotonic_clock::time_point, uint32_t, SplitMessageReader *>
903 oldest_message_reader = message_heap_.front();
904 return std::get<2>(oldest_message_reader)->oldest_message(channel_index_);
905}
906
907std::tuple<monotonic_clock::time_point, uint32_t, const MessageHeader *>
908TimestampMerger::oldest_timestamp() const {
909 CHECK_GT(timestamp_heap_.size(), 0u);
910 std::tuple<monotonic_clock::time_point, uint32_t, SplitMessageReader *>
911 oldest_message_reader = timestamp_heap_.front();
912 return std::get<2>(oldest_message_reader)
913 ->oldest_message(channel_index_, node_index_);
914}
915
Austin Schuh6f3babe2020-01-26 20:34:50 -0800916void TimestampMerger::PushTimestampHeap(
Austin Schuhcde938c2020-02-02 17:30:07 -0800917 std::tuple<monotonic_clock::time_point, uint32_t, const MessageHeader *>
918 timestamp,
Austin Schuh6f3babe2020-01-26 20:34:50 -0800919 SplitMessageReader *split_message_reader) {
Austin Schuh2f8fd752020-09-01 22:38:28 -0700920 if (split_message_reader != nullptr) {
921 DCHECK(std::find_if(timestamp_heap_.begin(), timestamp_heap_.end(),
922 [split_message_reader](
923 const std::tuple<monotonic_clock::time_point,
924 uint32_t, SplitMessageReader *>
925 x) {
926 return std::get<2>(x) == split_message_reader;
927 }) == timestamp_heap_.end())
928 << ": Pushing timestamp when it is already in the heap.";
Austin Schuh6f3babe2020-01-26 20:34:50 -0800929
Austin Schuh2f8fd752020-09-01 22:38:28 -0700930 timestamp_heap_.push_back(std::make_tuple(
931 std::get<0>(timestamp), std::get<1>(timestamp), split_message_reader));
Austin Schuh6f3babe2020-01-26 20:34:50 -0800932
Austin Schuh2f8fd752020-09-01 22:38:28 -0700933 std::push_heap(timestamp_heap_.begin(), timestamp_heap_.end(),
934 SplitMessageReaderHeapCompare);
935 }
Austin Schuh6f3babe2020-01-26 20:34:50 -0800936
937 // If we are a timestamp merger, don't wait for data. Missing data will be
938 // caught at read time.
939 if (has_timestamps_) {
Austin Schuh2f8fd752020-09-01 22:38:28 -0700940 if (!timestamp_heap_.empty()) {
941 channel_merger_->Update(std::get<0>(timestamp_heap_[0]), channel_index_);
942 pushed_ = true;
943 } else {
944 // Remove ourselves if we are empty.
945 channel_merger_->Update(monotonic_clock::min_time, channel_index_);
946 }
Austin Schuh6f3babe2020-01-26 20:34:50 -0800947 }
948}
949
950std::tuple<monotonic_clock::time_point, uint32_t,
951 FlatbufferVector<MessageHeader>>
952TimestampMerger::PopMessageHeap() {
953 // Pop the oldest message reader pointer off the heap.
954 CHECK_GT(message_heap_.size(), 0u);
955 std::tuple<monotonic_clock::time_point, uint32_t, SplitMessageReader *>
956 oldest_message_reader = message_heap_.front();
957
958 std::pop_heap(message_heap_.begin(), message_heap_.end(),
959 &SplitMessageReaderHeapCompare);
960 message_heap_.pop_back();
961
962 // Pop the oldest message. This re-pushes any messages from the reader to the
963 // message heap.
964 std::tuple<monotonic_clock::time_point, uint32_t,
965 FlatbufferVector<MessageHeader>>
966 oldest_message =
967 std::get<2>(oldest_message_reader)->PopOldest(channel_index_);
968
969 // Confirm that the time and queue_index we have recorded matches.
970 CHECK_EQ(std::get<0>(oldest_message), std::get<0>(oldest_message_reader));
971 CHECK_EQ(std::get<1>(oldest_message), std::get<1>(oldest_message_reader));
972
973 // Now, keep reading until we have found all duplicates.
Brian Silverman8a32ce62020-08-12 12:02:38 -0700974 while (!message_heap_.empty()) {
Austin Schuh6f3babe2020-01-26 20:34:50 -0800975 // See if it is a duplicate.
976 std::tuple<monotonic_clock::time_point, uint32_t, SplitMessageReader *>
977 next_oldest_message_reader = message_heap_.front();
978
Austin Schuhcde938c2020-02-02 17:30:07 -0800979 std::tuple<monotonic_clock::time_point, uint32_t, const MessageHeader *>
980 next_oldest_message_time = std::get<2>(next_oldest_message_reader)
981 ->oldest_message(channel_index_);
Austin Schuh6f3babe2020-01-26 20:34:50 -0800982
983 if (std::get<0>(next_oldest_message_time) == std::get<0>(oldest_message) &&
984 std::get<1>(next_oldest_message_time) == std::get<1>(oldest_message)) {
985 // Pop the message reader pointer.
986 std::pop_heap(message_heap_.begin(), message_heap_.end(),
987 &SplitMessageReaderHeapCompare);
988 message_heap_.pop_back();
989
990 // Pop the next oldest message. This re-pushes any messages from the
991 // reader.
992 std::tuple<monotonic_clock::time_point, uint32_t,
993 FlatbufferVector<MessageHeader>>
994 next_oldest_message = std::get<2>(next_oldest_message_reader)
995 ->PopOldest(channel_index_);
996
997 // And make sure the message matches in it's entirety.
998 CHECK(std::get<2>(oldest_message).span() ==
999 std::get<2>(next_oldest_message).span())
1000 << ": Data at the same timestamp doesn't match.";
1001 } else {
1002 break;
1003 }
1004 }
1005
1006 return oldest_message;
1007}
1008
1009std::tuple<monotonic_clock::time_point, uint32_t,
1010 FlatbufferVector<MessageHeader>>
1011TimestampMerger::PopTimestampHeap() {
1012 // Pop the oldest message reader pointer off the heap.
1013 CHECK_GT(timestamp_heap_.size(), 0u);
1014
1015 std::tuple<monotonic_clock::time_point, uint32_t, SplitMessageReader *>
1016 oldest_timestamp_reader = timestamp_heap_.front();
1017
1018 std::pop_heap(timestamp_heap_.begin(), timestamp_heap_.end(),
1019 &SplitMessageReaderHeapCompare);
1020 timestamp_heap_.pop_back();
1021
1022 CHECK(node_index_ != -1) << ": Timestamps in a single node environment";
1023
1024 // Pop the oldest message. This re-pushes any timestamps from the reader to
1025 // the timestamp heap.
1026 std::tuple<monotonic_clock::time_point, uint32_t,
1027 FlatbufferVector<MessageHeader>>
1028 oldest_timestamp = std::get<2>(oldest_timestamp_reader)
Austin Schuh2f8fd752020-09-01 22:38:28 -07001029 ->PopOldestTimestamp(channel_index_, node_index_);
Austin Schuh6f3babe2020-01-26 20:34:50 -08001030
1031 // Confirm that the time we have recorded matches.
1032 CHECK_EQ(std::get<0>(oldest_timestamp), std::get<0>(oldest_timestamp_reader));
1033 CHECK_EQ(std::get<1>(oldest_timestamp), std::get<1>(oldest_timestamp_reader));
1034
Austin Schuh2f8fd752020-09-01 22:38:28 -07001035 // Now, keep reading until we have found all duplicates.
1036 while (!timestamp_heap_.empty()) {
1037 // See if it is a duplicate.
1038 std::tuple<monotonic_clock::time_point, uint32_t, SplitMessageReader *>
1039 next_oldest_timestamp_reader = timestamp_heap_.front();
Austin Schuh6f3babe2020-01-26 20:34:50 -08001040
Austin Schuh2f8fd752020-09-01 22:38:28 -07001041 std::tuple<monotonic_clock::time_point, uint32_t, const MessageHeader *>
1042 next_oldest_timestamp_time =
1043 std::get<2>(next_oldest_timestamp_reader)
1044 ->oldest_message(channel_index_, node_index_);
Austin Schuh6f3babe2020-01-26 20:34:50 -08001045
Austin Schuh2f8fd752020-09-01 22:38:28 -07001046 if (std::get<0>(next_oldest_timestamp_time) ==
1047 std::get<0>(oldest_timestamp) &&
1048 std::get<1>(next_oldest_timestamp_time) ==
1049 std::get<1>(oldest_timestamp)) {
1050 // Pop the timestamp reader pointer.
1051 std::pop_heap(timestamp_heap_.begin(), timestamp_heap_.end(),
1052 &SplitMessageReaderHeapCompare);
1053 timestamp_heap_.pop_back();
1054
1055 // Pop the next oldest timestamp. This re-pushes any messages from the
1056 // reader.
1057 std::tuple<monotonic_clock::time_point, uint32_t,
1058 FlatbufferVector<MessageHeader>>
1059 next_oldest_timestamp =
1060 std::get<2>(next_oldest_timestamp_reader)
1061 ->PopOldestTimestamp(channel_index_, node_index_);
1062
1063 // And make sure the contents matches in it's entirety.
1064 CHECK(std::get<2>(oldest_timestamp).span() ==
1065 std::get<2>(next_oldest_timestamp).span())
1066 << ": Data at the same timestamp doesn't match, "
1067 << aos::FlatbufferToJson(std::get<2>(oldest_timestamp)) << " vs "
1068 << aos::FlatbufferToJson(std::get<2>(next_oldest_timestamp)) << " "
1069 << absl::BytesToHexString(std::string_view(
1070 reinterpret_cast<const char *>(
1071 std::get<2>(oldest_timestamp).span().data()),
1072 std::get<2>(oldest_timestamp).span().size()))
1073 << " vs "
1074 << absl::BytesToHexString(std::string_view(
1075 reinterpret_cast<const char *>(
1076 std::get<2>(next_oldest_timestamp).span().data()),
1077 std::get<2>(next_oldest_timestamp).span().size()));
1078
1079 } else {
1080 break;
1081 }
Austin Schuh8bd96322020-02-13 21:18:22 -08001082 }
1083
Austin Schuh2f8fd752020-09-01 22:38:28 -07001084 return oldest_timestamp;
Austin Schuh8bd96322020-02-13 21:18:22 -08001085}
1086
Austin Schuh6f3babe2020-01-26 20:34:50 -08001087std::tuple<TimestampMerger::DeliveryTimestamp, FlatbufferVector<MessageHeader>>
1088TimestampMerger::PopOldest() {
1089 if (has_timestamps_) {
Austin Schuh2f8fd752020-09-01 22:38:28 -07001090 VLOG(1) << "Looking for matching timestamp for "
1091 << configuration::StrippedChannelToString(
1092 configuration_->channels()->Get(channel_index_))
1093 << " (" << channel_index_ << ") "
1094 << " at " << std::get<0>(oldest_timestamp());
1095
Austin Schuh8bd96322020-02-13 21:18:22 -08001096 // Read the timestamps.
Austin Schuh6f3babe2020-01-26 20:34:50 -08001097 std::tuple<monotonic_clock::time_point, uint32_t,
1098 FlatbufferVector<MessageHeader>>
1099 oldest_timestamp = PopTimestampHeap();
1100
1101 TimestampMerger::DeliveryTimestamp timestamp;
1102 timestamp.monotonic_event_time =
1103 monotonic_clock::time_point(chrono::nanoseconds(
1104 std::get<2>(oldest_timestamp).message().monotonic_sent_time()));
1105 timestamp.realtime_event_time =
1106 realtime_clock::time_point(chrono::nanoseconds(
1107 std::get<2>(oldest_timestamp).message().realtime_sent_time()));
1108
1109 // Consistency check.
1110 CHECK_EQ(timestamp.monotonic_event_time, std::get<0>(oldest_timestamp));
1111 CHECK_EQ(std::get<2>(oldest_timestamp).message().queue_index(),
1112 std::get<1>(oldest_timestamp));
1113
1114 monotonic_clock::time_point remote_timestamp_monotonic_time(
1115 chrono::nanoseconds(
1116 std::get<2>(oldest_timestamp).message().monotonic_remote_time()));
1117
Austin Schuh8bd96322020-02-13 21:18:22 -08001118 // See if we have any data. If not, pass the problem up the chain.
Brian Silverman8a32ce62020-08-12 12:02:38 -07001119 if (message_heap_.empty()) {
Austin Schuhee711052020-08-24 16:06:09 -07001120 LOG(WARNING) << MaybeNodeName(configuration_->nodes()->Get(node_index_))
1121 << "No data to match timestamp on "
1122 << configuration::CleanedChannelToString(
1123 configuration_->channels()->Get(channel_index_))
1124 << " (" << channel_index_ << ")";
Austin Schuh8bd96322020-02-13 21:18:22 -08001125 return std::make_tuple(timestamp,
1126 std::move(std::get<2>(oldest_timestamp)));
1127 }
1128
Austin Schuh6f3babe2020-01-26 20:34:50 -08001129 while (true) {
Austin Schuhcde938c2020-02-02 17:30:07 -08001130 {
1131 // Ok, now try grabbing data until we find one which matches.
1132 std::tuple<monotonic_clock::time_point, uint32_t, const MessageHeader *>
1133 oldest_message_ref = oldest_message();
1134
1135 // Time at which the message was sent (this message is written from the
1136 // sending node's perspective.
1137 monotonic_clock::time_point remote_monotonic_time(chrono::nanoseconds(
1138 std::get<2>(oldest_message_ref)->monotonic_sent_time()));
1139
1140 if (remote_monotonic_time < remote_timestamp_monotonic_time) {
Austin Schuhee711052020-08-24 16:06:09 -07001141 LOG(WARNING) << configuration_->nodes()
1142 ->Get(node_index_)
1143 ->name()
1144 ->string_view()
1145 << " Undelivered message, skipping. Remote time is "
1146 << remote_monotonic_time << " timestamp is "
1147 << remote_timestamp_monotonic_time << " on channel "
1148 << configuration::StrippedChannelToString(
1149 configuration_->channels()->Get(channel_index_))
1150 << " (" << channel_index_ << ")";
Austin Schuhcde938c2020-02-02 17:30:07 -08001151 PopMessageHeap();
1152 continue;
1153 } else if (remote_monotonic_time > remote_timestamp_monotonic_time) {
Austin Schuhee711052020-08-24 16:06:09 -07001154 LOG(WARNING) << configuration_->nodes()
1155 ->Get(node_index_)
1156 ->name()
1157 ->string_view()
1158 << " Data not found. Remote time should be "
1159 << remote_timestamp_monotonic_time
1160 << ", message time is " << remote_monotonic_time
1161 << " on channel "
1162 << configuration::StrippedChannelToString(
1163 configuration_->channels()->Get(channel_index_))
Austin Schuh2f8fd752020-09-01 22:38:28 -07001164 << " (" << channel_index_ << ")"
1165 << (VLOG_IS_ON(1) ? DebugString() : "");
Austin Schuhcde938c2020-02-02 17:30:07 -08001166 return std::make_tuple(timestamp,
1167 std::move(std::get<2>(oldest_timestamp)));
1168 }
1169
1170 timestamp.monotonic_remote_time = remote_monotonic_time;
1171 }
1172
Austin Schuh2f8fd752020-09-01 22:38:28 -07001173 VLOG(1) << "Found matching data "
1174 << configuration::StrippedChannelToString(
1175 configuration_->channels()->Get(channel_index_))
1176 << " (" << channel_index_ << ")";
Austin Schuh6f3babe2020-01-26 20:34:50 -08001177 std::tuple<monotonic_clock::time_point, uint32_t,
1178 FlatbufferVector<MessageHeader>>
1179 oldest_message = PopMessageHeap();
1180
Austin Schuh6f3babe2020-01-26 20:34:50 -08001181 timestamp.realtime_remote_time =
1182 realtime_clock::time_point(chrono::nanoseconds(
1183 std::get<2>(oldest_message).message().realtime_sent_time()));
1184 timestamp.remote_queue_index =
1185 std::get<2>(oldest_message).message().queue_index();
1186
Austin Schuhcde938c2020-02-02 17:30:07 -08001187 CHECK_EQ(timestamp.monotonic_remote_time,
1188 remote_timestamp_monotonic_time);
1189
1190 CHECK_EQ(timestamp.remote_queue_index,
1191 std::get<2>(oldest_timestamp).message().remote_queue_index())
1192 << ": " << FlatbufferToJson(&std::get<2>(oldest_timestamp).message())
1193 << " data "
1194 << FlatbufferToJson(&std::get<2>(oldest_message).message());
Austin Schuh6f3babe2020-01-26 20:34:50 -08001195
Austin Schuh30dd5c52020-08-01 14:43:44 -07001196 return std::make_tuple(timestamp, std::move(std::get<2>(oldest_message)));
Austin Schuh6f3babe2020-01-26 20:34:50 -08001197 }
1198 } else {
1199 std::tuple<monotonic_clock::time_point, uint32_t,
1200 FlatbufferVector<MessageHeader>>
1201 oldest_message = PopMessageHeap();
1202
1203 TimestampMerger::DeliveryTimestamp timestamp;
1204 timestamp.monotonic_event_time =
1205 monotonic_clock::time_point(chrono::nanoseconds(
1206 std::get<2>(oldest_message).message().monotonic_sent_time()));
1207 timestamp.realtime_event_time =
1208 realtime_clock::time_point(chrono::nanoseconds(
1209 std::get<2>(oldest_message).message().realtime_sent_time()));
1210 timestamp.remote_queue_index = 0xffffffff;
1211
1212 CHECK_EQ(std::get<0>(oldest_message), timestamp.monotonic_event_time);
1213 CHECK_EQ(std::get<1>(oldest_message),
1214 std::get<2>(oldest_message).message().queue_index());
1215
Austin Schuh30dd5c52020-08-01 14:43:44 -07001216 return std::make_tuple(timestamp, std::move(std::get<2>(oldest_message)));
Austin Schuh6f3babe2020-01-26 20:34:50 -08001217 }
1218}
1219
Austin Schuh8bd96322020-02-13 21:18:22 -08001220void TimestampMerger::NoticeAtEnd() { channel_merger_->NoticeAtEnd(); }
1221
Austin Schuh6f3babe2020-01-26 20:34:50 -08001222namespace {
1223std::vector<std::unique_ptr<SplitMessageReader>> MakeSplitMessageReaders(
1224 const std::vector<std::vector<std::string>> &filenames) {
1225 CHECK_GT(filenames.size(), 0u);
1226 // Build up all the SplitMessageReaders.
1227 std::vector<std::unique_ptr<SplitMessageReader>> result;
1228 for (const std::vector<std::string> &filenames : filenames) {
1229 result.emplace_back(std::make_unique<SplitMessageReader>(filenames));
1230 }
1231 return result;
1232}
1233} // namespace
1234
1235ChannelMerger::ChannelMerger(
1236 const std::vector<std::vector<std::string>> &filenames)
1237 : split_message_readers_(MakeSplitMessageReaders(filenames)),
Austin Schuh97789fc2020-08-01 14:42:45 -07001238 log_file_header_(split_message_readers_[0]->raw_log_file_header()) {
Austin Schuh6f3babe2020-01-26 20:34:50 -08001239 // Now, confirm that the configuration matches for each and pick a start time.
1240 // Also return the list of possible nodes.
1241 for (const std::unique_ptr<SplitMessageReader> &reader :
1242 split_message_readers_) {
1243 CHECK(CompareFlatBuffer(log_file_header_.message().configuration(),
1244 reader->log_file_header()->configuration()))
1245 << ": Replaying log files with different configurations isn't "
1246 "supported";
1247 }
1248
1249 nodes_ = configuration::GetNodes(configuration());
1250}
1251
1252bool ChannelMerger::SetNode(const Node *target_node) {
1253 std::vector<SplitMessageReader *> split_message_readers;
1254 for (const std::unique_ptr<SplitMessageReader> &reader :
1255 split_message_readers_) {
1256 split_message_readers.emplace_back(reader.get());
1257 }
1258
1259 // Go find a log_file_header for this node.
1260 {
1261 bool found_node = false;
1262
1263 for (const std::unique_ptr<SplitMessageReader> &reader :
1264 split_message_readers_) {
James Kuszmaulfc273dc2020-05-09 17:56:19 -07001265 // In order to identify which logfile(s) map to the target node, do a
1266 // logical comparison of the nodes, by confirming that we are either in a
1267 // single-node setup (where the nodes will both be nullptr) or that the
1268 // node names match (but the other node fields--e.g., hostname lists--may
1269 // not).
1270 const bool both_null =
1271 reader->node() == nullptr && target_node == nullptr;
1272 const bool both_have_name =
1273 (reader->node() != nullptr) && (target_node != nullptr) &&
1274 (reader->node()->has_name() && target_node->has_name());
1275 const bool node_names_identical =
Brian Silvermand90905f2020-09-23 14:42:56 -07001276 both_have_name && (reader->node()->name()->string_view() ==
1277 target_node->name()->string_view());
James Kuszmaulfc273dc2020-05-09 17:56:19 -07001278 if (both_null || node_names_identical) {
Austin Schuh6f3babe2020-01-26 20:34:50 -08001279 if (!found_node) {
1280 found_node = true;
1281 log_file_header_ = CopyFlatBuffer(reader->log_file_header());
Austin Schuhcde938c2020-02-02 17:30:07 -08001282 VLOG(1) << "Found log file " << reader->filename() << " with node "
1283 << FlatbufferToJson(reader->node()) << " start_time "
1284 << monotonic_start_time();
Austin Schuh6f3babe2020-01-26 20:34:50 -08001285 } else {
Austin Schuh2f8fd752020-09-01 22:38:28 -07001286 // Find the earliest start time. That way, if we get a full log file
1287 // directly from the node, and a partial later, we start with the
1288 // full. Update our header to match that.
1289 const monotonic_clock::time_point new_monotonic_start_time(
1290 chrono::nanoseconds(
1291 reader->log_file_header()->monotonic_start_time()));
1292 const realtime_clock::time_point new_realtime_start_time(
1293 chrono::nanoseconds(
1294 reader->log_file_header()->realtime_start_time()));
1295
1296 if (monotonic_start_time() == monotonic_clock::min_time ||
1297 (new_monotonic_start_time != monotonic_clock::min_time &&
1298 new_monotonic_start_time < monotonic_start_time())) {
1299 log_file_header_.mutable_message()->mutate_monotonic_start_time(
1300 new_monotonic_start_time.time_since_epoch().count());
1301 log_file_header_.mutable_message()->mutate_realtime_start_time(
1302 new_realtime_start_time.time_since_epoch().count());
1303 VLOG(1) << "Updated log file " << reader->filename()
1304 << " with node " << FlatbufferToJson(reader->node())
1305 << " start_time " << new_monotonic_start_time;
1306 }
Austin Schuh6f3babe2020-01-26 20:34:50 -08001307 }
1308 }
1309 }
1310
1311 if (!found_node) {
1312 LOG(WARNING) << "Failed to find log file for node "
1313 << FlatbufferToJson(target_node);
1314 return false;
1315 }
1316 }
1317
1318 // Build up all the timestamp mergers. This connects up all the
1319 // SplitMessageReaders.
1320 timestamp_mergers_.reserve(configuration()->channels()->size());
1321 for (size_t channel_index = 0;
1322 channel_index < configuration()->channels()->size(); ++channel_index) {
1323 timestamp_mergers_.emplace_back(
1324 configuration(), split_message_readers, channel_index,
1325 configuration::GetNode(configuration(), target_node), this);
1326 }
1327
1328 // And prime everything.
Austin Schuh6f3babe2020-01-26 20:34:50 -08001329 for (std::unique_ptr<SplitMessageReader> &split_message_reader :
1330 split_message_readers_) {
Austin Schuhcde938c2020-02-02 17:30:07 -08001331 split_message_reader->QueueMessages(
1332 split_message_reader->monotonic_start_time());
Austin Schuh6f3babe2020-01-26 20:34:50 -08001333 }
1334
1335 node_ = configuration::GetNodeOrDie(configuration(), target_node);
1336 return true;
1337}
1338
Austin Schuh858c9f32020-08-31 16:56:12 -07001339monotonic_clock::time_point ChannelMerger::OldestMessageTime() const {
Brian Silverman8a32ce62020-08-12 12:02:38 -07001340 if (channel_heap_.empty()) {
Austin Schuh6f3babe2020-01-26 20:34:50 -08001341 return monotonic_clock::max_time;
1342 }
1343 return channel_heap_.front().first;
1344}
1345
1346void ChannelMerger::PushChannelHeap(monotonic_clock::time_point timestamp,
1347 int channel_index) {
1348 // Pop and recreate the heap if it has already been pushed. And since we are
1349 // pushing again, we don't need to clear pushed.
1350 if (timestamp_mergers_[channel_index].pushed()) {
Brian Silverman8a32ce62020-08-12 12:02:38 -07001351 const auto channel_iterator = std::find_if(
Austin Schuh6f3babe2020-01-26 20:34:50 -08001352 channel_heap_.begin(), channel_heap_.end(),
1353 [channel_index](const std::pair<monotonic_clock::time_point, int> x) {
1354 return x.second == channel_index;
Brian Silverman8a32ce62020-08-12 12:02:38 -07001355 });
1356 DCHECK(channel_iterator != channel_heap_.end());
1357 if (std::get<0>(*channel_iterator) == timestamp) {
1358 // It's already in the heap, in the correct spot, so nothing
1359 // more for us to do here.
1360 return;
1361 }
1362 channel_heap_.erase(channel_iterator);
Austin Schuh6f3babe2020-01-26 20:34:50 -08001363 std::make_heap(channel_heap_.begin(), channel_heap_.end(),
1364 ChannelHeapCompare);
1365 }
1366
Austin Schuh2f8fd752020-09-01 22:38:28 -07001367 if (timestamp == monotonic_clock::min_time) {
1368 timestamp_mergers_[channel_index].set_pushed(false);
1369 return;
1370 }
1371
Austin Schuh05b70472020-01-01 17:11:17 -08001372 channel_heap_.push_back(std::make_pair(timestamp, channel_index));
1373
1374 // The default sort puts the newest message first. Use a custom comparator to
1375 // put the oldest message first.
1376 std::push_heap(channel_heap_.begin(), channel_heap_.end(),
1377 ChannelHeapCompare);
1378}
1379
Austin Schuh2f8fd752020-09-01 22:38:28 -07001380void ChannelMerger::VerifyHeaps() {
Austin Schuh661a8d82020-09-13 17:25:56 -07001381 std::vector<std::pair<monotonic_clock::time_point, int>> channel_heap =
1382 channel_heap_;
1383 std::make_heap(channel_heap.begin(), channel_heap.end(), &ChannelHeapCompare);
Austin Schuh2f8fd752020-09-01 22:38:28 -07001384
Austin Schuh661a8d82020-09-13 17:25:56 -07001385 for (size_t i = 0; i < channel_heap_.size(); ++i) {
1386 CHECK(channel_heap_[i] == channel_heap[i]) << ": Heaps diverged...";
1387 CHECK_EQ(
1388 std::get<0>(channel_heap[i]),
1389 timestamp_mergers_[std::get<1>(channel_heap[i])].channel_merger_time());
Austin Schuh2f8fd752020-09-01 22:38:28 -07001390 }
1391}
1392
Austin Schuh6f3babe2020-01-26 20:34:50 -08001393std::tuple<TimestampMerger::DeliveryTimestamp, int,
1394 FlatbufferVector<MessageHeader>>
1395ChannelMerger::PopOldest() {
Austin Schuh8bd96322020-02-13 21:18:22 -08001396 CHECK_GT(channel_heap_.size(), 0u);
Austin Schuh05b70472020-01-01 17:11:17 -08001397 std::pair<monotonic_clock::time_point, int> oldest_channel_data =
1398 channel_heap_.front();
Austin Schuh6f3babe2020-01-26 20:34:50 -08001399 int channel_index = oldest_channel_data.second;
Austin Schuh05b70472020-01-01 17:11:17 -08001400 std::pop_heap(channel_heap_.begin(), channel_heap_.end(),
1401 &ChannelHeapCompare);
1402 channel_heap_.pop_back();
Austin Schuh8bd96322020-02-13 21:18:22 -08001403
Austin Schuh6f3babe2020-01-26 20:34:50 -08001404 timestamp_mergers_[channel_index].set_pushed(false);
Austin Schuh05b70472020-01-01 17:11:17 -08001405
Austin Schuh6f3babe2020-01-26 20:34:50 -08001406 TimestampMerger *merger = &timestamp_mergers_[channel_index];
Austin Schuh05b70472020-01-01 17:11:17 -08001407
Austin Schuhcde938c2020-02-02 17:30:07 -08001408 // Merger handles any queueing needed from here.
Austin Schuh6f3babe2020-01-26 20:34:50 -08001409 std::tuple<TimestampMerger::DeliveryTimestamp,
1410 FlatbufferVector<MessageHeader>>
1411 message = merger->PopOldest();
Brian Silverman8a32ce62020-08-12 12:02:38 -07001412 DCHECK_EQ(std::get<0>(message).monotonic_event_time,
1413 oldest_channel_data.first)
1414 << ": channel_heap_ was corrupted for " << channel_index << ": "
1415 << DebugString();
Austin Schuh05b70472020-01-01 17:11:17 -08001416
Austin Schuh2f8fd752020-09-01 22:38:28 -07001417 CHECK_GE(std::get<0>(message).monotonic_event_time, last_popped_time_)
1418 << ": " << MaybeNodeName(log_file_header()->node())
1419 << "Messages came off the queue out of order. " << DebugString();
1420 last_popped_time_ = std::get<0>(message).monotonic_event_time;
1421
1422 VLOG(1) << "Popped " << last_popped_time_ << " "
1423 << configuration::StrippedChannelToString(
1424 configuration()->channels()->Get(channel_index))
1425 << " (" << channel_index << ")";
1426
Austin Schuh6f3babe2020-01-26 20:34:50 -08001427 return std::make_tuple(std::get<0>(message), channel_index,
1428 std::move(std::get<1>(message)));
1429}
1430
Austin Schuhcde938c2020-02-02 17:30:07 -08001431std::string SplitMessageReader::MessageHeaderQueue::DebugString() const {
1432 std::stringstream ss;
1433 for (size_t i = 0; i < data_.size(); ++i) {
Austin Schuh2f8fd752020-09-01 22:38:28 -07001434 if (i < 5 || i + 5 > data_.size()) {
1435 if (timestamps) {
1436 ss << " msg: ";
1437 } else {
1438 ss << " timestamp: ";
1439 }
1440 ss << monotonic_clock::time_point(
1441 chrono::nanoseconds(data_[i].message().monotonic_sent_time()))
Austin Schuhcde938c2020-02-02 17:30:07 -08001442 << " ("
Austin Schuh2f8fd752020-09-01 22:38:28 -07001443 << realtime_clock::time_point(
1444 chrono::nanoseconds(data_[i].message().realtime_sent_time()))
1445 << ") " << data_[i].message().queue_index();
1446 if (timestamps) {
1447 ss << " <- remote "
1448 << monotonic_clock::time_point(chrono::nanoseconds(
1449 data_[i].message().monotonic_remote_time()))
1450 << " ("
1451 << realtime_clock::time_point(chrono::nanoseconds(
1452 data_[i].message().realtime_remote_time()))
1453 << ")";
1454 }
1455 ss << "\n";
1456 } else if (i == 5) {
1457 ss << " ...\n";
Austin Schuh6f3babe2020-01-26 20:34:50 -08001458 }
Austin Schuhcde938c2020-02-02 17:30:07 -08001459 }
Austin Schuh6f3babe2020-01-26 20:34:50 -08001460
Austin Schuhcde938c2020-02-02 17:30:07 -08001461 return ss.str();
1462}
Austin Schuh6f3babe2020-01-26 20:34:50 -08001463
Austin Schuhcde938c2020-02-02 17:30:07 -08001464std::string SplitMessageReader::DebugString(int channel) const {
1465 std::stringstream ss;
1466 ss << "[\n";
1467 ss << channels_[channel].data.DebugString();
1468 ss << " ]";
1469 return ss.str();
1470}
Austin Schuh6f3babe2020-01-26 20:34:50 -08001471
Austin Schuhcde938c2020-02-02 17:30:07 -08001472std::string SplitMessageReader::DebugString(int channel, int node_index) const {
1473 std::stringstream ss;
1474 ss << "[\n";
1475 ss << channels_[channel].timestamps[node_index].DebugString();
1476 ss << " ]";
1477 return ss.str();
1478}
1479
1480std::string TimestampMerger::DebugString() const {
1481 std::stringstream ss;
1482
1483 if (timestamp_heap_.size() > 0) {
1484 ss << " timestamp_heap {\n";
1485 std::vector<
1486 std::tuple<monotonic_clock::time_point, uint32_t, SplitMessageReader *>>
1487 timestamp_heap = timestamp_heap_;
1488 while (timestamp_heap.size() > 0u) {
1489 std::tuple<monotonic_clock::time_point, uint32_t, SplitMessageReader *>
1490 oldest_timestamp_reader = timestamp_heap.front();
1491
1492 ss << " " << std::get<2>(oldest_timestamp_reader) << " "
1493 << std::get<0>(oldest_timestamp_reader) << " queue_index ("
1494 << std::get<1>(oldest_timestamp_reader) << ") ttq "
1495 << std::get<2>(oldest_timestamp_reader)->time_to_queue() << " "
1496 << std::get<2>(oldest_timestamp_reader)->filename() << " -> "
1497 << std::get<2>(oldest_timestamp_reader)
1498 ->DebugString(channel_index_, node_index_)
1499 << "\n";
1500
1501 std::pop_heap(timestamp_heap.begin(), timestamp_heap.end(),
1502 &SplitMessageReaderHeapCompare);
1503 timestamp_heap.pop_back();
1504 }
1505 ss << " }\n";
1506 }
1507
1508 ss << " message_heap {\n";
1509 {
1510 std::vector<
1511 std::tuple<monotonic_clock::time_point, uint32_t, SplitMessageReader *>>
1512 message_heap = message_heap_;
Brian Silverman8a32ce62020-08-12 12:02:38 -07001513 while (!message_heap.empty()) {
Austin Schuhcde938c2020-02-02 17:30:07 -08001514 std::tuple<monotonic_clock::time_point, uint32_t, SplitMessageReader *>
1515 oldest_message_reader = message_heap.front();
1516
1517 ss << " " << std::get<2>(oldest_message_reader) << " "
1518 << std::get<0>(oldest_message_reader) << " queue_index ("
1519 << std::get<1>(oldest_message_reader) << ") ttq "
1520 << std::get<2>(oldest_message_reader)->time_to_queue() << " "
1521 << std::get<2>(oldest_message_reader)->filename() << " -> "
1522 << std::get<2>(oldest_message_reader)->DebugString(channel_index_)
1523 << "\n";
1524
1525 std::pop_heap(message_heap.begin(), message_heap.end(),
1526 &SplitMessageReaderHeapCompare);
1527 message_heap.pop_back();
Austin Schuh6f3babe2020-01-26 20:34:50 -08001528 }
Austin Schuh05b70472020-01-01 17:11:17 -08001529 }
Austin Schuhcde938c2020-02-02 17:30:07 -08001530 ss << " }";
1531
1532 return ss.str();
1533}
1534
1535std::string ChannelMerger::DebugString() const {
1536 std::stringstream ss;
1537 ss << "start_time " << realtime_start_time() << " " << monotonic_start_time()
1538 << "\n";
1539 ss << "channel_heap {\n";
1540 std::vector<std::pair<monotonic_clock::time_point, int>> channel_heap =
1541 channel_heap_;
Brian Silverman8a32ce62020-08-12 12:02:38 -07001542 while (!channel_heap.empty()) {
Austin Schuhcde938c2020-02-02 17:30:07 -08001543 std::tuple<monotonic_clock::time_point, int> channel = channel_heap.front();
1544 ss << " " << std::get<0>(channel) << " (" << std::get<1>(channel) << ") "
1545 << configuration::CleanedChannelToString(
1546 configuration()->channels()->Get(std::get<1>(channel)))
1547 << "\n";
1548
1549 ss << timestamp_mergers_[std::get<1>(channel)].DebugString() << "\n";
1550
1551 std::pop_heap(channel_heap.begin(), channel_heap.end(),
1552 &ChannelHeapCompare);
1553 channel_heap.pop_back();
1554 }
1555 ss << "}";
1556
1557 return ss.str();
Austin Schuh05b70472020-01-01 17:11:17 -08001558}
1559
Austin Schuhee711052020-08-24 16:06:09 -07001560std::string MaybeNodeName(const Node *node) {
1561 if (node != nullptr) {
1562 return node->name()->str() + " ";
1563 }
1564 return "";
1565}
1566
Brian Silvermanf51499a2020-09-21 12:49:08 -07001567} // namespace aos::logger