blob: 70dae61f580fc5ed77e547c95b69b7bfebe0efd4 [file] [log] [blame]
Austin Schuha36c8902019-12-30 18:07:15 -08001#include "aos/events/logging/logfile_utils.h"
2
3#include <fcntl.h>
Austin Schuha36c8902019-12-30 18:07:15 -08004#include <sys/stat.h>
5#include <sys/types.h>
6#include <sys/uio.h>
7
Brian Silvermanf51499a2020-09-21 12:49:08 -07008#include <algorithm>
9#include <climits>
Austin Schuha36c8902019-12-30 18:07:15 -080010
Austin Schuhe4fca832020-03-07 16:58:53 -080011#include "absl/strings/escaping.h"
Austin Schuh05b70472020-01-01 17:11:17 -080012#include "aos/configuration.h"
Austin Schuhfa895892020-01-07 20:07:41 -080013#include "aos/flatbuffer_merge.h"
Austin Schuh6f3babe2020-01-26 20:34:50 -080014#include "aos/util/file.h"
Austin Schuha36c8902019-12-30 18:07:15 -080015#include "flatbuffers/flatbuffers.h"
Austin Schuh05b70472020-01-01 17:11:17 -080016#include "gflags/gflags.h"
17#include "glog/logging.h"
Austin Schuha36c8902019-12-30 18:07:15 -080018
Brian Silvermanf59fe3f2020-09-22 21:04:09 -070019#if defined(__x86_64__)
20#define ENABLE_LZMA 1
21#elif defined(__aarch64__)
22#define ENABLE_LZMA 1
23#else
24#define ENABLE_LZMA 0
25#endif
26
27#if ENABLE_LZMA
28#include "aos/events/logging/lzma_encoder.h"
29#endif
30
Austin Schuh7fbf5a72020-09-21 16:28:13 -070031DEFINE_int32(flush_size, 128000,
Austin Schuha36c8902019-12-30 18:07:15 -080032 "Number of outstanding bytes to allow before flushing to disk.");
33
Brian Silvermanf51499a2020-09-21 12:49:08 -070034namespace aos::logger {
Austin Schuha36c8902019-12-30 18:07:15 -080035
Austin Schuh05b70472020-01-01 17:11:17 -080036namespace chrono = std::chrono;
37
Brian Silvermanf51499a2020-09-21 12:49:08 -070038DetachedBufferWriter::DetachedBufferWriter(
39 std::string_view filename, std::unique_ptr<DetachedBufferEncoder> encoder)
40 : filename_(filename), encoder_(std::move(encoder)) {
Austin Schuh6f3babe2020-01-26 20:34:50 -080041 util::MkdirP(filename, 0777);
42 fd_ = open(std::string(filename).c_str(),
43 O_RDWR | O_CLOEXEC | O_CREAT | O_EXCL, 0774);
44 VLOG(1) << "Opened " << filename << " for writing";
45 PCHECK(fd_ != -1) << ": Failed to open " << filename << " for writing";
Austin Schuha36c8902019-12-30 18:07:15 -080046}
47
48DetachedBufferWriter::~DetachedBufferWriter() {
Brian Silverman0465fcf2020-09-24 00:29:18 -070049 Close();
50 if (ran_out_of_space_) {
51 CHECK(acknowledge_ran_out_of_space_)
52 << ": Unacknowledged out of disk space, log file was not completed";
Brian Silvermanf51499a2020-09-21 12:49:08 -070053 }
Austin Schuh2f8fd752020-09-01 22:38:28 -070054}
55
Brian Silvermand90905f2020-09-23 14:42:56 -070056DetachedBufferWriter::DetachedBufferWriter(DetachedBufferWriter &&other) {
Austin Schuh2f8fd752020-09-01 22:38:28 -070057 *this = std::move(other);
58}
59
Brian Silverman87ac0402020-09-17 14:47:01 -070060// When other is destroyed "soon" (which it should be because we're getting an
61// rvalue reference to it), it will flush etc all the data we have queued up
62// (because that data will then be its data).
Austin Schuh2f8fd752020-09-01 22:38:28 -070063DetachedBufferWriter &DetachedBufferWriter::operator=(
64 DetachedBufferWriter &&other) {
Austin Schuh2f8fd752020-09-01 22:38:28 -070065 std::swap(filename_, other.filename_);
Brian Silvermanf51499a2020-09-21 12:49:08 -070066 std::swap(encoder_, other.encoder_);
Austin Schuh2f8fd752020-09-01 22:38:28 -070067 std::swap(fd_, other.fd_);
Brian Silverman0465fcf2020-09-24 00:29:18 -070068 std::swap(ran_out_of_space_, other.ran_out_of_space_);
69 std::swap(acknowledge_ran_out_of_space_, other.acknowledge_ran_out_of_space_);
Austin Schuh2f8fd752020-09-01 22:38:28 -070070 std::swap(iovec_, other.iovec_);
Brian Silvermanf51499a2020-09-21 12:49:08 -070071 std::swap(max_write_time_, other.max_write_time_);
72 std::swap(max_write_time_bytes_, other.max_write_time_bytes_);
73 std::swap(max_write_time_messages_, other.max_write_time_messages_);
74 std::swap(total_write_time_, other.total_write_time_);
75 std::swap(total_write_count_, other.total_write_count_);
76 std::swap(total_write_messages_, other.total_write_messages_);
77 std::swap(total_write_bytes_, other.total_write_bytes_);
Austin Schuh2f8fd752020-09-01 22:38:28 -070078 return *this;
Austin Schuha36c8902019-12-30 18:07:15 -080079}
80
Brian Silvermanf51499a2020-09-21 12:49:08 -070081void DetachedBufferWriter::QueueSpan(absl::Span<const uint8_t> span) {
82 if (encoder_->may_bypass() && span.size() > 4096u) {
83 // Over this threshold, we'll assume it's cheaper to add an extra
84 // syscall to write the data immediately instead of copying it to
85 // enqueue.
Austin Schuha36c8902019-12-30 18:07:15 -080086
Brian Silverman0465fcf2020-09-24 00:29:18 -070087 if (ran_out_of_space_) {
88 // We don't want any later data to be written after space becomes
89 // available, so refuse to write anything more once we've dropped data
90 // because we ran out of space.
91 VLOG(1) << "Ignoring span: " << span.size();
92 return;
93 }
94
Brian Silvermanf51499a2020-09-21 12:49:08 -070095 // First, flush everything.
96 while (encoder_->queue_size() > 0u) {
97 Flush();
98 }
Austin Schuhde031b72020-01-10 19:34:41 -080099
Brian Silvermanf51499a2020-09-21 12:49:08 -0700100 // Then, write it directly.
101 const auto start = aos::monotonic_clock::now();
102 const ssize_t written = write(fd_, span.data(), span.size());
103 const auto end = aos::monotonic_clock::now();
Brian Silverman0465fcf2020-09-24 00:29:18 -0700104 HandleWriteReturn(written, span.size());
Brian Silvermanf51499a2020-09-21 12:49:08 -0700105 UpdateStatsForWrite(end - start, written, 1);
106 } else {
107 encoder_->Encode(CopySpanAsDetachedBuffer(span));
Austin Schuha36c8902019-12-30 18:07:15 -0800108 }
Brian Silvermanf51499a2020-09-21 12:49:08 -0700109
110 FlushAtThreshold();
Austin Schuha36c8902019-12-30 18:07:15 -0800111}
112
Brian Silverman0465fcf2020-09-24 00:29:18 -0700113void DetachedBufferWriter::Close() {
114 if (fd_ == -1) {
115 return;
116 }
117 encoder_->Finish();
118 while (encoder_->queue_size() > 0) {
119 Flush();
120 }
121 if (close(fd_) == -1) {
122 if (errno == ENOSPC) {
123 ran_out_of_space_ = true;
124 } else {
125 PLOG(ERROR) << "Closing log file failed";
126 }
127 }
128 fd_ = -1;
129 VLOG(1) << "Closed " << filename_;
130}
131
Austin Schuha36c8902019-12-30 18:07:15 -0800132void DetachedBufferWriter::Flush() {
Brian Silvermanf51499a2020-09-21 12:49:08 -0700133 const auto queue = encoder_->queue();
134 if (queue.empty()) {
Austin Schuha36c8902019-12-30 18:07:15 -0800135 return;
136 }
Brian Silverman0465fcf2020-09-24 00:29:18 -0700137 if (ran_out_of_space_) {
138 // We don't want any later data to be written after space becomes available,
139 // so refuse to write anything more once we've dropped data because we ran
140 // out of space.
141 VLOG(1) << "Ignoring queue: " << queue.size();
142 encoder_->Clear(queue.size());
143 return;
144 }
Brian Silvermanf51499a2020-09-21 12:49:08 -0700145
Austin Schuha36c8902019-12-30 18:07:15 -0800146 iovec_.clear();
Brian Silvermanf51499a2020-09-21 12:49:08 -0700147 const size_t iovec_size = std::min<size_t>(queue.size(), IOV_MAX);
148 iovec_.resize(iovec_size);
Austin Schuha36c8902019-12-30 18:07:15 -0800149 size_t counted_size = 0;
Brian Silvermanf51499a2020-09-21 12:49:08 -0700150 for (size_t i = 0; i < iovec_size; ++i) {
151 iovec_[i].iov_base = const_cast<uint8_t *>(queue[i].data());
152 iovec_[i].iov_len = queue[i].size();
153 counted_size += iovec_[i].iov_len;
Austin Schuha36c8902019-12-30 18:07:15 -0800154 }
Brian Silvermanf51499a2020-09-21 12:49:08 -0700155
156 const auto start = aos::monotonic_clock::now();
Austin Schuha36c8902019-12-30 18:07:15 -0800157 const ssize_t written = writev(fd_, iovec_.data(), iovec_.size());
Brian Silvermanf51499a2020-09-21 12:49:08 -0700158 const auto end = aos::monotonic_clock::now();
Brian Silverman0465fcf2020-09-24 00:29:18 -0700159 HandleWriteReturn(written, counted_size);
Brian Silvermanf51499a2020-09-21 12:49:08 -0700160
161 encoder_->Clear(iovec_size);
162
163 UpdateStatsForWrite(end - start, written, iovec_size);
164}
165
Brian Silverman0465fcf2020-09-24 00:29:18 -0700166void DetachedBufferWriter::HandleWriteReturn(ssize_t write_return,
167 size_t write_size) {
168 if (write_return == -1 && errno == ENOSPC) {
169 ran_out_of_space_ = true;
170 return;
171 }
172 PCHECK(write_return >= 0) << ": write failed";
173 if (write_return < static_cast<ssize_t>(write_size)) {
174 // Sometimes this happens instead of ENOSPC. On a real filesystem, this
175 // never seems to happen in any other case. If we ever want to log to a
176 // socket, this will happen more often. However, until we get there, we'll
177 // just assume it means we ran out of space.
178 ran_out_of_space_ = true;
179 return;
180 }
181}
182
Brian Silvermanf51499a2020-09-21 12:49:08 -0700183void DetachedBufferWriter::UpdateStatsForWrite(
184 aos::monotonic_clock::duration duration, ssize_t written, int iovec_size) {
185 if (duration > max_write_time_) {
186 max_write_time_ = duration;
187 max_write_time_bytes_ = written;
188 max_write_time_messages_ = iovec_size;
189 }
190 total_write_time_ += duration;
191 ++total_write_count_;
192 total_write_messages_ += iovec_size;
193 total_write_bytes_ += written;
194}
195
196void DetachedBufferWriter::FlushAtThreshold() {
197 // Flush if we are at the max number of iovs per writev, because there's no
198 // point queueing up any more data in memory. Also flush once we have enough
199 // data queued up.
200 while (encoder_->queued_bytes() > static_cast<size_t>(FLAGS_flush_size) ||
201 encoder_->queue_size() >= IOV_MAX) {
202 Flush();
203 }
Austin Schuha36c8902019-12-30 18:07:15 -0800204}
205
206flatbuffers::Offset<MessageHeader> PackMessage(
207 flatbuffers::FlatBufferBuilder *fbb, const Context &context,
208 int channel_index, LogType log_type) {
209 flatbuffers::Offset<flatbuffers::Vector<uint8_t>> data_offset;
210
211 switch (log_type) {
212 case LogType::kLogMessage:
213 case LogType::kLogMessageAndDeliveryTime:
Austin Schuh6f3babe2020-01-26 20:34:50 -0800214 case LogType::kLogRemoteMessage:
Brian Silvermaneaa41d62020-07-08 19:47:35 -0700215 data_offset = fbb->CreateVector(
216 static_cast<const uint8_t *>(context.data), context.size);
Austin Schuha36c8902019-12-30 18:07:15 -0800217 break;
218
219 case LogType::kLogDeliveryTimeOnly:
220 break;
221 }
222
223 MessageHeader::Builder message_header_builder(*fbb);
224 message_header_builder.add_channel_index(channel_index);
Austin Schuh6f3babe2020-01-26 20:34:50 -0800225
226 switch (log_type) {
227 case LogType::kLogRemoteMessage:
228 message_header_builder.add_queue_index(context.remote_queue_index);
229 message_header_builder.add_monotonic_sent_time(
230 context.monotonic_remote_time.time_since_epoch().count());
231 message_header_builder.add_realtime_sent_time(
232 context.realtime_remote_time.time_since_epoch().count());
233 break;
234
235 case LogType::kLogMessage:
236 case LogType::kLogMessageAndDeliveryTime:
237 case LogType::kLogDeliveryTimeOnly:
238 message_header_builder.add_queue_index(context.queue_index);
239 message_header_builder.add_monotonic_sent_time(
240 context.monotonic_event_time.time_since_epoch().count());
241 message_header_builder.add_realtime_sent_time(
242 context.realtime_event_time.time_since_epoch().count());
243 break;
244 }
Austin Schuha36c8902019-12-30 18:07:15 -0800245
246 switch (log_type) {
247 case LogType::kLogMessage:
Austin Schuh6f3babe2020-01-26 20:34:50 -0800248 case LogType::kLogRemoteMessage:
Austin Schuha36c8902019-12-30 18:07:15 -0800249 message_header_builder.add_data(data_offset);
250 break;
251
252 case LogType::kLogMessageAndDeliveryTime:
253 message_header_builder.add_data(data_offset);
254 [[fallthrough]];
255
256 case LogType::kLogDeliveryTimeOnly:
257 message_header_builder.add_monotonic_remote_time(
258 context.monotonic_remote_time.time_since_epoch().count());
259 message_header_builder.add_realtime_remote_time(
260 context.realtime_remote_time.time_since_epoch().count());
261 message_header_builder.add_remote_queue_index(context.remote_queue_index);
262 break;
263 }
264
265 return message_header_builder.Finish();
266}
267
Brian Silvermanf51499a2020-09-21 12:49:08 -0700268SpanReader::SpanReader(std::string_view filename) : filename_(filename) {
Brian Silvermanf59fe3f2020-09-22 21:04:09 -0700269 static const std::string_view kXz = ".xz";
270 if (filename.substr(filename.size() - kXz.size()) == kXz) {
271#if ENABLE_LZMA
272 decoder_ = std::make_unique<LzmaDecoder>(filename);
273#else
274 LOG(FATAL) << "Reading xz-compressed files not supported on this platform";
275#endif
276 } else {
277 decoder_ = std::make_unique<DummyDecoder>(filename);
278 }
Austin Schuh05b70472020-01-01 17:11:17 -0800279}
280
281absl::Span<const uint8_t> SpanReader::ReadMessage() {
282 // Make sure we have enough for the size.
283 if (data_.size() - consumed_data_ < sizeof(flatbuffers::uoffset_t)) {
284 if (!ReadBlock()) {
285 return absl::Span<const uint8_t>();
286 }
287 }
288
289 // Now make sure we have enough for the message.
290 const size_t data_size =
291 flatbuffers::GetPrefixedSize(data_.data() + consumed_data_) +
292 sizeof(flatbuffers::uoffset_t);
Austin Schuhe4fca832020-03-07 16:58:53 -0800293 if (data_size == sizeof(flatbuffers::uoffset_t)) {
294 LOG(ERROR) << "Size of data is zero. Log file end is corrupted, skipping.";
295 LOG(ERROR) << " Rest of log file is "
296 << absl::BytesToHexString(std::string_view(
297 reinterpret_cast<const char *>(data_.data() +
298 consumed_data_),
299 data_.size() - consumed_data_));
300 return absl::Span<const uint8_t>();
301 }
Austin Schuh05b70472020-01-01 17:11:17 -0800302 while (data_.size() < consumed_data_ + data_size) {
303 if (!ReadBlock()) {
304 return absl::Span<const uint8_t>();
305 }
306 }
307
308 // And return it, consuming the data.
309 const uint8_t *data_ptr = data_.data() + consumed_data_;
310
311 consumed_data_ += data_size;
312
313 return absl::Span<const uint8_t>(data_ptr, data_size);
314}
315
316bool SpanReader::MessageAvailable() {
317 // Are we big enough to read the size?
318 if (data_.size() - consumed_data_ < sizeof(flatbuffers::uoffset_t)) {
319 return false;
320 }
321
322 // Then, are we big enough to read the full message?
323 const size_t data_size =
324 flatbuffers::GetPrefixedSize(data_.data() + consumed_data_) +
325 sizeof(flatbuffers::uoffset_t);
326 if (data_.size() < consumed_data_ + data_size) {
327 return false;
328 }
329
330 return true;
331}
332
333bool SpanReader::ReadBlock() {
Brian Silvermanf51499a2020-09-21 12:49:08 -0700334 // This is the amount of data we grab at a time. Doing larger chunks minimizes
335 // syscalls and helps decompressors batch things more efficiently.
Austin Schuh05b70472020-01-01 17:11:17 -0800336 constexpr size_t kReadSize = 256 * 1024;
337
338 // Strip off any unused data at the front.
339 if (consumed_data_ != 0) {
Brian Silvermanf51499a2020-09-21 12:49:08 -0700340 data_.erase_front(consumed_data_);
Austin Schuh05b70472020-01-01 17:11:17 -0800341 consumed_data_ = 0;
342 }
343
344 const size_t starting_size = data_.size();
345
346 // This should automatically grow the backing store. It won't shrink if we
347 // get a small chunk later. This reduces allocations when we want to append
348 // more data.
Brian Silvermanf51499a2020-09-21 12:49:08 -0700349 data_.resize(starting_size + kReadSize);
Austin Schuh05b70472020-01-01 17:11:17 -0800350
Brian Silvermanf51499a2020-09-21 12:49:08 -0700351 const size_t count =
352 decoder_->Read(data_.begin() + starting_size, data_.end());
353 data_.resize(starting_size + count);
Austin Schuh05b70472020-01-01 17:11:17 -0800354 if (count == 0) {
Austin Schuh05b70472020-01-01 17:11:17 -0800355 return false;
356 }
Austin Schuh05b70472020-01-01 17:11:17 -0800357
358 return true;
359}
360
Austin Schuh6f3babe2020-01-26 20:34:50 -0800361FlatbufferVector<LogFileHeader> ReadHeader(std::string_view filename) {
362 SpanReader span_reader(filename);
Austin Schuh6f3babe2020-01-26 20:34:50 -0800363 absl::Span<const uint8_t> config_data = span_reader.ReadMessage();
364
365 // Make sure something was read.
Austin Schuh97789fc2020-08-01 14:42:45 -0700366 CHECK(config_data != absl::Span<const uint8_t>())
367 << ": Failed to read header from: " << filename;
Austin Schuh6f3babe2020-01-26 20:34:50 -0800368
Austin Schuh5212cad2020-09-09 23:12:09 -0700369 // And copy the config so we have it forever, removing the size prefix.
Brian Silverman354697a2020-09-22 21:06:32 -0700370 ResizeableBuffer data;
371 data.resize(config_data.size() - sizeof(flatbuffers::uoffset_t));
372 memcpy(data.data(), config_data.begin() + sizeof(flatbuffers::uoffset_t),
373 data.size());
Austin Schuh6f3babe2020-01-26 20:34:50 -0800374 return FlatbufferVector<LogFileHeader>(std::move(data));
375}
376
Austin Schuh5212cad2020-09-09 23:12:09 -0700377FlatbufferVector<MessageHeader> ReadNthMessage(std::string_view filename,
378 size_t n) {
379 SpanReader span_reader(filename);
380 absl::Span<const uint8_t> data_span = span_reader.ReadMessage();
381 for (size_t i = 0; i < n + 1; ++i) {
382 data_span = span_reader.ReadMessage();
383
384 // Make sure something was read.
385 CHECK(data_span != absl::Span<const uint8_t>())
386 << ": Failed to read data from: " << filename;
387 }
388
Brian Silverman354697a2020-09-22 21:06:32 -0700389 // And copy the config so we have it forever, removing the size prefix.
390 ResizeableBuffer data;
391 data.resize(data_span.size() - sizeof(flatbuffers::uoffset_t));
392 memcpy(data.data(), data_span.begin() + sizeof(flatbuffers::uoffset_t),
393 data.size());
Austin Schuh5212cad2020-09-09 23:12:09 -0700394 return FlatbufferVector<MessageHeader>(std::move(data));
395}
396
Austin Schuh05b70472020-01-01 17:11:17 -0800397MessageReader::MessageReader(std::string_view filename)
Austin Schuh97789fc2020-08-01 14:42:45 -0700398 : span_reader_(filename),
399 raw_log_file_header_(FlatbufferVector<LogFileHeader>::Empty()) {
Austin Schuh05b70472020-01-01 17:11:17 -0800400 // Make sure we have enough to read the size.
Austin Schuh97789fc2020-08-01 14:42:45 -0700401 absl::Span<const uint8_t> header_data = span_reader_.ReadMessage();
Austin Schuh05b70472020-01-01 17:11:17 -0800402
403 // Make sure something was read.
Austin Schuh97789fc2020-08-01 14:42:45 -0700404 CHECK(header_data != absl::Span<const uint8_t>())
405 << ": Failed to read header from: " << filename;
Austin Schuh05b70472020-01-01 17:11:17 -0800406
Austin Schuh97789fc2020-08-01 14:42:45 -0700407 // And copy the header data so we have it forever.
Brian Silverman354697a2020-09-22 21:06:32 -0700408 ResizeableBuffer header_data_copy;
409 header_data_copy.resize(header_data.size() - sizeof(flatbuffers::uoffset_t));
410 memcpy(header_data_copy.data(),
411 header_data.begin() + sizeof(flatbuffers::uoffset_t),
412 header_data_copy.size());
Austin Schuh97789fc2020-08-01 14:42:45 -0700413 raw_log_file_header_ =
414 FlatbufferVector<LogFileHeader>(std::move(header_data_copy));
Austin Schuh05b70472020-01-01 17:11:17 -0800415
Austin Schuhcde938c2020-02-02 17:30:07 -0800416 max_out_of_order_duration_ =
Austin Schuh2f8fd752020-09-01 22:38:28 -0700417 chrono::nanoseconds(log_file_header()->max_out_of_order_duration());
Austin Schuhcde938c2020-02-02 17:30:07 -0800418
419 VLOG(1) << "Opened " << filename << " as node "
420 << FlatbufferToJson(log_file_header()->node());
Austin Schuh05b70472020-01-01 17:11:17 -0800421}
422
423std::optional<FlatbufferVector<MessageHeader>> MessageReader::ReadMessage() {
424 absl::Span<const uint8_t> msg_data = span_reader_.ReadMessage();
425 if (msg_data == absl::Span<const uint8_t>()) {
426 return std::nullopt;
427 }
428
Brian Silverman354697a2020-09-22 21:06:32 -0700429 ResizeableBuffer result_buffer;
430 result_buffer.resize(msg_data.size() - sizeof(flatbuffers::uoffset_t));
431 memcpy(result_buffer.data(),
432 msg_data.begin() + sizeof(flatbuffers::uoffset_t),
433 result_buffer.size());
434 FlatbufferVector<MessageHeader> result(std::move(result_buffer));
Austin Schuh05b70472020-01-01 17:11:17 -0800435
436 const monotonic_clock::time_point timestamp = monotonic_clock::time_point(
437 chrono::nanoseconds(result.message().monotonic_sent_time()));
438
439 newest_timestamp_ = std::max(newest_timestamp_, timestamp);
Austin Schuh8bd96322020-02-13 21:18:22 -0800440 VLOG(2) << "Read from " << filename() << " data " << FlatbufferToJson(result);
Austin Schuh6f3babe2020-01-26 20:34:50 -0800441 return std::move(result);
Austin Schuh05b70472020-01-01 17:11:17 -0800442}
443
Austin Schuh6f3babe2020-01-26 20:34:50 -0800444SplitMessageReader::SplitMessageReader(
Austin Schuhfa895892020-01-07 20:07:41 -0800445 const std::vector<std::string> &filenames)
446 : filenames_(filenames),
Austin Schuh97789fc2020-08-01 14:42:45 -0700447 log_file_header_(FlatbufferVector<LogFileHeader>::Empty()) {
Austin Schuhfa895892020-01-07 20:07:41 -0800448 CHECK(NextLogFile()) << ": filenames is empty. Need files to read.";
449
Austin Schuh6f3babe2020-01-26 20:34:50 -0800450 // Grab any log file header. They should all match (and we will check as we
451 // open more of them).
Austin Schuh97789fc2020-08-01 14:42:45 -0700452 log_file_header_ = message_reader_->raw_log_file_header();
Austin Schuhfa895892020-01-07 20:07:41 -0800453
Austin Schuh2f8fd752020-09-01 22:38:28 -0700454 for (size_t i = 1; i < filenames_.size(); ++i) {
455 MessageReader message_reader(filenames_[i]);
456
457 const monotonic_clock::time_point new_monotonic_start_time(
458 chrono::nanoseconds(
459 message_reader.log_file_header()->monotonic_start_time()));
460 const realtime_clock::time_point new_realtime_start_time(
461 chrono::nanoseconds(
462 message_reader.log_file_header()->realtime_start_time()));
463
464 // There are 2 types of part files. Part files from before time estimation
465 // has started, and part files after. We don't declare a log file "started"
466 // until time estimation is up. And once a log file starts, it should never
467 // stop again, and should remain constant.
468 // To compare both types of headers, we mutate our saved copy of the header
469 // to match the next chunk by updating time if we detect a stopped ->
470 // started transition.
471 if (monotonic_start_time() == monotonic_clock::min_time) {
472 CHECK_EQ(realtime_start_time(), realtime_clock::min_time);
473 // We should only be missing the monotonic start time when logging data
Brian Silverman87ac0402020-09-17 14:47:01 -0700474 // for remote nodes. We don't have a good way to determine the remote
Austin Schuh2f8fd752020-09-01 22:38:28 -0700475 // realtime offset, so it shouldn't be filled out.
476 // TODO(austin): If we have a good way, feel free to fill it out. It
477 // probably won't be better than we could do in post though with the same
478 // data.
479 CHECK(!log_file_header_.mutable_message()->has_realtime_start_time());
480 if (new_monotonic_start_time != monotonic_clock::min_time) {
481 // If we finally found our start time, update the header. Do this once
482 // because it should never change again.
483 log_file_header_.mutable_message()->mutate_monotonic_start_time(
484 new_monotonic_start_time.time_since_epoch().count());
485 log_file_header_.mutable_message()->mutate_realtime_start_time(
486 new_realtime_start_time.time_since_epoch().count());
487 }
488 }
489
Austin Schuh64fab802020-09-09 22:47:47 -0700490 // We don't have a good way to set the realtime start time on remote nodes.
491 // Confirm it remains consistent.
492 CHECK_EQ(log_file_header_.mutable_message()->has_realtime_start_time(),
493 message_reader.log_file_header()->has_realtime_start_time());
494
495 // Parts index will *not* match unless we set them to match. We only want
496 // to accept the start time and parts mismatching, so set them.
497 log_file_header_.mutable_message()->mutate_parts_index(
498 message_reader.log_file_header()->parts_index());
499
Austin Schuh2f8fd752020-09-01 22:38:28 -0700500 // Now compare that the headers match.
Austin Schuh64fab802020-09-09 22:47:47 -0700501 if (!CompareFlatBuffer(message_reader.raw_log_file_header(),
502 log_file_header_)) {
Brian Silvermanae7c0332020-09-30 16:58:23 -0700503 if (message_reader.log_file_header()->has_log_event_uuid() &&
504 log_file_header_.message().has_log_event_uuid() &&
505 message_reader.log_file_header()->log_event_uuid()->string_view() !=
506 log_file_header_.message().log_event_uuid()->string_view()) {
Austin Schuh64fab802020-09-09 22:47:47 -0700507 LOG(FATAL) << "Logger UUIDs don't match between log file chunks "
508 << filenames_[0] << " and " << filenames_[i]
509 << ", this is not supported.";
510 }
511 if (message_reader.log_file_header()->has_parts_uuid() &&
512 log_file_header_.message().has_parts_uuid() &&
513 message_reader.log_file_header()->parts_uuid()->string_view() !=
514 log_file_header_.message().parts_uuid()->string_view()) {
515 LOG(FATAL) << "Parts UUIDs don't match between log file chunks "
516 << filenames_[0] << " and " << filenames_[i]
517 << ", this is not supported.";
518 }
519
520 LOG(FATAL) << "Header is different between log file chunks "
521 << filenames_[0] << " and " << filenames_[i]
522 << ", this is not supported.";
523 }
Austin Schuh2f8fd752020-09-01 22:38:28 -0700524 }
Austin Schuh64fab802020-09-09 22:47:47 -0700525 // Put the parts index back to the first log file chunk.
526 log_file_header_.mutable_message()->mutate_parts_index(
527 message_reader_->log_file_header()->parts_index());
Austin Schuh2f8fd752020-09-01 22:38:28 -0700528
Austin Schuh6f3babe2020-01-26 20:34:50 -0800529 // Setup per channel state.
Austin Schuh05b70472020-01-01 17:11:17 -0800530 channels_.resize(configuration()->channels()->size());
Austin Schuh6f3babe2020-01-26 20:34:50 -0800531 for (ChannelData &channel_data : channels_) {
532 channel_data.data.split_reader = this;
533 // Build up the timestamp list.
534 if (configuration::MultiNode(configuration())) {
535 channel_data.timestamps.resize(configuration()->nodes()->size());
536 for (MessageHeaderQueue &queue : channel_data.timestamps) {
537 queue.timestamps = true;
538 queue.split_reader = this;
539 }
540 }
541 }
Austin Schuh05b70472020-01-01 17:11:17 -0800542
Austin Schuh6f3babe2020-01-26 20:34:50 -0800543 // Build up channels_to_write_ as an optimization to make it fast to figure
544 // out which datastructure to place any new data from a channel on.
545 for (const Channel *channel : *configuration()->channels()) {
546 // This is the main case. We will only see data on this node.
547 if (configuration::ChannelIsSendableOnNode(channel, node())) {
548 channels_to_write_.emplace_back(
549 &channels_[channels_to_write_.size()].data);
550 } else
551 // If we can't send, but can receive, we should be able to see
552 // timestamps here.
553 if (configuration::ChannelIsReadableOnNode(channel, node())) {
554 channels_to_write_.emplace_back(
555 &(channels_[channels_to_write_.size()]
556 .timestamps[configuration::GetNodeIndex(configuration(),
557 node())]));
558 } else {
559 channels_to_write_.emplace_back(nullptr);
560 }
561 }
Austin Schuh05b70472020-01-01 17:11:17 -0800562}
563
Austin Schuh6f3babe2020-01-26 20:34:50 -0800564bool SplitMessageReader::NextLogFile() {
Austin Schuhfa895892020-01-07 20:07:41 -0800565 if (next_filename_index_ == filenames_.size()) {
566 return false;
567 }
568 message_reader_ =
569 std::make_unique<MessageReader>(filenames_[next_filename_index_]);
570
571 // We can't support the config diverging between two log file headers. See if
572 // they are the same.
573 if (next_filename_index_ != 0) {
Austin Schuh64fab802020-09-09 22:47:47 -0700574 // In order for the headers to identically compare, they need to have the
575 // same parts_index. Rewrite the saved header with the new parts_index,
576 // compare, and then restore.
577 const int32_t original_parts_index =
578 log_file_header_.message().parts_index();
579 log_file_header_.mutable_message()->mutate_parts_index(
580 message_reader_->log_file_header()->parts_index());
581
Austin Schuh97789fc2020-08-01 14:42:45 -0700582 CHECK(CompareFlatBuffer(message_reader_->raw_log_file_header(),
583 log_file_header_))
Austin Schuhfa895892020-01-07 20:07:41 -0800584 << ": Header is different between log file chunks "
585 << filenames_[next_filename_index_] << " and "
586 << filenames_[next_filename_index_ - 1] << ", this is not supported.";
Austin Schuh64fab802020-09-09 22:47:47 -0700587
588 log_file_header_.mutable_message()->mutate_parts_index(
589 original_parts_index);
Austin Schuhfa895892020-01-07 20:07:41 -0800590 }
591
592 ++next_filename_index_;
593 return true;
594}
595
Austin Schuh6f3babe2020-01-26 20:34:50 -0800596bool SplitMessageReader::QueueMessages(
Austin Schuhcde938c2020-02-02 17:30:07 -0800597 monotonic_clock::time_point last_dequeued_time) {
Austin Schuh6f3babe2020-01-26 20:34:50 -0800598 // TODO(austin): Once we are happy that everything works, read a 256kb chunk
599 // to reduce the need to re-heap down below.
Austin Schuhcde938c2020-02-02 17:30:07 -0800600
601 // Special case no more data. Otherwise we blow up on the CHECK statement
602 // confirming that we have enough data queued.
603 if (at_end_) {
604 return false;
605 }
606
607 // If this isn't the first time around, confirm that we had enough data queued
608 // to follow the contract.
609 if (time_to_queue_ != monotonic_clock::min_time) {
610 CHECK_LE(last_dequeued_time,
611 newest_timestamp() - max_out_of_order_duration())
612 << " node " << FlatbufferToJson(node()) << " on " << this;
613
614 // Bail if there is enough data already queued.
615 if (last_dequeued_time < time_to_queue_) {
Austin Schuhee711052020-08-24 16:06:09 -0700616 VLOG(1) << MaybeNodeName(target_node_) << "All up to date on " << this
617 << ", dequeued " << last_dequeued_time << " queue time "
618 << time_to_queue_;
Austin Schuhcde938c2020-02-02 17:30:07 -0800619 return true;
620 }
621 } else {
622 // Startup takes a special dance. We want to queue up until the start time,
623 // but we then want to find the next message to read. The conservative
624 // answer is to immediately trigger a second requeue to get things moving.
625 time_to_queue_ = monotonic_start_time();
626 QueueMessages(time_to_queue_);
627 }
628
629 // If we are asked to queue, queue for at least max_out_of_order_duration past
630 // the last known time in the log file (ie the newest timestep read). As long
631 // as we requeue exactly when time_to_queue_ is dequeued and go no further, we
632 // are safe. And since we pop in order, that works.
633 //
634 // Special case the start of the log file. There should be at most 1 message
635 // from each channel at the start of the log file. So always force the start
636 // of the log file to just be read.
637 time_to_queue_ = std::max(time_to_queue_, newest_timestamp());
Austin Schuhee711052020-08-24 16:06:09 -0700638 VLOG(1) << MaybeNodeName(target_node_) << "Queueing, going until "
639 << time_to_queue_ << " " << filename();
Austin Schuhcde938c2020-02-02 17:30:07 -0800640
641 bool was_emplaced = false;
Austin Schuh6f3babe2020-01-26 20:34:50 -0800642 while (true) {
Austin Schuhcde938c2020-02-02 17:30:07 -0800643 // Stop if we have enough.
Brian Silverman98360e22020-04-28 16:51:20 -0700644 if (newest_timestamp() > time_to_queue_ + max_out_of_order_duration() &&
Austin Schuhcde938c2020-02-02 17:30:07 -0800645 was_emplaced) {
Austin Schuhee711052020-08-24 16:06:09 -0700646 VLOG(1) << MaybeNodeName(target_node_) << "Done queueing on " << this
647 << ", queued to " << newest_timestamp() << " with requeue time "
648 << time_to_queue_;
Austin Schuh6f3babe2020-01-26 20:34:50 -0800649 return true;
650 }
Austin Schuh05b70472020-01-01 17:11:17 -0800651
Austin Schuh6f3babe2020-01-26 20:34:50 -0800652 if (std::optional<FlatbufferVector<MessageHeader>> msg =
653 message_reader_->ReadMessage()) {
654 const MessageHeader &header = msg.value().message();
655
Austin Schuhcde938c2020-02-02 17:30:07 -0800656 const monotonic_clock::time_point timestamp = monotonic_clock::time_point(
657 chrono::nanoseconds(header.monotonic_sent_time()));
Austin Schuh6f3babe2020-01-26 20:34:50 -0800658
Austin Schuh0b5fd032020-03-28 17:36:49 -0700659 if (VLOG_IS_ON(2)) {
Brian Silvermand90905f2020-09-23 14:42:56 -0700660 LOG(INFO) << MaybeNodeName(target_node_) << "Queued " << this << " "
661 << filename() << " ttq: " << time_to_queue_ << " now "
Austin Schuhee711052020-08-24 16:06:09 -0700662 << newest_timestamp() << " start time "
663 << monotonic_start_time() << " " << FlatbufferToJson(&header);
Austin Schuh0b5fd032020-03-28 17:36:49 -0700664 } else if (VLOG_IS_ON(1)) {
665 FlatbufferVector<MessageHeader> copy = msg.value();
666 copy.mutable_message()->clear_data();
Austin Schuhee711052020-08-24 16:06:09 -0700667 LOG(INFO) << MaybeNodeName(target_node_) << "Queued " << this << " "
668 << filename() << " ttq: " << time_to_queue_ << " now "
669 << newest_timestamp() << " start time "
670 << monotonic_start_time() << " " << FlatbufferToJson(copy);
Austin Schuh0b5fd032020-03-28 17:36:49 -0700671 }
Austin Schuhcde938c2020-02-02 17:30:07 -0800672
673 const int channel_index = header.channel_index();
674 was_emplaced = channels_to_write_[channel_index]->emplace_back(
675 std::move(msg.value()));
676 if (was_emplaced) {
677 newest_timestamp_ = std::max(newest_timestamp_, timestamp);
678 }
Austin Schuh6f3babe2020-01-26 20:34:50 -0800679 } else {
680 if (!NextLogFile()) {
Austin Schuhee711052020-08-24 16:06:09 -0700681 VLOG(1) << MaybeNodeName(target_node_) << "No more files, last was "
682 << filenames_.back();
Austin Schuhcde938c2020-02-02 17:30:07 -0800683 at_end_ = true;
Austin Schuh8bd96322020-02-13 21:18:22 -0800684 for (MessageHeaderQueue *queue : channels_to_write_) {
685 if (queue == nullptr || queue->timestamp_merger == nullptr) {
686 continue;
687 }
688 queue->timestamp_merger->NoticeAtEnd();
689 }
Austin Schuh6f3babe2020-01-26 20:34:50 -0800690 return false;
691 }
692 }
Austin Schuh05b70472020-01-01 17:11:17 -0800693 }
Austin Schuh6f3babe2020-01-26 20:34:50 -0800694}
695
696void SplitMessageReader::SetTimestampMerger(TimestampMerger *timestamp_merger,
697 int channel_index,
698 const Node *target_node) {
699 const Node *reinterpreted_target_node =
700 configuration::GetNodeOrDie(configuration(), target_node);
Austin Schuhee711052020-08-24 16:06:09 -0700701 target_node_ = reinterpreted_target_node;
702
Austin Schuh6f3babe2020-01-26 20:34:50 -0800703 const Channel *const channel =
704 configuration()->channels()->Get(channel_index);
705
Austin Schuhcde938c2020-02-02 17:30:07 -0800706 VLOG(1) << " Configuring merger " << this << " for channel " << channel_index
707 << " "
708 << configuration::CleanedChannelToString(
709 configuration()->channels()->Get(channel_index));
710
Austin Schuh6f3babe2020-01-26 20:34:50 -0800711 MessageHeaderQueue *message_header_queue = nullptr;
712
713 // Figure out if this log file is from our point of view, or the other node's
714 // point of view.
715 if (node() == reinterpreted_target_node) {
Austin Schuhcde938c2020-02-02 17:30:07 -0800716 VLOG(1) << " Replaying as logged node " << filename();
717
718 if (configuration::ChannelIsSendableOnNode(channel, node())) {
719 VLOG(1) << " Data on node";
720 message_header_queue = &(channels_[channel_index].data);
721 } else if (configuration::ChannelIsReadableOnNode(channel, node())) {
722 VLOG(1) << " Timestamps on node";
723 message_header_queue =
724 &(channels_[channel_index].timestamps[configuration::GetNodeIndex(
725 configuration(), node())]);
Austin Schuh6f3babe2020-01-26 20:34:50 -0800726 } else {
Austin Schuhcde938c2020-02-02 17:30:07 -0800727 VLOG(1) << " Dropping";
Austin Schuh6f3babe2020-01-26 20:34:50 -0800728 }
729 } else {
Austin Schuhcde938c2020-02-02 17:30:07 -0800730 VLOG(1) << " Replaying as other node " << filename();
Austin Schuh6f3babe2020-01-26 20:34:50 -0800731 // We are replaying from another node's point of view. The only interesting
Austin Schuhcde938c2020-02-02 17:30:07 -0800732 // data is data that is sent from our node and received on theirs.
733 if (configuration::ChannelIsReadableOnNode(channel,
734 reinterpreted_target_node) &&
735 configuration::ChannelIsSendableOnNode(channel, node())) {
736 VLOG(1) << " Readable on target node";
Austin Schuh6f3babe2020-01-26 20:34:50 -0800737 // Data from another node.
738 message_header_queue = &(channels_[channel_index].data);
739 } else {
Austin Schuhcde938c2020-02-02 17:30:07 -0800740 VLOG(1) << " Dropping";
Austin Schuh6f3babe2020-01-26 20:34:50 -0800741 // This is either not sendable on the other node, or is a timestamp and
742 // therefore not interesting.
743 }
744 }
745
746 // If we found one, write it down. This will be nullptr when there is nothing
747 // relevant on this channel on this node for the target node. In that case,
748 // we want to drop the message instead of queueing it.
749 if (message_header_queue != nullptr) {
750 message_header_queue->timestamp_merger = timestamp_merger;
751 }
752}
753
754std::tuple<monotonic_clock::time_point, uint32_t,
755 FlatbufferVector<MessageHeader>>
756SplitMessageReader::PopOldest(int channel_index) {
757 CHECK_GT(channels_[channel_index].data.size(), 0u);
Austin Schuhcde938c2020-02-02 17:30:07 -0800758 const std::tuple<monotonic_clock::time_point, uint32_t, const MessageHeader *>
759 timestamp = channels_[channel_index].data.front_timestamp();
Austin Schuh6f3babe2020-01-26 20:34:50 -0800760 FlatbufferVector<MessageHeader> front =
761 std::move(channels_[channel_index].data.front());
Austin Schuh2f8fd752020-09-01 22:38:28 -0700762 channels_[channel_index].data.PopFront();
Austin Schuhcde938c2020-02-02 17:30:07 -0800763
Austin Schuh2f8fd752020-09-01 22:38:28 -0700764 VLOG(1) << MaybeNodeName(target_node_) << "Popped Data " << this << " "
765 << std::get<0>(timestamp) << " for "
766 << configuration::StrippedChannelToString(
767 configuration()->channels()->Get(channel_index))
768 << " (" << channel_index << ")";
Austin Schuhcde938c2020-02-02 17:30:07 -0800769
770 QueueMessages(std::get<0>(timestamp));
Austin Schuh6f3babe2020-01-26 20:34:50 -0800771
772 return std::make_tuple(std::get<0>(timestamp), std::get<1>(timestamp),
773 std::move(front));
774}
775
776std::tuple<monotonic_clock::time_point, uint32_t,
777 FlatbufferVector<MessageHeader>>
Austin Schuh2f8fd752020-09-01 22:38:28 -0700778SplitMessageReader::PopOldestTimestamp(int channel, int node_index) {
Austin Schuh6f3babe2020-01-26 20:34:50 -0800779 CHECK_GT(channels_[channel].timestamps[node_index].size(), 0u);
Austin Schuhcde938c2020-02-02 17:30:07 -0800780 const std::tuple<monotonic_clock::time_point, uint32_t, const MessageHeader *>
781 timestamp = channels_[channel].timestamps[node_index].front_timestamp();
Austin Schuh6f3babe2020-01-26 20:34:50 -0800782 FlatbufferVector<MessageHeader> front =
783 std::move(channels_[channel].timestamps[node_index].front());
Austin Schuh2f8fd752020-09-01 22:38:28 -0700784 channels_[channel].timestamps[node_index].PopFront();
Austin Schuhcde938c2020-02-02 17:30:07 -0800785
Austin Schuh2f8fd752020-09-01 22:38:28 -0700786 VLOG(1) << MaybeNodeName(target_node_) << "Popped timestamp " << this << " "
Austin Schuhee711052020-08-24 16:06:09 -0700787 << std::get<0>(timestamp) << " for "
788 << configuration::StrippedChannelToString(
789 configuration()->channels()->Get(channel))
Austin Schuh2f8fd752020-09-01 22:38:28 -0700790 << " on "
791 << configuration()->nodes()->Get(node_index)->name()->string_view()
792 << " (" << node_index << ")";
Austin Schuhcde938c2020-02-02 17:30:07 -0800793
794 QueueMessages(std::get<0>(timestamp));
Austin Schuh6f3babe2020-01-26 20:34:50 -0800795
796 return std::make_tuple(std::get<0>(timestamp), std::get<1>(timestamp),
797 std::move(front));
798}
799
Austin Schuhcde938c2020-02-02 17:30:07 -0800800bool SplitMessageReader::MessageHeaderQueue::emplace_back(
Austin Schuh6f3babe2020-01-26 20:34:50 -0800801 FlatbufferVector<MessageHeader> &&msg) {
802 CHECK(split_reader != nullptr);
803
804 // If there is no timestamp merger for this queue, nobody is listening. Drop
805 // the message. This happens when a log file from another node is replayed,
806 // and the timestamp mergers down stream just don't care.
807 if (timestamp_merger == nullptr) {
Austin Schuhcde938c2020-02-02 17:30:07 -0800808 return false;
Austin Schuh6f3babe2020-01-26 20:34:50 -0800809 }
810
811 CHECK(timestamps != msg.message().has_data())
812 << ": Got timestamps and data mixed up on a node. "
813 << FlatbufferToJson(msg);
814
815 data_.emplace_back(std::move(msg));
816
817 if (data_.size() == 1u) {
818 // Yup, new data. Notify.
819 if (timestamps) {
820 timestamp_merger->UpdateTimestamp(split_reader, front_timestamp());
821 } else {
822 timestamp_merger->Update(split_reader, front_timestamp());
823 }
824 }
Austin Schuhcde938c2020-02-02 17:30:07 -0800825
826 return true;
Austin Schuh6f3babe2020-01-26 20:34:50 -0800827}
828
Austin Schuh2f8fd752020-09-01 22:38:28 -0700829void SplitMessageReader::MessageHeaderQueue::PopFront() {
Austin Schuh6f3babe2020-01-26 20:34:50 -0800830 data_.pop_front();
831 if (data_.size() != 0u) {
832 // Yup, new data.
833 if (timestamps) {
834 timestamp_merger->UpdateTimestamp(split_reader, front_timestamp());
835 } else {
836 timestamp_merger->Update(split_reader, front_timestamp());
837 }
Austin Schuh2f8fd752020-09-01 22:38:28 -0700838 } else {
839 // Poke anyways to update the heap.
840 if (timestamps) {
841 timestamp_merger->UpdateTimestamp(
842 nullptr, std::make_tuple(monotonic_clock::min_time, 0, nullptr));
843 } else {
844 timestamp_merger->Update(
845 nullptr, std::make_tuple(monotonic_clock::min_time, 0, nullptr));
846 }
Austin Schuh6f3babe2020-01-26 20:34:50 -0800847 }
Austin Schuh05b70472020-01-01 17:11:17 -0800848}
849
850namespace {
851
Austin Schuh6f3babe2020-01-26 20:34:50 -0800852bool SplitMessageReaderHeapCompare(
853 const std::tuple<monotonic_clock::time_point, uint32_t,
854 SplitMessageReader *>
855 first,
856 const std::tuple<monotonic_clock::time_point, uint32_t,
857 SplitMessageReader *>
858 second) {
859 if (std::get<0>(first) > std::get<0>(second)) {
860 return true;
861 } else if (std::get<0>(first) == std::get<0>(second)) {
862 if (std::get<1>(first) > std::get<1>(second)) {
863 return true;
864 } else if (std::get<1>(first) == std::get<1>(second)) {
865 return std::get<2>(first) > std::get<2>(second);
866 } else {
867 return false;
868 }
869 } else {
870 return false;
871 }
872}
873
Austin Schuh05b70472020-01-01 17:11:17 -0800874bool ChannelHeapCompare(
875 const std::pair<monotonic_clock::time_point, int> first,
876 const std::pair<monotonic_clock::time_point, int> second) {
877 if (first.first > second.first) {
878 return true;
879 } else if (first.first == second.first) {
880 return first.second > second.second;
881 } else {
882 return false;
883 }
884}
885
886} // namespace
887
Austin Schuh6f3babe2020-01-26 20:34:50 -0800888TimestampMerger::TimestampMerger(
889 const Configuration *configuration,
890 std::vector<SplitMessageReader *> split_message_readers, int channel_index,
891 const Node *target_node, ChannelMerger *channel_merger)
892 : configuration_(configuration),
893 split_message_readers_(std::move(split_message_readers)),
894 channel_index_(channel_index),
895 node_index_(configuration::MultiNode(configuration)
896 ? configuration::GetNodeIndex(configuration, target_node)
897 : -1),
898 channel_merger_(channel_merger) {
899 // Tell the readers we care so they know who to notify.
Austin Schuhcde938c2020-02-02 17:30:07 -0800900 VLOG(1) << "Configuring channel " << channel_index << " target node "
901 << FlatbufferToJson(target_node);
Austin Schuh6f3babe2020-01-26 20:34:50 -0800902 for (SplitMessageReader *reader : split_message_readers_) {
903 reader->SetTimestampMerger(this, channel_index, target_node);
904 }
905
906 // And then determine if we need to track timestamps.
907 const Channel *channel = configuration->channels()->Get(channel_index);
908 if (!configuration::ChannelIsSendableOnNode(channel, target_node) &&
909 configuration::ChannelIsReadableOnNode(channel, target_node)) {
910 has_timestamps_ = true;
911 }
912}
913
914void TimestampMerger::PushMessageHeap(
Austin Schuhcde938c2020-02-02 17:30:07 -0800915 std::tuple<monotonic_clock::time_point, uint32_t, const MessageHeader *>
916 timestamp,
Austin Schuh6f3babe2020-01-26 20:34:50 -0800917 SplitMessageReader *split_message_reader) {
Austin Schuh2f8fd752020-09-01 22:38:28 -0700918 if (split_message_reader != nullptr) {
919 DCHECK(std::find_if(message_heap_.begin(), message_heap_.end(),
920 [split_message_reader](
921 const std::tuple<monotonic_clock::time_point,
922 uint32_t, SplitMessageReader *>
923 x) {
924 return std::get<2>(x) == split_message_reader;
925 }) == message_heap_.end())
926 << ": Pushing message when it is already in the heap.";
Austin Schuh6f3babe2020-01-26 20:34:50 -0800927
Austin Schuh2f8fd752020-09-01 22:38:28 -0700928 message_heap_.push_back(std::make_tuple(
929 std::get<0>(timestamp), std::get<1>(timestamp), split_message_reader));
Austin Schuh6f3babe2020-01-26 20:34:50 -0800930
Austin Schuh2f8fd752020-09-01 22:38:28 -0700931 std::push_heap(message_heap_.begin(), message_heap_.end(),
932 &SplitMessageReaderHeapCompare);
933 }
Austin Schuh6f3babe2020-01-26 20:34:50 -0800934
935 // If we are just a data merger, don't wait for timestamps.
936 if (!has_timestamps_) {
Austin Schuh2f8fd752020-09-01 22:38:28 -0700937 if (!message_heap_.empty()) {
938 channel_merger_->Update(std::get<0>(message_heap_[0]), channel_index_);
939 pushed_ = true;
940 } else {
941 // Remove ourselves if we are empty.
942 channel_merger_->Update(monotonic_clock::min_time, channel_index_);
943 }
Austin Schuh6f3babe2020-01-26 20:34:50 -0800944 }
945}
946
Austin Schuhcde938c2020-02-02 17:30:07 -0800947std::tuple<monotonic_clock::time_point, uint32_t, const MessageHeader *>
948TimestampMerger::oldest_message() const {
949 CHECK_GT(message_heap_.size(), 0u);
950 std::tuple<monotonic_clock::time_point, uint32_t, SplitMessageReader *>
951 oldest_message_reader = message_heap_.front();
952 return std::get<2>(oldest_message_reader)->oldest_message(channel_index_);
953}
954
955std::tuple<monotonic_clock::time_point, uint32_t, const MessageHeader *>
956TimestampMerger::oldest_timestamp() const {
957 CHECK_GT(timestamp_heap_.size(), 0u);
958 std::tuple<monotonic_clock::time_point, uint32_t, SplitMessageReader *>
959 oldest_message_reader = timestamp_heap_.front();
960 return std::get<2>(oldest_message_reader)
961 ->oldest_message(channel_index_, node_index_);
962}
963
Austin Schuh6f3babe2020-01-26 20:34:50 -0800964void TimestampMerger::PushTimestampHeap(
Austin Schuhcde938c2020-02-02 17:30:07 -0800965 std::tuple<monotonic_clock::time_point, uint32_t, const MessageHeader *>
966 timestamp,
Austin Schuh6f3babe2020-01-26 20:34:50 -0800967 SplitMessageReader *split_message_reader) {
Austin Schuh2f8fd752020-09-01 22:38:28 -0700968 if (split_message_reader != nullptr) {
969 DCHECK(std::find_if(timestamp_heap_.begin(), timestamp_heap_.end(),
970 [split_message_reader](
971 const std::tuple<monotonic_clock::time_point,
972 uint32_t, SplitMessageReader *>
973 x) {
974 return std::get<2>(x) == split_message_reader;
975 }) == timestamp_heap_.end())
976 << ": Pushing timestamp when it is already in the heap.";
Austin Schuh6f3babe2020-01-26 20:34:50 -0800977
Austin Schuh2f8fd752020-09-01 22:38:28 -0700978 timestamp_heap_.push_back(std::make_tuple(
979 std::get<0>(timestamp), std::get<1>(timestamp), split_message_reader));
Austin Schuh6f3babe2020-01-26 20:34:50 -0800980
Austin Schuh2f8fd752020-09-01 22:38:28 -0700981 std::push_heap(timestamp_heap_.begin(), timestamp_heap_.end(),
982 SplitMessageReaderHeapCompare);
983 }
Austin Schuh6f3babe2020-01-26 20:34:50 -0800984
985 // If we are a timestamp merger, don't wait for data. Missing data will be
986 // caught at read time.
987 if (has_timestamps_) {
Austin Schuh2f8fd752020-09-01 22:38:28 -0700988 if (!timestamp_heap_.empty()) {
989 channel_merger_->Update(std::get<0>(timestamp_heap_[0]), channel_index_);
990 pushed_ = true;
991 } else {
992 // Remove ourselves if we are empty.
993 channel_merger_->Update(monotonic_clock::min_time, channel_index_);
994 }
Austin Schuh6f3babe2020-01-26 20:34:50 -0800995 }
996}
997
998std::tuple<monotonic_clock::time_point, uint32_t,
999 FlatbufferVector<MessageHeader>>
1000TimestampMerger::PopMessageHeap() {
1001 // Pop the oldest message reader pointer off the heap.
1002 CHECK_GT(message_heap_.size(), 0u);
1003 std::tuple<monotonic_clock::time_point, uint32_t, SplitMessageReader *>
1004 oldest_message_reader = message_heap_.front();
1005
1006 std::pop_heap(message_heap_.begin(), message_heap_.end(),
1007 &SplitMessageReaderHeapCompare);
1008 message_heap_.pop_back();
1009
1010 // Pop the oldest message. This re-pushes any messages from the reader to the
1011 // message heap.
1012 std::tuple<monotonic_clock::time_point, uint32_t,
1013 FlatbufferVector<MessageHeader>>
1014 oldest_message =
1015 std::get<2>(oldest_message_reader)->PopOldest(channel_index_);
1016
1017 // Confirm that the time and queue_index we have recorded matches.
1018 CHECK_EQ(std::get<0>(oldest_message), std::get<0>(oldest_message_reader));
1019 CHECK_EQ(std::get<1>(oldest_message), std::get<1>(oldest_message_reader));
1020
1021 // Now, keep reading until we have found all duplicates.
Brian Silverman8a32ce62020-08-12 12:02:38 -07001022 while (!message_heap_.empty()) {
Austin Schuh6f3babe2020-01-26 20:34:50 -08001023 // See if it is a duplicate.
1024 std::tuple<monotonic_clock::time_point, uint32_t, SplitMessageReader *>
1025 next_oldest_message_reader = message_heap_.front();
1026
Austin Schuhcde938c2020-02-02 17:30:07 -08001027 std::tuple<monotonic_clock::time_point, uint32_t, const MessageHeader *>
1028 next_oldest_message_time = std::get<2>(next_oldest_message_reader)
1029 ->oldest_message(channel_index_);
Austin Schuh6f3babe2020-01-26 20:34:50 -08001030
1031 if (std::get<0>(next_oldest_message_time) == std::get<0>(oldest_message) &&
1032 std::get<1>(next_oldest_message_time) == std::get<1>(oldest_message)) {
1033 // Pop the message reader pointer.
1034 std::pop_heap(message_heap_.begin(), message_heap_.end(),
1035 &SplitMessageReaderHeapCompare);
1036 message_heap_.pop_back();
1037
1038 // Pop the next oldest message. This re-pushes any messages from the
1039 // reader.
1040 std::tuple<monotonic_clock::time_point, uint32_t,
1041 FlatbufferVector<MessageHeader>>
1042 next_oldest_message = std::get<2>(next_oldest_message_reader)
1043 ->PopOldest(channel_index_);
1044
1045 // And make sure the message matches in it's entirety.
1046 CHECK(std::get<2>(oldest_message).span() ==
1047 std::get<2>(next_oldest_message).span())
1048 << ": Data at the same timestamp doesn't match.";
1049 } else {
1050 break;
1051 }
1052 }
1053
1054 return oldest_message;
1055}
1056
1057std::tuple<monotonic_clock::time_point, uint32_t,
1058 FlatbufferVector<MessageHeader>>
1059TimestampMerger::PopTimestampHeap() {
1060 // Pop the oldest message reader pointer off the heap.
1061 CHECK_GT(timestamp_heap_.size(), 0u);
1062
1063 std::tuple<monotonic_clock::time_point, uint32_t, SplitMessageReader *>
1064 oldest_timestamp_reader = timestamp_heap_.front();
1065
1066 std::pop_heap(timestamp_heap_.begin(), timestamp_heap_.end(),
1067 &SplitMessageReaderHeapCompare);
1068 timestamp_heap_.pop_back();
1069
1070 CHECK(node_index_ != -1) << ": Timestamps in a single node environment";
1071
1072 // Pop the oldest message. This re-pushes any timestamps from the reader to
1073 // the timestamp heap.
1074 std::tuple<monotonic_clock::time_point, uint32_t,
1075 FlatbufferVector<MessageHeader>>
1076 oldest_timestamp = std::get<2>(oldest_timestamp_reader)
Austin Schuh2f8fd752020-09-01 22:38:28 -07001077 ->PopOldestTimestamp(channel_index_, node_index_);
Austin Schuh6f3babe2020-01-26 20:34:50 -08001078
1079 // Confirm that the time we have recorded matches.
1080 CHECK_EQ(std::get<0>(oldest_timestamp), std::get<0>(oldest_timestamp_reader));
1081 CHECK_EQ(std::get<1>(oldest_timestamp), std::get<1>(oldest_timestamp_reader));
1082
Austin Schuh2f8fd752020-09-01 22:38:28 -07001083 // Now, keep reading until we have found all duplicates.
1084 while (!timestamp_heap_.empty()) {
1085 // See if it is a duplicate.
1086 std::tuple<monotonic_clock::time_point, uint32_t, SplitMessageReader *>
1087 next_oldest_timestamp_reader = timestamp_heap_.front();
Austin Schuh6f3babe2020-01-26 20:34:50 -08001088
Austin Schuh2f8fd752020-09-01 22:38:28 -07001089 std::tuple<monotonic_clock::time_point, uint32_t, const MessageHeader *>
1090 next_oldest_timestamp_time =
1091 std::get<2>(next_oldest_timestamp_reader)
1092 ->oldest_message(channel_index_, node_index_);
Austin Schuh6f3babe2020-01-26 20:34:50 -08001093
Austin Schuh2f8fd752020-09-01 22:38:28 -07001094 if (std::get<0>(next_oldest_timestamp_time) ==
1095 std::get<0>(oldest_timestamp) &&
1096 std::get<1>(next_oldest_timestamp_time) ==
1097 std::get<1>(oldest_timestamp)) {
1098 // Pop the timestamp reader pointer.
1099 std::pop_heap(timestamp_heap_.begin(), timestamp_heap_.end(),
1100 &SplitMessageReaderHeapCompare);
1101 timestamp_heap_.pop_back();
1102
1103 // Pop the next oldest timestamp. This re-pushes any messages from the
1104 // reader.
1105 std::tuple<monotonic_clock::time_point, uint32_t,
1106 FlatbufferVector<MessageHeader>>
1107 next_oldest_timestamp =
1108 std::get<2>(next_oldest_timestamp_reader)
1109 ->PopOldestTimestamp(channel_index_, node_index_);
1110
1111 // And make sure the contents matches in it's entirety.
1112 CHECK(std::get<2>(oldest_timestamp).span() ==
1113 std::get<2>(next_oldest_timestamp).span())
1114 << ": Data at the same timestamp doesn't match, "
1115 << aos::FlatbufferToJson(std::get<2>(oldest_timestamp)) << " vs "
1116 << aos::FlatbufferToJson(std::get<2>(next_oldest_timestamp)) << " "
1117 << absl::BytesToHexString(std::string_view(
1118 reinterpret_cast<const char *>(
1119 std::get<2>(oldest_timestamp).span().data()),
1120 std::get<2>(oldest_timestamp).span().size()))
1121 << " vs "
1122 << absl::BytesToHexString(std::string_view(
1123 reinterpret_cast<const char *>(
1124 std::get<2>(next_oldest_timestamp).span().data()),
1125 std::get<2>(next_oldest_timestamp).span().size()));
1126
1127 } else {
1128 break;
1129 }
Austin Schuh8bd96322020-02-13 21:18:22 -08001130 }
1131
Austin Schuh2f8fd752020-09-01 22:38:28 -07001132 return oldest_timestamp;
Austin Schuh8bd96322020-02-13 21:18:22 -08001133}
1134
Austin Schuh6f3babe2020-01-26 20:34:50 -08001135std::tuple<TimestampMerger::DeliveryTimestamp, FlatbufferVector<MessageHeader>>
1136TimestampMerger::PopOldest() {
1137 if (has_timestamps_) {
Austin Schuh2f8fd752020-09-01 22:38:28 -07001138 VLOG(1) << "Looking for matching timestamp for "
1139 << configuration::StrippedChannelToString(
1140 configuration_->channels()->Get(channel_index_))
1141 << " (" << channel_index_ << ") "
1142 << " at " << std::get<0>(oldest_timestamp());
1143
Austin Schuh8bd96322020-02-13 21:18:22 -08001144 // Read the timestamps.
Austin Schuh6f3babe2020-01-26 20:34:50 -08001145 std::tuple<monotonic_clock::time_point, uint32_t,
1146 FlatbufferVector<MessageHeader>>
1147 oldest_timestamp = PopTimestampHeap();
1148
1149 TimestampMerger::DeliveryTimestamp timestamp;
1150 timestamp.monotonic_event_time =
1151 monotonic_clock::time_point(chrono::nanoseconds(
1152 std::get<2>(oldest_timestamp).message().monotonic_sent_time()));
1153 timestamp.realtime_event_time =
1154 realtime_clock::time_point(chrono::nanoseconds(
1155 std::get<2>(oldest_timestamp).message().realtime_sent_time()));
Austin Schuh8d7e0bb2020-10-02 17:57:00 -07001156 timestamp.queue_index =
1157 std::get<2>(oldest_timestamp).message().queue_index();
Austin Schuh6f3babe2020-01-26 20:34:50 -08001158
1159 // Consistency check.
1160 CHECK_EQ(timestamp.monotonic_event_time, std::get<0>(oldest_timestamp));
1161 CHECK_EQ(std::get<2>(oldest_timestamp).message().queue_index(),
1162 std::get<1>(oldest_timestamp));
1163
1164 monotonic_clock::time_point remote_timestamp_monotonic_time(
1165 chrono::nanoseconds(
1166 std::get<2>(oldest_timestamp).message().monotonic_remote_time()));
1167
Austin Schuh8bd96322020-02-13 21:18:22 -08001168 // See if we have any data. If not, pass the problem up the chain.
Brian Silverman8a32ce62020-08-12 12:02:38 -07001169 if (message_heap_.empty()) {
Austin Schuhee711052020-08-24 16:06:09 -07001170 LOG(WARNING) << MaybeNodeName(configuration_->nodes()->Get(node_index_))
1171 << "No data to match timestamp on "
1172 << configuration::CleanedChannelToString(
1173 configuration_->channels()->Get(channel_index_))
1174 << " (" << channel_index_ << ")";
Austin Schuh8bd96322020-02-13 21:18:22 -08001175 return std::make_tuple(timestamp,
1176 std::move(std::get<2>(oldest_timestamp)));
1177 }
1178
Austin Schuh6f3babe2020-01-26 20:34:50 -08001179 while (true) {
Austin Schuhcde938c2020-02-02 17:30:07 -08001180 {
1181 // Ok, now try grabbing data until we find one which matches.
1182 std::tuple<monotonic_clock::time_point, uint32_t, const MessageHeader *>
1183 oldest_message_ref = oldest_message();
1184
1185 // Time at which the message was sent (this message is written from the
1186 // sending node's perspective.
1187 monotonic_clock::time_point remote_monotonic_time(chrono::nanoseconds(
1188 std::get<2>(oldest_message_ref)->monotonic_sent_time()));
1189
1190 if (remote_monotonic_time < remote_timestamp_monotonic_time) {
Austin Schuhee711052020-08-24 16:06:09 -07001191 LOG(WARNING) << configuration_->nodes()
1192 ->Get(node_index_)
1193 ->name()
1194 ->string_view()
1195 << " Undelivered message, skipping. Remote time is "
1196 << remote_monotonic_time << " timestamp is "
1197 << remote_timestamp_monotonic_time << " on channel "
1198 << configuration::StrippedChannelToString(
1199 configuration_->channels()->Get(channel_index_))
1200 << " (" << channel_index_ << ")";
Austin Schuhcde938c2020-02-02 17:30:07 -08001201 PopMessageHeap();
1202 continue;
1203 } else if (remote_monotonic_time > remote_timestamp_monotonic_time) {
Austin Schuhee711052020-08-24 16:06:09 -07001204 LOG(WARNING) << configuration_->nodes()
1205 ->Get(node_index_)
1206 ->name()
1207 ->string_view()
1208 << " Data not found. Remote time should be "
1209 << remote_timestamp_monotonic_time
1210 << ", message time is " << remote_monotonic_time
1211 << " on channel "
1212 << configuration::StrippedChannelToString(
1213 configuration_->channels()->Get(channel_index_))
Austin Schuh2f8fd752020-09-01 22:38:28 -07001214 << " (" << channel_index_ << ")"
1215 << (VLOG_IS_ON(1) ? DebugString() : "");
Austin Schuhcde938c2020-02-02 17:30:07 -08001216 return std::make_tuple(timestamp,
1217 std::move(std::get<2>(oldest_timestamp)));
1218 }
1219
1220 timestamp.monotonic_remote_time = remote_monotonic_time;
1221 }
1222
Austin Schuh2f8fd752020-09-01 22:38:28 -07001223 VLOG(1) << "Found matching data "
1224 << configuration::StrippedChannelToString(
1225 configuration_->channels()->Get(channel_index_))
1226 << " (" << channel_index_ << ")";
Austin Schuh6f3babe2020-01-26 20:34:50 -08001227 std::tuple<monotonic_clock::time_point, uint32_t,
1228 FlatbufferVector<MessageHeader>>
1229 oldest_message = PopMessageHeap();
1230
Austin Schuh6f3babe2020-01-26 20:34:50 -08001231 timestamp.realtime_remote_time =
1232 realtime_clock::time_point(chrono::nanoseconds(
1233 std::get<2>(oldest_message).message().realtime_sent_time()));
1234 timestamp.remote_queue_index =
1235 std::get<2>(oldest_message).message().queue_index();
1236
Austin Schuhcde938c2020-02-02 17:30:07 -08001237 CHECK_EQ(timestamp.monotonic_remote_time,
1238 remote_timestamp_monotonic_time);
1239
1240 CHECK_EQ(timestamp.remote_queue_index,
1241 std::get<2>(oldest_timestamp).message().remote_queue_index())
1242 << ": " << FlatbufferToJson(&std::get<2>(oldest_timestamp).message())
1243 << " data "
1244 << FlatbufferToJson(&std::get<2>(oldest_message).message());
Austin Schuh6f3babe2020-01-26 20:34:50 -08001245
Austin Schuh30dd5c52020-08-01 14:43:44 -07001246 return std::make_tuple(timestamp, std::move(std::get<2>(oldest_message)));
Austin Schuh6f3babe2020-01-26 20:34:50 -08001247 }
1248 } else {
1249 std::tuple<monotonic_clock::time_point, uint32_t,
1250 FlatbufferVector<MessageHeader>>
1251 oldest_message = PopMessageHeap();
1252
1253 TimestampMerger::DeliveryTimestamp timestamp;
1254 timestamp.monotonic_event_time =
1255 monotonic_clock::time_point(chrono::nanoseconds(
1256 std::get<2>(oldest_message).message().monotonic_sent_time()));
1257 timestamp.realtime_event_time =
1258 realtime_clock::time_point(chrono::nanoseconds(
1259 std::get<2>(oldest_message).message().realtime_sent_time()));
Austin Schuh8d7e0bb2020-10-02 17:57:00 -07001260 timestamp.queue_index = std::get<2>(oldest_message).message().queue_index();
Austin Schuh6f3babe2020-01-26 20:34:50 -08001261 timestamp.remote_queue_index = 0xffffffff;
1262
1263 CHECK_EQ(std::get<0>(oldest_message), timestamp.monotonic_event_time);
1264 CHECK_EQ(std::get<1>(oldest_message),
1265 std::get<2>(oldest_message).message().queue_index());
1266
Austin Schuh30dd5c52020-08-01 14:43:44 -07001267 return std::make_tuple(timestamp, std::move(std::get<2>(oldest_message)));
Austin Schuh6f3babe2020-01-26 20:34:50 -08001268 }
1269}
1270
Austin Schuh8bd96322020-02-13 21:18:22 -08001271void TimestampMerger::NoticeAtEnd() { channel_merger_->NoticeAtEnd(); }
1272
Austin Schuh6f3babe2020-01-26 20:34:50 -08001273namespace {
1274std::vector<std::unique_ptr<SplitMessageReader>> MakeSplitMessageReaders(
1275 const std::vector<std::vector<std::string>> &filenames) {
1276 CHECK_GT(filenames.size(), 0u);
1277 // Build up all the SplitMessageReaders.
1278 std::vector<std::unique_ptr<SplitMessageReader>> result;
1279 for (const std::vector<std::string> &filenames : filenames) {
1280 result.emplace_back(std::make_unique<SplitMessageReader>(filenames));
1281 }
1282 return result;
1283}
1284} // namespace
1285
1286ChannelMerger::ChannelMerger(
1287 const std::vector<std::vector<std::string>> &filenames)
1288 : split_message_readers_(MakeSplitMessageReaders(filenames)),
Austin Schuh97789fc2020-08-01 14:42:45 -07001289 log_file_header_(split_message_readers_[0]->raw_log_file_header()) {
Austin Schuh6f3babe2020-01-26 20:34:50 -08001290 // Now, confirm that the configuration matches for each and pick a start time.
1291 // Also return the list of possible nodes.
1292 for (const std::unique_ptr<SplitMessageReader> &reader :
1293 split_message_readers_) {
1294 CHECK(CompareFlatBuffer(log_file_header_.message().configuration(),
1295 reader->log_file_header()->configuration()))
1296 << ": Replaying log files with different configurations isn't "
1297 "supported";
1298 }
1299
1300 nodes_ = configuration::GetNodes(configuration());
1301}
1302
1303bool ChannelMerger::SetNode(const Node *target_node) {
1304 std::vector<SplitMessageReader *> split_message_readers;
1305 for (const std::unique_ptr<SplitMessageReader> &reader :
1306 split_message_readers_) {
1307 split_message_readers.emplace_back(reader.get());
1308 }
1309
1310 // Go find a log_file_header for this node.
1311 {
1312 bool found_node = false;
1313
1314 for (const std::unique_ptr<SplitMessageReader> &reader :
1315 split_message_readers_) {
James Kuszmaulfc273dc2020-05-09 17:56:19 -07001316 // In order to identify which logfile(s) map to the target node, do a
1317 // logical comparison of the nodes, by confirming that we are either in a
1318 // single-node setup (where the nodes will both be nullptr) or that the
1319 // node names match (but the other node fields--e.g., hostname lists--may
1320 // not).
1321 const bool both_null =
1322 reader->node() == nullptr && target_node == nullptr;
1323 const bool both_have_name =
1324 (reader->node() != nullptr) && (target_node != nullptr) &&
1325 (reader->node()->has_name() && target_node->has_name());
1326 const bool node_names_identical =
Brian Silvermand90905f2020-09-23 14:42:56 -07001327 both_have_name && (reader->node()->name()->string_view() ==
1328 target_node->name()->string_view());
James Kuszmaulfc273dc2020-05-09 17:56:19 -07001329 if (both_null || node_names_identical) {
Austin Schuh6f3babe2020-01-26 20:34:50 -08001330 if (!found_node) {
1331 found_node = true;
1332 log_file_header_ = CopyFlatBuffer(reader->log_file_header());
Austin Schuhcde938c2020-02-02 17:30:07 -08001333 VLOG(1) << "Found log file " << reader->filename() << " with node "
1334 << FlatbufferToJson(reader->node()) << " start_time "
1335 << monotonic_start_time();
Austin Schuh6f3babe2020-01-26 20:34:50 -08001336 } else {
Austin Schuh2f8fd752020-09-01 22:38:28 -07001337 // Find the earliest start time. That way, if we get a full log file
1338 // directly from the node, and a partial later, we start with the
1339 // full. Update our header to match that.
1340 const monotonic_clock::time_point new_monotonic_start_time(
1341 chrono::nanoseconds(
1342 reader->log_file_header()->monotonic_start_time()));
1343 const realtime_clock::time_point new_realtime_start_time(
1344 chrono::nanoseconds(
1345 reader->log_file_header()->realtime_start_time()));
1346
1347 if (monotonic_start_time() == monotonic_clock::min_time ||
1348 (new_monotonic_start_time != monotonic_clock::min_time &&
1349 new_monotonic_start_time < monotonic_start_time())) {
1350 log_file_header_.mutable_message()->mutate_monotonic_start_time(
1351 new_monotonic_start_time.time_since_epoch().count());
1352 log_file_header_.mutable_message()->mutate_realtime_start_time(
1353 new_realtime_start_time.time_since_epoch().count());
1354 VLOG(1) << "Updated log file " << reader->filename()
1355 << " with node " << FlatbufferToJson(reader->node())
1356 << " start_time " << new_monotonic_start_time;
1357 }
Austin Schuh6f3babe2020-01-26 20:34:50 -08001358 }
1359 }
1360 }
1361
1362 if (!found_node) {
1363 LOG(WARNING) << "Failed to find log file for node "
1364 << FlatbufferToJson(target_node);
1365 return false;
1366 }
1367 }
1368
1369 // Build up all the timestamp mergers. This connects up all the
1370 // SplitMessageReaders.
1371 timestamp_mergers_.reserve(configuration()->channels()->size());
1372 for (size_t channel_index = 0;
1373 channel_index < configuration()->channels()->size(); ++channel_index) {
1374 timestamp_mergers_.emplace_back(
1375 configuration(), split_message_readers, channel_index,
1376 configuration::GetNode(configuration(), target_node), this);
1377 }
1378
1379 // And prime everything.
Austin Schuh6f3babe2020-01-26 20:34:50 -08001380 for (std::unique_ptr<SplitMessageReader> &split_message_reader :
1381 split_message_readers_) {
Austin Schuhcde938c2020-02-02 17:30:07 -08001382 split_message_reader->QueueMessages(
1383 split_message_reader->monotonic_start_time());
Austin Schuh6f3babe2020-01-26 20:34:50 -08001384 }
1385
1386 node_ = configuration::GetNodeOrDie(configuration(), target_node);
1387 return true;
1388}
1389
Austin Schuh858c9f32020-08-31 16:56:12 -07001390monotonic_clock::time_point ChannelMerger::OldestMessageTime() const {
Brian Silverman8a32ce62020-08-12 12:02:38 -07001391 if (channel_heap_.empty()) {
Austin Schuh6f3babe2020-01-26 20:34:50 -08001392 return monotonic_clock::max_time;
1393 }
1394 return channel_heap_.front().first;
1395}
1396
1397void ChannelMerger::PushChannelHeap(monotonic_clock::time_point timestamp,
1398 int channel_index) {
1399 // Pop and recreate the heap if it has already been pushed. And since we are
1400 // pushing again, we don't need to clear pushed.
1401 if (timestamp_mergers_[channel_index].pushed()) {
Brian Silverman8a32ce62020-08-12 12:02:38 -07001402 const auto channel_iterator = std::find_if(
Austin Schuh6f3babe2020-01-26 20:34:50 -08001403 channel_heap_.begin(), channel_heap_.end(),
1404 [channel_index](const std::pair<monotonic_clock::time_point, int> x) {
1405 return x.second == channel_index;
Brian Silverman8a32ce62020-08-12 12:02:38 -07001406 });
1407 DCHECK(channel_iterator != channel_heap_.end());
1408 if (std::get<0>(*channel_iterator) == timestamp) {
1409 // It's already in the heap, in the correct spot, so nothing
1410 // more for us to do here.
1411 return;
1412 }
1413 channel_heap_.erase(channel_iterator);
Austin Schuh6f3babe2020-01-26 20:34:50 -08001414 std::make_heap(channel_heap_.begin(), channel_heap_.end(),
1415 ChannelHeapCompare);
1416 }
1417
Austin Schuh2f8fd752020-09-01 22:38:28 -07001418 if (timestamp == monotonic_clock::min_time) {
1419 timestamp_mergers_[channel_index].set_pushed(false);
1420 return;
1421 }
1422
Austin Schuh05b70472020-01-01 17:11:17 -08001423 channel_heap_.push_back(std::make_pair(timestamp, channel_index));
1424
1425 // The default sort puts the newest message first. Use a custom comparator to
1426 // put the oldest message first.
1427 std::push_heap(channel_heap_.begin(), channel_heap_.end(),
1428 ChannelHeapCompare);
1429}
1430
Austin Schuh2f8fd752020-09-01 22:38:28 -07001431void ChannelMerger::VerifyHeaps() {
Austin Schuh661a8d82020-09-13 17:25:56 -07001432 std::vector<std::pair<monotonic_clock::time_point, int>> channel_heap =
1433 channel_heap_;
1434 std::make_heap(channel_heap.begin(), channel_heap.end(), &ChannelHeapCompare);
Austin Schuh2f8fd752020-09-01 22:38:28 -07001435
Austin Schuh661a8d82020-09-13 17:25:56 -07001436 for (size_t i = 0; i < channel_heap_.size(); ++i) {
1437 CHECK(channel_heap_[i] == channel_heap[i]) << ": Heaps diverged...";
1438 CHECK_EQ(
1439 std::get<0>(channel_heap[i]),
1440 timestamp_mergers_[std::get<1>(channel_heap[i])].channel_merger_time());
Austin Schuh2f8fd752020-09-01 22:38:28 -07001441 }
1442}
1443
Austin Schuh6f3babe2020-01-26 20:34:50 -08001444std::tuple<TimestampMerger::DeliveryTimestamp, int,
1445 FlatbufferVector<MessageHeader>>
1446ChannelMerger::PopOldest() {
Austin Schuh8bd96322020-02-13 21:18:22 -08001447 CHECK_GT(channel_heap_.size(), 0u);
Austin Schuh05b70472020-01-01 17:11:17 -08001448 std::pair<monotonic_clock::time_point, int> oldest_channel_data =
1449 channel_heap_.front();
Austin Schuh6f3babe2020-01-26 20:34:50 -08001450 int channel_index = oldest_channel_data.second;
Austin Schuh05b70472020-01-01 17:11:17 -08001451 std::pop_heap(channel_heap_.begin(), channel_heap_.end(),
1452 &ChannelHeapCompare);
1453 channel_heap_.pop_back();
Austin Schuh8bd96322020-02-13 21:18:22 -08001454
Austin Schuh6f3babe2020-01-26 20:34:50 -08001455 timestamp_mergers_[channel_index].set_pushed(false);
Austin Schuh05b70472020-01-01 17:11:17 -08001456
Austin Schuh6f3babe2020-01-26 20:34:50 -08001457 TimestampMerger *merger = &timestamp_mergers_[channel_index];
Austin Schuh05b70472020-01-01 17:11:17 -08001458
Austin Schuhcde938c2020-02-02 17:30:07 -08001459 // Merger handles any queueing needed from here.
Austin Schuh6f3babe2020-01-26 20:34:50 -08001460 std::tuple<TimestampMerger::DeliveryTimestamp,
1461 FlatbufferVector<MessageHeader>>
1462 message = merger->PopOldest();
Brian Silverman8a32ce62020-08-12 12:02:38 -07001463 DCHECK_EQ(std::get<0>(message).monotonic_event_time,
1464 oldest_channel_data.first)
1465 << ": channel_heap_ was corrupted for " << channel_index << ": "
1466 << DebugString();
Austin Schuh05b70472020-01-01 17:11:17 -08001467
Austin Schuh2f8fd752020-09-01 22:38:28 -07001468 CHECK_GE(std::get<0>(message).monotonic_event_time, last_popped_time_)
1469 << ": " << MaybeNodeName(log_file_header()->node())
1470 << "Messages came off the queue out of order. " << DebugString();
1471 last_popped_time_ = std::get<0>(message).monotonic_event_time;
1472
1473 VLOG(1) << "Popped " << last_popped_time_ << " "
1474 << configuration::StrippedChannelToString(
1475 configuration()->channels()->Get(channel_index))
1476 << " (" << channel_index << ")";
1477
Austin Schuh6f3babe2020-01-26 20:34:50 -08001478 return std::make_tuple(std::get<0>(message), channel_index,
1479 std::move(std::get<1>(message)));
1480}
1481
Austin Schuhcde938c2020-02-02 17:30:07 -08001482std::string SplitMessageReader::MessageHeaderQueue::DebugString() const {
1483 std::stringstream ss;
1484 for (size_t i = 0; i < data_.size(); ++i) {
Austin Schuh2f8fd752020-09-01 22:38:28 -07001485 if (i < 5 || i + 5 > data_.size()) {
1486 if (timestamps) {
1487 ss << " msg: ";
1488 } else {
1489 ss << " timestamp: ";
1490 }
1491 ss << monotonic_clock::time_point(
1492 chrono::nanoseconds(data_[i].message().monotonic_sent_time()))
Austin Schuhcde938c2020-02-02 17:30:07 -08001493 << " ("
Austin Schuh2f8fd752020-09-01 22:38:28 -07001494 << realtime_clock::time_point(
1495 chrono::nanoseconds(data_[i].message().realtime_sent_time()))
1496 << ") " << data_[i].message().queue_index();
1497 if (timestamps) {
1498 ss << " <- remote "
1499 << monotonic_clock::time_point(chrono::nanoseconds(
1500 data_[i].message().monotonic_remote_time()))
1501 << " ("
1502 << realtime_clock::time_point(chrono::nanoseconds(
1503 data_[i].message().realtime_remote_time()))
1504 << ")";
1505 }
1506 ss << "\n";
1507 } else if (i == 5) {
1508 ss << " ...\n";
Austin Schuh6f3babe2020-01-26 20:34:50 -08001509 }
Austin Schuhcde938c2020-02-02 17:30:07 -08001510 }
Austin Schuh6f3babe2020-01-26 20:34:50 -08001511
Austin Schuhcde938c2020-02-02 17:30:07 -08001512 return ss.str();
1513}
Austin Schuh6f3babe2020-01-26 20:34:50 -08001514
Austin Schuhcde938c2020-02-02 17:30:07 -08001515std::string SplitMessageReader::DebugString(int channel) const {
1516 std::stringstream ss;
1517 ss << "[\n";
1518 ss << channels_[channel].data.DebugString();
1519 ss << " ]";
1520 return ss.str();
1521}
Austin Schuh6f3babe2020-01-26 20:34:50 -08001522
Austin Schuhcde938c2020-02-02 17:30:07 -08001523std::string SplitMessageReader::DebugString(int channel, int node_index) const {
1524 std::stringstream ss;
1525 ss << "[\n";
1526 ss << channels_[channel].timestamps[node_index].DebugString();
1527 ss << " ]";
1528 return ss.str();
1529}
1530
1531std::string TimestampMerger::DebugString() const {
1532 std::stringstream ss;
1533
1534 if (timestamp_heap_.size() > 0) {
1535 ss << " timestamp_heap {\n";
1536 std::vector<
1537 std::tuple<monotonic_clock::time_point, uint32_t, SplitMessageReader *>>
1538 timestamp_heap = timestamp_heap_;
1539 while (timestamp_heap.size() > 0u) {
1540 std::tuple<monotonic_clock::time_point, uint32_t, SplitMessageReader *>
1541 oldest_timestamp_reader = timestamp_heap.front();
1542
1543 ss << " " << std::get<2>(oldest_timestamp_reader) << " "
1544 << std::get<0>(oldest_timestamp_reader) << " queue_index ("
1545 << std::get<1>(oldest_timestamp_reader) << ") ttq "
1546 << std::get<2>(oldest_timestamp_reader)->time_to_queue() << " "
1547 << std::get<2>(oldest_timestamp_reader)->filename() << " -> "
1548 << std::get<2>(oldest_timestamp_reader)
1549 ->DebugString(channel_index_, node_index_)
1550 << "\n";
1551
1552 std::pop_heap(timestamp_heap.begin(), timestamp_heap.end(),
1553 &SplitMessageReaderHeapCompare);
1554 timestamp_heap.pop_back();
1555 }
1556 ss << " }\n";
1557 }
1558
1559 ss << " message_heap {\n";
1560 {
1561 std::vector<
1562 std::tuple<monotonic_clock::time_point, uint32_t, SplitMessageReader *>>
1563 message_heap = message_heap_;
Brian Silverman8a32ce62020-08-12 12:02:38 -07001564 while (!message_heap.empty()) {
Austin Schuhcde938c2020-02-02 17:30:07 -08001565 std::tuple<monotonic_clock::time_point, uint32_t, SplitMessageReader *>
1566 oldest_message_reader = message_heap.front();
1567
1568 ss << " " << std::get<2>(oldest_message_reader) << " "
1569 << std::get<0>(oldest_message_reader) << " queue_index ("
1570 << std::get<1>(oldest_message_reader) << ") ttq "
1571 << std::get<2>(oldest_message_reader)->time_to_queue() << " "
1572 << std::get<2>(oldest_message_reader)->filename() << " -> "
1573 << std::get<2>(oldest_message_reader)->DebugString(channel_index_)
1574 << "\n";
1575
1576 std::pop_heap(message_heap.begin(), message_heap.end(),
1577 &SplitMessageReaderHeapCompare);
1578 message_heap.pop_back();
Austin Schuh6f3babe2020-01-26 20:34:50 -08001579 }
Austin Schuh05b70472020-01-01 17:11:17 -08001580 }
Austin Schuhcde938c2020-02-02 17:30:07 -08001581 ss << " }";
1582
1583 return ss.str();
1584}
1585
1586std::string ChannelMerger::DebugString() const {
1587 std::stringstream ss;
1588 ss << "start_time " << realtime_start_time() << " " << monotonic_start_time()
1589 << "\n";
1590 ss << "channel_heap {\n";
1591 std::vector<std::pair<monotonic_clock::time_point, int>> channel_heap =
1592 channel_heap_;
Brian Silverman8a32ce62020-08-12 12:02:38 -07001593 while (!channel_heap.empty()) {
Austin Schuhcde938c2020-02-02 17:30:07 -08001594 std::tuple<monotonic_clock::time_point, int> channel = channel_heap.front();
1595 ss << " " << std::get<0>(channel) << " (" << std::get<1>(channel) << ") "
1596 << configuration::CleanedChannelToString(
1597 configuration()->channels()->Get(std::get<1>(channel)))
1598 << "\n";
1599
1600 ss << timestamp_mergers_[std::get<1>(channel)].DebugString() << "\n";
1601
1602 std::pop_heap(channel_heap.begin(), channel_heap.end(),
1603 &ChannelHeapCompare);
1604 channel_heap.pop_back();
1605 }
1606 ss << "}";
1607
1608 return ss.str();
Austin Schuh05b70472020-01-01 17:11:17 -08001609}
1610
Austin Schuhee711052020-08-24 16:06:09 -07001611std::string MaybeNodeName(const Node *node) {
1612 if (node != nullptr) {
1613 return node->name()->str() + " ";
1614 }
1615 return "";
1616}
1617
Brian Silvermanf51499a2020-09-21 12:49:08 -07001618} // namespace aos::logger