blob: 2886a8eb35f6682bc88bafd3d2911b3dbadd90e5 [file] [log] [blame]
Austin Schuha36c8902019-12-30 18:07:15 -08001#include "aos/events/logging/logfile_utils.h"
2
3#include <fcntl.h>
Austin Schuha36c8902019-12-30 18:07:15 -08004#include <sys/stat.h>
5#include <sys/types.h>
6#include <sys/uio.h>
7
Brian Silvermanf51499a2020-09-21 12:49:08 -07008#include <algorithm>
9#include <climits>
Austin Schuha36c8902019-12-30 18:07:15 -080010
Austin Schuhe4fca832020-03-07 16:58:53 -080011#include "absl/strings/escaping.h"
Austin Schuh05b70472020-01-01 17:11:17 -080012#include "aos/configuration.h"
Austin Schuhfa895892020-01-07 20:07:41 -080013#include "aos/flatbuffer_merge.h"
Austin Schuh6f3babe2020-01-26 20:34:50 -080014#include "aos/util/file.h"
Austin Schuha36c8902019-12-30 18:07:15 -080015#include "flatbuffers/flatbuffers.h"
Austin Schuh05b70472020-01-01 17:11:17 -080016#include "gflags/gflags.h"
17#include "glog/logging.h"
Austin Schuha36c8902019-12-30 18:07:15 -080018
// xz/LZMA support is only enabled when building for 64-bit x86 or 64-bit ARM;
// every other target gets ENABLE_LZMA defined to 0.
#if defined(__x86_64__) || defined(__aarch64__)
#define ENABLE_LZMA 1
#else
#define ENABLE_LZMA 0
#endif
26
27#if ENABLE_LZMA
28#include "aos/events/logging/lzma_encoder.h"
29#endif
30
Austin Schuh7fbf5a72020-09-21 16:28:13 -070031DEFINE_int32(flush_size, 128000,
Austin Schuha36c8902019-12-30 18:07:15 -080032 "Number of outstanding bytes to allow before flushing to disk.");
33
Brian Silvermanf51499a2020-09-21 12:49:08 -070034namespace aos::logger {
Austin Schuha36c8902019-12-30 18:07:15 -080035
Austin Schuh05b70472020-01-01 17:11:17 -080036namespace chrono = std::chrono;
37
Brian Silvermanf51499a2020-09-21 12:49:08 -070038DetachedBufferWriter::DetachedBufferWriter(
39 std::string_view filename, std::unique_ptr<DetachedBufferEncoder> encoder)
40 : filename_(filename), encoder_(std::move(encoder)) {
Brian Silvermana9f2ec92020-10-06 18:00:53 -070041 if (!util::MkdirPIfSpace(filename, 0777)) {
42 ran_out_of_space_ = true;
43 } else {
44 fd_ = open(std::string(filename).c_str(),
45 O_RDWR | O_CLOEXEC | O_CREAT | O_EXCL, 0774);
46 if (fd_ == -1 && errno == ENOSPC) {
47 ran_out_of_space_ = true;
48 } else {
49 PCHECK(fd_ != -1) << ": Failed to open " << filename << " for writing";
50 VLOG(1) << "Opened " << filename << " for writing";
51 }
52 }
Austin Schuha36c8902019-12-30 18:07:15 -080053}
54
55DetachedBufferWriter::~DetachedBufferWriter() {
Brian Silverman0465fcf2020-09-24 00:29:18 -070056 Close();
57 if (ran_out_of_space_) {
58 CHECK(acknowledge_ran_out_of_space_)
59 << ": Unacknowledged out of disk space, log file was not completed";
Brian Silvermanf51499a2020-09-21 12:49:08 -070060 }
Austin Schuh2f8fd752020-09-01 22:38:28 -070061}
62
Brian Silvermand90905f2020-09-23 14:42:56 -070063DetachedBufferWriter::DetachedBufferWriter(DetachedBufferWriter &&other) {
Austin Schuh2f8fd752020-09-01 22:38:28 -070064 *this = std::move(other);
65}
66
Brian Silverman87ac0402020-09-17 14:47:01 -070067// When other is destroyed "soon" (which it should be because we're getting an
68// rvalue reference to it), it will flush etc all the data we have queued up
69// (because that data will then be its data).
Austin Schuh2f8fd752020-09-01 22:38:28 -070070DetachedBufferWriter &DetachedBufferWriter::operator=(
71 DetachedBufferWriter &&other) {
Austin Schuh2f8fd752020-09-01 22:38:28 -070072 std::swap(filename_, other.filename_);
Brian Silvermanf51499a2020-09-21 12:49:08 -070073 std::swap(encoder_, other.encoder_);
Austin Schuh2f8fd752020-09-01 22:38:28 -070074 std::swap(fd_, other.fd_);
Brian Silverman0465fcf2020-09-24 00:29:18 -070075 std::swap(ran_out_of_space_, other.ran_out_of_space_);
76 std::swap(acknowledge_ran_out_of_space_, other.acknowledge_ran_out_of_space_);
Austin Schuh2f8fd752020-09-01 22:38:28 -070077 std::swap(iovec_, other.iovec_);
Brian Silvermanf51499a2020-09-21 12:49:08 -070078 std::swap(max_write_time_, other.max_write_time_);
79 std::swap(max_write_time_bytes_, other.max_write_time_bytes_);
80 std::swap(max_write_time_messages_, other.max_write_time_messages_);
81 std::swap(total_write_time_, other.total_write_time_);
82 std::swap(total_write_count_, other.total_write_count_);
83 std::swap(total_write_messages_, other.total_write_messages_);
84 std::swap(total_write_bytes_, other.total_write_bytes_);
Austin Schuh2f8fd752020-09-01 22:38:28 -070085 return *this;
Austin Schuha36c8902019-12-30 18:07:15 -080086}
87
Brian Silvermanf51499a2020-09-21 12:49:08 -070088void DetachedBufferWriter::QueueSpan(absl::Span<const uint8_t> span) {
Brian Silvermana9f2ec92020-10-06 18:00:53 -070089 if (ran_out_of_space_) {
90 // We don't want any later data to be written after space becomes
91 // available, so refuse to write anything more once we've dropped data
92 // because we ran out of space.
93 VLOG(1) << "Ignoring span: " << span.size();
94 return;
95 }
96
Brian Silvermanf51499a2020-09-21 12:49:08 -070097 if (encoder_->may_bypass() && span.size() > 4096u) {
98 // Over this threshold, we'll assume it's cheaper to add an extra
99 // syscall to write the data immediately instead of copying it to
100 // enqueue.
Austin Schuha36c8902019-12-30 18:07:15 -0800101
Brian Silvermanf51499a2020-09-21 12:49:08 -0700102 // First, flush everything.
103 while (encoder_->queue_size() > 0u) {
104 Flush();
105 }
Austin Schuhde031b72020-01-10 19:34:41 -0800106
Brian Silvermanf51499a2020-09-21 12:49:08 -0700107 // Then, write it directly.
108 const auto start = aos::monotonic_clock::now();
109 const ssize_t written = write(fd_, span.data(), span.size());
110 const auto end = aos::monotonic_clock::now();
Brian Silverman0465fcf2020-09-24 00:29:18 -0700111 HandleWriteReturn(written, span.size());
Brian Silvermanf51499a2020-09-21 12:49:08 -0700112 UpdateStatsForWrite(end - start, written, 1);
113 } else {
114 encoder_->Encode(CopySpanAsDetachedBuffer(span));
Austin Schuha36c8902019-12-30 18:07:15 -0800115 }
Brian Silvermanf51499a2020-09-21 12:49:08 -0700116
117 FlushAtThreshold();
Austin Schuha36c8902019-12-30 18:07:15 -0800118}
119
Brian Silverman0465fcf2020-09-24 00:29:18 -0700120void DetachedBufferWriter::Close() {
121 if (fd_ == -1) {
122 return;
123 }
124 encoder_->Finish();
125 while (encoder_->queue_size() > 0) {
126 Flush();
127 }
128 if (close(fd_) == -1) {
129 if (errno == ENOSPC) {
130 ran_out_of_space_ = true;
131 } else {
132 PLOG(ERROR) << "Closing log file failed";
133 }
134 }
135 fd_ = -1;
136 VLOG(1) << "Closed " << filename_;
137}
138
Austin Schuha36c8902019-12-30 18:07:15 -0800139void DetachedBufferWriter::Flush() {
Brian Silvermanf51499a2020-09-21 12:49:08 -0700140 const auto queue = encoder_->queue();
141 if (queue.empty()) {
Austin Schuha36c8902019-12-30 18:07:15 -0800142 return;
143 }
Brian Silverman0465fcf2020-09-24 00:29:18 -0700144 if (ran_out_of_space_) {
145 // We don't want any later data to be written after space becomes available,
146 // so refuse to write anything more once we've dropped data because we ran
147 // out of space.
148 VLOG(1) << "Ignoring queue: " << queue.size();
149 encoder_->Clear(queue.size());
150 return;
151 }
Brian Silvermanf51499a2020-09-21 12:49:08 -0700152
Austin Schuha36c8902019-12-30 18:07:15 -0800153 iovec_.clear();
Brian Silvermanf51499a2020-09-21 12:49:08 -0700154 const size_t iovec_size = std::min<size_t>(queue.size(), IOV_MAX);
155 iovec_.resize(iovec_size);
Austin Schuha36c8902019-12-30 18:07:15 -0800156 size_t counted_size = 0;
Brian Silvermanf51499a2020-09-21 12:49:08 -0700157 for (size_t i = 0; i < iovec_size; ++i) {
158 iovec_[i].iov_base = const_cast<uint8_t *>(queue[i].data());
159 iovec_[i].iov_len = queue[i].size();
160 counted_size += iovec_[i].iov_len;
Austin Schuha36c8902019-12-30 18:07:15 -0800161 }
Brian Silvermanf51499a2020-09-21 12:49:08 -0700162
163 const auto start = aos::monotonic_clock::now();
Austin Schuha36c8902019-12-30 18:07:15 -0800164 const ssize_t written = writev(fd_, iovec_.data(), iovec_.size());
Brian Silvermanf51499a2020-09-21 12:49:08 -0700165 const auto end = aos::monotonic_clock::now();
Brian Silverman0465fcf2020-09-24 00:29:18 -0700166 HandleWriteReturn(written, counted_size);
Brian Silvermanf51499a2020-09-21 12:49:08 -0700167
168 encoder_->Clear(iovec_size);
169
170 UpdateStatsForWrite(end - start, written, iovec_size);
171}
172
Brian Silverman0465fcf2020-09-24 00:29:18 -0700173void DetachedBufferWriter::HandleWriteReturn(ssize_t write_return,
174 size_t write_size) {
175 if (write_return == -1 && errno == ENOSPC) {
176 ran_out_of_space_ = true;
177 return;
178 }
179 PCHECK(write_return >= 0) << ": write failed";
180 if (write_return < static_cast<ssize_t>(write_size)) {
181 // Sometimes this happens instead of ENOSPC. On a real filesystem, this
182 // never seems to happen in any other case. If we ever want to log to a
183 // socket, this will happen more often. However, until we get there, we'll
184 // just assume it means we ran out of space.
185 ran_out_of_space_ = true;
186 return;
187 }
188}
189
Brian Silvermanf51499a2020-09-21 12:49:08 -0700190void DetachedBufferWriter::UpdateStatsForWrite(
191 aos::monotonic_clock::duration duration, ssize_t written, int iovec_size) {
192 if (duration > max_write_time_) {
193 max_write_time_ = duration;
194 max_write_time_bytes_ = written;
195 max_write_time_messages_ = iovec_size;
196 }
197 total_write_time_ += duration;
198 ++total_write_count_;
199 total_write_messages_ += iovec_size;
200 total_write_bytes_ += written;
201}
202
203void DetachedBufferWriter::FlushAtThreshold() {
204 // Flush if we are at the max number of iovs per writev, because there's no
205 // point queueing up any more data in memory. Also flush once we have enough
206 // data queued up.
207 while (encoder_->queued_bytes() > static_cast<size_t>(FLAGS_flush_size) ||
208 encoder_->queue_size() >= IOV_MAX) {
209 Flush();
210 }
Austin Schuha36c8902019-12-30 18:07:15 -0800211}
212
213flatbuffers::Offset<MessageHeader> PackMessage(
214 flatbuffers::FlatBufferBuilder *fbb, const Context &context,
215 int channel_index, LogType log_type) {
216 flatbuffers::Offset<flatbuffers::Vector<uint8_t>> data_offset;
217
218 switch (log_type) {
219 case LogType::kLogMessage:
220 case LogType::kLogMessageAndDeliveryTime:
Austin Schuh6f3babe2020-01-26 20:34:50 -0800221 case LogType::kLogRemoteMessage:
Brian Silvermaneaa41d62020-07-08 19:47:35 -0700222 data_offset = fbb->CreateVector(
223 static_cast<const uint8_t *>(context.data), context.size);
Austin Schuha36c8902019-12-30 18:07:15 -0800224 break;
225
226 case LogType::kLogDeliveryTimeOnly:
227 break;
228 }
229
230 MessageHeader::Builder message_header_builder(*fbb);
231 message_header_builder.add_channel_index(channel_index);
Austin Schuh6f3babe2020-01-26 20:34:50 -0800232
233 switch (log_type) {
234 case LogType::kLogRemoteMessage:
235 message_header_builder.add_queue_index(context.remote_queue_index);
236 message_header_builder.add_monotonic_sent_time(
237 context.monotonic_remote_time.time_since_epoch().count());
238 message_header_builder.add_realtime_sent_time(
239 context.realtime_remote_time.time_since_epoch().count());
240 break;
241
242 case LogType::kLogMessage:
243 case LogType::kLogMessageAndDeliveryTime:
244 case LogType::kLogDeliveryTimeOnly:
245 message_header_builder.add_queue_index(context.queue_index);
246 message_header_builder.add_monotonic_sent_time(
247 context.monotonic_event_time.time_since_epoch().count());
248 message_header_builder.add_realtime_sent_time(
249 context.realtime_event_time.time_since_epoch().count());
250 break;
251 }
Austin Schuha36c8902019-12-30 18:07:15 -0800252
253 switch (log_type) {
254 case LogType::kLogMessage:
Austin Schuh6f3babe2020-01-26 20:34:50 -0800255 case LogType::kLogRemoteMessage:
Austin Schuha36c8902019-12-30 18:07:15 -0800256 message_header_builder.add_data(data_offset);
257 break;
258
259 case LogType::kLogMessageAndDeliveryTime:
260 message_header_builder.add_data(data_offset);
261 [[fallthrough]];
262
263 case LogType::kLogDeliveryTimeOnly:
264 message_header_builder.add_monotonic_remote_time(
265 context.monotonic_remote_time.time_since_epoch().count());
266 message_header_builder.add_realtime_remote_time(
267 context.realtime_remote_time.time_since_epoch().count());
268 message_header_builder.add_remote_queue_index(context.remote_queue_index);
269 break;
270 }
271
272 return message_header_builder.Finish();
273}
274
Brian Silvermanf51499a2020-09-21 12:49:08 -0700275SpanReader::SpanReader(std::string_view filename) : filename_(filename) {
Brian Silvermanf59fe3f2020-09-22 21:04:09 -0700276 static const std::string_view kXz = ".xz";
277 if (filename.substr(filename.size() - kXz.size()) == kXz) {
278#if ENABLE_LZMA
279 decoder_ = std::make_unique<LzmaDecoder>(filename);
280#else
281 LOG(FATAL) << "Reading xz-compressed files not supported on this platform";
282#endif
283 } else {
284 decoder_ = std::make_unique<DummyDecoder>(filename);
285 }
Austin Schuh05b70472020-01-01 17:11:17 -0800286}
287
288absl::Span<const uint8_t> SpanReader::ReadMessage() {
289 // Make sure we have enough for the size.
290 if (data_.size() - consumed_data_ < sizeof(flatbuffers::uoffset_t)) {
291 if (!ReadBlock()) {
292 return absl::Span<const uint8_t>();
293 }
294 }
295
296 // Now make sure we have enough for the message.
297 const size_t data_size =
298 flatbuffers::GetPrefixedSize(data_.data() + consumed_data_) +
299 sizeof(flatbuffers::uoffset_t);
Austin Schuhe4fca832020-03-07 16:58:53 -0800300 if (data_size == sizeof(flatbuffers::uoffset_t)) {
301 LOG(ERROR) << "Size of data is zero. Log file end is corrupted, skipping.";
302 LOG(ERROR) << " Rest of log file is "
303 << absl::BytesToHexString(std::string_view(
304 reinterpret_cast<const char *>(data_.data() +
305 consumed_data_),
306 data_.size() - consumed_data_));
307 return absl::Span<const uint8_t>();
308 }
Austin Schuh05b70472020-01-01 17:11:17 -0800309 while (data_.size() < consumed_data_ + data_size) {
310 if (!ReadBlock()) {
311 return absl::Span<const uint8_t>();
312 }
313 }
314
315 // And return it, consuming the data.
316 const uint8_t *data_ptr = data_.data() + consumed_data_;
317
318 consumed_data_ += data_size;
319
320 return absl::Span<const uint8_t>(data_ptr, data_size);
321}
322
Austin Schuh05b70472020-01-01 17:11:17 -0800323bool SpanReader::ReadBlock() {
Brian Silvermanf51499a2020-09-21 12:49:08 -0700324 // This is the amount of data we grab at a time. Doing larger chunks minimizes
325 // syscalls and helps decompressors batch things more efficiently.
Austin Schuh05b70472020-01-01 17:11:17 -0800326 constexpr size_t kReadSize = 256 * 1024;
327
328 // Strip off any unused data at the front.
329 if (consumed_data_ != 0) {
Brian Silvermanf51499a2020-09-21 12:49:08 -0700330 data_.erase_front(consumed_data_);
Austin Schuh05b70472020-01-01 17:11:17 -0800331 consumed_data_ = 0;
332 }
333
334 const size_t starting_size = data_.size();
335
336 // This should automatically grow the backing store. It won't shrink if we
337 // get a small chunk later. This reduces allocations when we want to append
338 // more data.
Brian Silvermanf51499a2020-09-21 12:49:08 -0700339 data_.resize(starting_size + kReadSize);
Austin Schuh05b70472020-01-01 17:11:17 -0800340
Brian Silvermanf51499a2020-09-21 12:49:08 -0700341 const size_t count =
342 decoder_->Read(data_.begin() + starting_size, data_.end());
343 data_.resize(starting_size + count);
Austin Schuh05b70472020-01-01 17:11:17 -0800344 if (count == 0) {
Austin Schuh05b70472020-01-01 17:11:17 -0800345 return false;
346 }
Austin Schuh05b70472020-01-01 17:11:17 -0800347
348 return true;
349}
350
Austin Schuhadd6eb32020-11-09 21:24:26 -0800351std::optional<SizePrefixedFlatbufferVector<LogFileHeader>> ReadHeader(
Austin Schuh3bd4c402020-11-06 18:19:06 -0800352 std::string_view filename) {
Austin Schuh6f3babe2020-01-26 20:34:50 -0800353 SpanReader span_reader(filename);
Austin Schuh6f3babe2020-01-26 20:34:50 -0800354 absl::Span<const uint8_t> config_data = span_reader.ReadMessage();
355
356 // Make sure something was read.
Austin Schuh3bd4c402020-11-06 18:19:06 -0800357 if (config_data == absl::Span<const uint8_t>()) {
358 return std::nullopt;
359 }
Austin Schuh6f3babe2020-01-26 20:34:50 -0800360
Austin Schuh5212cad2020-09-09 23:12:09 -0700361 // And copy the config so we have it forever, removing the size prefix.
Brian Silverman354697a2020-09-22 21:06:32 -0700362 ResizeableBuffer data;
Austin Schuhadd6eb32020-11-09 21:24:26 -0800363 data.resize(config_data.size());
364 memcpy(data.data(), config_data.begin(), data.size());
365 return SizePrefixedFlatbufferVector<LogFileHeader>(std::move(data));
Austin Schuh6f3babe2020-01-26 20:34:50 -0800366}
367
Austin Schuhadd6eb32020-11-09 21:24:26 -0800368std::optional<SizePrefixedFlatbufferVector<MessageHeader>> ReadNthMessage(
Austin Schuh3bd4c402020-11-06 18:19:06 -0800369 std::string_view filename, size_t n) {
Austin Schuh5212cad2020-09-09 23:12:09 -0700370 SpanReader span_reader(filename);
371 absl::Span<const uint8_t> data_span = span_reader.ReadMessage();
372 for (size_t i = 0; i < n + 1; ++i) {
373 data_span = span_reader.ReadMessage();
374
375 // Make sure something was read.
Austin Schuh3bd4c402020-11-06 18:19:06 -0800376 if (data_span == absl::Span<const uint8_t>()) {
377 return std::nullopt;
378 }
Austin Schuh5212cad2020-09-09 23:12:09 -0700379 }
380
Brian Silverman354697a2020-09-22 21:06:32 -0700381 // And copy the config so we have it forever, removing the size prefix.
382 ResizeableBuffer data;
Austin Schuhadd6eb32020-11-09 21:24:26 -0800383 data.resize(data_span.size());
384 memcpy(data.data(), data_span.begin(), data.size());
385 return SizePrefixedFlatbufferVector<MessageHeader>(std::move(data));
Austin Schuh5212cad2020-09-09 23:12:09 -0700386}
387
Austin Schuh05b70472020-01-01 17:11:17 -0800388MessageReader::MessageReader(std::string_view filename)
Austin Schuh97789fc2020-08-01 14:42:45 -0700389 : span_reader_(filename),
Austin Schuhadd6eb32020-11-09 21:24:26 -0800390 raw_log_file_header_(
391 SizePrefixedFlatbufferVector<LogFileHeader>::Empty()) {
Austin Schuh05b70472020-01-01 17:11:17 -0800392 // Make sure we have enough to read the size.
Austin Schuh97789fc2020-08-01 14:42:45 -0700393 absl::Span<const uint8_t> header_data = span_reader_.ReadMessage();
Austin Schuh05b70472020-01-01 17:11:17 -0800394
395 // Make sure something was read.
Austin Schuh97789fc2020-08-01 14:42:45 -0700396 CHECK(header_data != absl::Span<const uint8_t>())
397 << ": Failed to read header from: " << filename;
Austin Schuh05b70472020-01-01 17:11:17 -0800398
Austin Schuh97789fc2020-08-01 14:42:45 -0700399 // And copy the header data so we have it forever.
Brian Silverman354697a2020-09-22 21:06:32 -0700400 ResizeableBuffer header_data_copy;
Austin Schuhadd6eb32020-11-09 21:24:26 -0800401 header_data_copy.resize(header_data.size());
402 memcpy(header_data_copy.data(), header_data.begin(), header_data_copy.size());
Austin Schuh97789fc2020-08-01 14:42:45 -0700403 raw_log_file_header_ =
Austin Schuhadd6eb32020-11-09 21:24:26 -0800404 SizePrefixedFlatbufferVector<LogFileHeader>(std::move(header_data_copy));
Austin Schuh05b70472020-01-01 17:11:17 -0800405
Austin Schuhcde938c2020-02-02 17:30:07 -0800406 max_out_of_order_duration_ =
Austin Schuh2f8fd752020-09-01 22:38:28 -0700407 chrono::nanoseconds(log_file_header()->max_out_of_order_duration());
Austin Schuhcde938c2020-02-02 17:30:07 -0800408
409 VLOG(1) << "Opened " << filename << " as node "
410 << FlatbufferToJson(log_file_header()->node());
Austin Schuh05b70472020-01-01 17:11:17 -0800411}
412
Austin Schuhadd6eb32020-11-09 21:24:26 -0800413std::optional<SizePrefixedFlatbufferVector<MessageHeader>>
414MessageReader::ReadMessage() {
Austin Schuh05b70472020-01-01 17:11:17 -0800415 absl::Span<const uint8_t> msg_data = span_reader_.ReadMessage();
416 if (msg_data == absl::Span<const uint8_t>()) {
417 return std::nullopt;
418 }
419
Brian Silverman354697a2020-09-22 21:06:32 -0700420 ResizeableBuffer result_buffer;
Austin Schuhadd6eb32020-11-09 21:24:26 -0800421 result_buffer.resize(msg_data.size());
422 memcpy(result_buffer.data(), msg_data.begin(), result_buffer.size());
423 SizePrefixedFlatbufferVector<MessageHeader> result(std::move(result_buffer));
Austin Schuh05b70472020-01-01 17:11:17 -0800424
425 const monotonic_clock::time_point timestamp = monotonic_clock::time_point(
426 chrono::nanoseconds(result.message().monotonic_sent_time()));
427
428 newest_timestamp_ = std::max(newest_timestamp_, timestamp);
Austin Schuh8bd96322020-02-13 21:18:22 -0800429 VLOG(2) << "Read from " << filename() << " data " << FlatbufferToJson(result);
Austin Schuh6f3babe2020-01-26 20:34:50 -0800430 return std::move(result);
Austin Schuh05b70472020-01-01 17:11:17 -0800431}
432
Austin Schuhc41603c2020-10-11 16:17:37 -0700433PartsMessageReader::PartsMessageReader(LogParts log_parts)
434 : parts_(std::move(log_parts)), message_reader_(parts_.parts[0]) {}
435
Austin Schuhadd6eb32020-11-09 21:24:26 -0800436std::optional<SizePrefixedFlatbufferVector<MessageHeader>>
Austin Schuhc41603c2020-10-11 16:17:37 -0700437PartsMessageReader::ReadMessage() {
438 while (!done_) {
Austin Schuhadd6eb32020-11-09 21:24:26 -0800439 std::optional<SizePrefixedFlatbufferVector<MessageHeader>> message =
Austin Schuhc41603c2020-10-11 16:17:37 -0700440 message_reader_.ReadMessage();
441 if (message) {
442 newest_timestamp_ = message_reader_.newest_timestamp();
Austin Schuh32f68492020-11-08 21:45:51 -0800443 const monotonic_clock::time_point monotonic_sent_time(
444 chrono::nanoseconds(message->message().monotonic_sent_time()));
Austin Schuh4b5c22a2020-11-30 22:58:43 -0800445 // TODO(austin): Does this work with startup? Might need to use the start
446 // time.
447 // TODO(austin): Does this work with startup when we don't know the remote
448 // start time too? Look at one of those logs to compare.
Austin Schuh32f68492020-11-08 21:45:51 -0800449 CHECK_GE(monotonic_sent_time,
Austin Schuh4b5c22a2020-11-30 22:58:43 -0800450 newest_timestamp_ - max_out_of_order_duration())
451 << ": Max out of order exceeded.";
Austin Schuhc41603c2020-10-11 16:17:37 -0700452 return message;
453 }
454 NextLog();
455 }
Austin Schuh32f68492020-11-08 21:45:51 -0800456 newest_timestamp_ = monotonic_clock::max_time;
Austin Schuhc41603c2020-10-11 16:17:37 -0700457 return std::nullopt;
458}
459
460void PartsMessageReader::NextLog() {
461 if (next_part_index_ == parts_.parts.size()) {
462 done_ = true;
463 return;
464 }
465 message_reader_ = MessageReader(parts_.parts[next_part_index_]);
466 ++next_part_index_;
467}
468
Austin Schuh1be0ce42020-11-29 22:43:26 -0800469bool Message::operator<(const Message &m2) const {
470 if (this->timestamp < m2.timestamp) {
471 return true;
472 } else if (this->timestamp > m2.timestamp) {
473 return false;
474 }
475
476 if (this->channel_index < m2.channel_index) {
477 return true;
478 } else if (this->channel_index > m2.channel_index) {
479 return false;
480 }
481
482 return this->queue_index < m2.queue_index;
483}
484
485bool Message::operator>=(const Message &m2) const { return !(*this < m2); }
Austin Schuh8f52ed52020-11-30 23:12:39 -0800486bool Message::operator==(const Message &m2) const {
487 return timestamp == m2.timestamp && channel_index == m2.channel_index &&
488 queue_index == m2.queue_index;
489}
Austin Schuh1be0ce42020-11-29 22:43:26 -0800490
491std::ostream &operator<<(std::ostream &os, const Message &m) {
492 os << "{.channel_index=" << m.channel_index
Austin Schuhd2f96102020-12-01 20:27:29 -0800493 << ", .queue_index=" << m.queue_index << ", .timestamp=" << m.timestamp;
494 if (m.data.Verify()) {
495 os << ", .data="
496 << aos::FlatbufferToJson(m.data,
497 {.multi_line = false, .max_vector_size = 1});
498 }
499 os << "}";
500 return os;
501}
502
503std::ostream &operator<<(std::ostream &os, const TimestampedMessage &m) {
504 os << "{.channel_index=" << m.channel_index
505 << ", .queue_index=" << m.queue_index
506 << ", .monotonic_event_time=" << m.monotonic_event_time
507 << ", .realtime_event_time=" << m.realtime_event_time;
508 if (m.remote_queue_index != 0xffffffff) {
509 os << ", .remote_queue_index=" << m.remote_queue_index;
510 }
511 if (m.monotonic_remote_time != monotonic_clock::min_time) {
512 os << ", .monotonic_remote_time=" << m.monotonic_remote_time;
513 }
514 if (m.realtime_remote_time != realtime_clock::min_time) {
515 os << ", .realtime_remote_time=" << m.realtime_remote_time;
516 }
517 if (m.data.Verify()) {
518 os << ", .data="
519 << aos::FlatbufferToJson(m.data,
520 {.multi_line = false, .max_vector_size = 1});
521 }
522 os << "}";
Austin Schuh1be0ce42020-11-29 22:43:26 -0800523 return os;
524}
525
Austin Schuh4b5c22a2020-11-30 22:58:43 -0800526LogPartsSorter::LogPartsSorter(LogParts log_parts)
527 : parts_message_reader_(log_parts) {}
528
529Message *LogPartsSorter::Front() {
530 // Queue up data until enough data has been queued that the front message is
531 // sorted enough to be safe to pop. This may do nothing, so we should make
532 // sure the nothing path is checked quickly.
533 if (sorted_until() != monotonic_clock::max_time) {
534 while (true) {
535 if (!messages_.empty() && messages_.begin()->timestamp < sorted_until()) {
536 break;
537 }
538
539 std::optional<SizePrefixedFlatbufferVector<MessageHeader>> m =
540 parts_message_reader_.ReadMessage();
541 // No data left, sorted forever, work through what is left.
542 if (!m) {
543 sorted_until_ = monotonic_clock::max_time;
544 break;
545 }
546
547 messages_.insert(
548 {.channel_index = m.value().message().channel_index(),
549 .queue_index = m.value().message().queue_index(),
550 .timestamp = monotonic_clock::time_point(std::chrono::nanoseconds(
551 m.value().message().monotonic_sent_time())),
552 .data = std::move(m.value())});
553
554 // Now, update sorted_until_ to match the new message.
555 if (parts_message_reader_.newest_timestamp() >
556 monotonic_clock::min_time +
557 parts_message_reader_.max_out_of_order_duration()) {
558 sorted_until_ = parts_message_reader_.newest_timestamp() -
559 parts_message_reader_.max_out_of_order_duration();
560 } else {
561 sorted_until_ = monotonic_clock::min_time;
562 }
563 }
564 }
565
566 // Now that we have enough data queued, return a pointer to the oldest piece
567 // of data if it exists.
568 if (messages_.empty()) {
569 return nullptr;
570 }
571
572 return &(*messages_.begin());
573}
574
575void LogPartsSorter::PopFront() { messages_.erase(messages_.begin()); }
576
577std::string LogPartsSorter::DebugString() const {
578 std::stringstream ss;
579 ss << "messages: [\n";
580 for (const Message &m : messages_) {
581 ss << m << "\n";
582 }
583 ss << "] <- " << parts_message_reader_.filename();
584 return ss.str();
585}
586
Austin Schuhd2f96102020-12-01 20:27:29 -0800587NodeMerger::NodeMerger(std::vector<LogParts> parts) {
588 CHECK_GE(parts.size(), 1u);
589 const std::string part0_node = parts[0].node;
590 for (size_t i = 1; i < parts.size(); ++i) {
591 CHECK_EQ(part0_node, parts[i].node) << ": Can't merge different nodes.";
592 }
593 for (LogParts &part : parts) {
594 parts_sorters_.emplace_back(std::move(part));
595 }
596
597 node_ = configuration::GetNodeIndex(log_file_header()->configuration(),
598 part0_node);
599
600 monotonic_start_time_ = monotonic_clock::max_time;
601 realtime_start_time_ = realtime_clock::max_time;
602 for (const LogPartsSorter &parts_sorter : parts_sorters_) {
603 if (parts_sorter.monotonic_start_time() < monotonic_start_time_) {
604 monotonic_start_time_ = parts_sorter.monotonic_start_time();
605 realtime_start_time_ = parts_sorter.realtime_start_time();
606 }
607 }
608}
Austin Schuh8f52ed52020-11-30 23:12:39 -0800609
610Message *NodeMerger::Front() {
611 // Return the current Front if we have one, otherwise go compute one.
612 if (current_ != nullptr) {
613 return current_->Front();
614 }
615
616 // Otherwise, do a simple search for the oldest message, deduplicating any
617 // duplicates.
618 Message *oldest = nullptr;
619 sorted_until_ = monotonic_clock::max_time;
Austin Schuhd2f96102020-12-01 20:27:29 -0800620 for (LogPartsSorter &parts_sorter : parts_sorters_) {
621 Message *m = parts_sorter.Front();
Austin Schuh8f52ed52020-11-30 23:12:39 -0800622 if (!m) {
Austin Schuhd2f96102020-12-01 20:27:29 -0800623 sorted_until_ = std::min(sorted_until_, parts_sorter.sorted_until());
Austin Schuh8f52ed52020-11-30 23:12:39 -0800624 continue;
625 }
626 if (oldest == nullptr || *m < *oldest) {
627 oldest = m;
Austin Schuhd2f96102020-12-01 20:27:29 -0800628 current_ = &parts_sorter;
Austin Schuh8f52ed52020-11-30 23:12:39 -0800629 } else if (*m == *oldest) {
630 // Found a duplicate. It doesn't matter which one we return. It is
631 // easiest to just drop the new one.
Austin Schuhd2f96102020-12-01 20:27:29 -0800632 parts_sorter.PopFront();
Austin Schuh8f52ed52020-11-30 23:12:39 -0800633 }
634
635 // PopFront may change this, so compute it down here.
Austin Schuhd2f96102020-12-01 20:27:29 -0800636 sorted_until_ = std::min(sorted_until_, parts_sorter.sorted_until());
Austin Schuh8f52ed52020-11-30 23:12:39 -0800637 }
638
639 // Return the oldest message found. This will be nullptr if nothing was
640 // found, indicating there is nothing left.
641 return oldest;
642}
643
644void NodeMerger::PopFront() {
645 CHECK(current_ != nullptr) << "Popping before calling Front()";
646 current_->PopFront();
647 current_ = nullptr;
648}
649
Austin Schuhd2f96102020-12-01 20:27:29 -0800650TimestampMapper::TimestampMapper(std::vector<LogParts> parts)
651 : node_merger_(std::move(parts)),
652 node_(node_merger_.node()),
653 message_{.channel_index = 0xffffffff,
654 .queue_index = 0xffffffff,
655 .monotonic_event_time = monotonic_clock::min_time,
656 .realtime_event_time = realtime_clock::min_time,
657 .remote_queue_index = 0xffffffff,
658 .monotonic_remote_time = monotonic_clock::min_time,
659 .realtime_remote_time = realtime_clock::min_time,
660 .data = SizePrefixedFlatbufferVector<MessageHeader>::Empty()} {
661 const Configuration *config = log_file_header()->configuration();
662 // Only fill out nodes_data_ if there are nodes. Otherwise everything gets
663 // pretty simple.
664 if (configuration::MultiNode(config)) {
665 nodes_data_.resize(config->nodes()->size());
666 const Node *my_node = config->nodes()->Get(node());
667 for (size_t node_index = 0; node_index < nodes_data_.size(); ++node_index) {
668 const Node *node = config->nodes()->Get(node_index);
669 NodeData *node_data = &nodes_data_[node_index];
670 node_data->channels.resize(config->channels()->size());
671 // We should save the channel if it is delivered to the node represented
672 // by the NodeData, but not sent by that node. That combo means it is
673 // forwarded.
674 size_t channel_index = 0;
675 node_data->any_delivered = false;
676 for (const Channel *channel : *config->channels()) {
677 node_data->channels[channel_index].delivered =
678 configuration::ChannelIsReadableOnNode(channel, node) &&
679 configuration::ChannelIsSendableOnNode(channel, my_node);
680 node_data->any_delivered = node_data->any_delivered ||
681 node_data->channels[channel_index].delivered;
682 ++channel_index;
683 }
684 }
685
686 for (const Channel *channel : *config->channels()) {
687 source_node_.emplace_back(configuration::GetNodeIndex(
688 config, channel->source_node()->string_view()));
689 }
690 }
691}
692
693void TimestampMapper::AddPeer(TimestampMapper *timestamp_mapper) {
694 CHECK(configuration::MultiNode(log_file_header()->configuration()));
695 CHECK_NE(timestamp_mapper->node(), node());
696 CHECK_LT(timestamp_mapper->node(), nodes_data_.size());
697
698 NodeData *node_data = &nodes_data_[timestamp_mapper->node()];
699 // Only set it if this node delivers to the peer timestamp_mapper. Otherwise
700 // we could needlessly save data.
701 if (node_data->any_delivered) {
702 LOG(INFO) << "Registering on node " << node() << " for peer node "
703 << timestamp_mapper->node();
704 CHECK(timestamp_mapper->nodes_data_[node()].peer == nullptr);
705
706 timestamp_mapper->nodes_data_[node()].peer = this;
707 }
708}
709
710void TimestampMapper::FillMessage(Message *m) {
711 message_ = {
712 .channel_index = m->channel_index,
713 .queue_index = m->queue_index,
714 .monotonic_event_time = m->timestamp,
715 .realtime_event_time = aos::realtime_clock::time_point(
716 std::chrono::nanoseconds(m->data.message().realtime_sent_time())),
717 .remote_queue_index = 0xffffffff,
718 .monotonic_remote_time = monotonic_clock::min_time,
719 .realtime_remote_time = realtime_clock::min_time,
720 .data = std::move(m->data)};
721}
722
723TimestampedMessage *TimestampMapper::Front() {
724 // No need to fetch anything new. A previous message still exists.
725 switch (first_message_) {
726 case FirstMessage::kNeedsUpdate:
727 break;
728 case FirstMessage::kInMessage:
729 return &message_;
730 case FirstMessage::kNullptr:
731 return nullptr;
732 }
733
734 if (nodes_data_.empty()) {
735 // Simple path. We are single node, so there are no timestamps to match!
736 CHECK_EQ(messages_.size(), 0u);
737 Message *m = node_merger_.Front();
738 if (!m) {
739 first_message_ = FirstMessage::kNullptr;
740 return nullptr;
741 }
742 // Fill in message_ so we have a place to associate remote timestamps, and
743 // return it.
744 FillMessage(m);
745
746 CHECK_GE(message_.monotonic_event_time, last_message_time_);
747 last_message_time_ = message_.monotonic_event_time;
748 first_message_ = FirstMessage::kInMessage;
749 return &message_;
750 }
751
752 // We need to only add messages to the list so they get processed for messages
753 // which are delivered. Reuse the flow below which uses messages_ by just
754 // adding the new message to messages_ and continuing.
755 if (messages_.empty()) {
756 if (!Queue()) {
757 // Found nothing to add, we are out of data!
758 first_message_ = FirstMessage::kNullptr;
759 return nullptr;
760 }
761
762 // Now that it has been added (and cannibalized), forget about it upstream.
763 node_merger_.PopFront();
764 }
765
766 Message *m = &(messages_.front());
767
768 if (source_node_[m->channel_index] == node()) {
769 // From us, just forward it on, filling the remote data in as invalid.
770 FillMessage(m);
771 CHECK_GE(message_.monotonic_event_time, last_message_time_);
772 last_message_time_ = message_.monotonic_event_time;
773 first_message_ = FirstMessage::kInMessage;
774 return &message_;
775 } else {
776 // Got a timestamp, find the matching remote data, match it, and return it.
777 Message data = MatchingMessageFor(*m);
778
779 // Return the data from the remote. The local message only has timestamp
780 // info which isn't relevant anymore once extracted.
781 message_ = {
782 .channel_index = m->channel_index,
783 .queue_index = m->queue_index,
784 .monotonic_event_time = m->timestamp,
785 .realtime_event_time = aos::realtime_clock::time_point(
786 std::chrono::nanoseconds(m->data.message().realtime_sent_time())),
787 .remote_queue_index = m->data.message().remote_queue_index(),
788 .monotonic_remote_time =
789 monotonic_clock::time_point(std::chrono::nanoseconds(
790 m->data.message().monotonic_remote_time())),
791 .realtime_remote_time = realtime_clock::time_point(
792 std::chrono::nanoseconds(m->data.message().realtime_remote_time())),
793 .data = std::move(data.data)};
794 CHECK_GE(message_.monotonic_event_time, last_message_time_);
795 last_message_time_ = message_.monotonic_event_time;
796 first_message_ = FirstMessage::kInMessage;
797 return &message_;
798 }
799}
800
801void TimestampMapper::PopFront() {
802 CHECK(first_message_ != FirstMessage::kNeedsUpdate);
803 first_message_ = FirstMessage::kNeedsUpdate;
804
805 if (nodes_data_.empty()) {
806 // We are thin wrapper around node_merger. Call it directly.
807 node_merger_.PopFront();
808 } else {
809 // Since messages_ holds the data, drop it.
810 messages_.pop_front();
811 }
812}
813
814Message TimestampMapper::MatchingMessageFor(const Message &message) {
815 TimestampMapper *peer =
816 CHECK_NOTNULL(nodes_data_[source_node_[message.channel_index]].peer);
817 // The queue which will have the matching data, if available.
818 std::deque<Message> *data_queue =
819 &peer->nodes_data_[node()].channels[message.channel_index].messages;
820
821 // Figure out what queue index we are looking for.
822 CHECK(message.data.message().has_remote_queue_index());
823 const uint32_t remote_queue_index =
824 message.data.message().remote_queue_index();
825
826 CHECK(message.data.message().has_monotonic_remote_time());
827 CHECK(message.data.message().has_realtime_remote_time());
828
829 const monotonic_clock::time_point monotonic_remote_time(
830 std::chrono::nanoseconds(message.data.message().monotonic_remote_time()));
831 const realtime_clock::time_point realtime_remote_time(
832 std::chrono::nanoseconds(message.data.message().realtime_remote_time()));
833
834 peer->QueueUntil(monotonic_remote_time);
835
836 if (data_queue->empty()) {
837 return Message{
838 .channel_index = message.channel_index,
839 .queue_index = remote_queue_index,
840 .timestamp = monotonic_remote_time,
841 .data = SizePrefixedFlatbufferVector<MessageHeader>::Empty()};
842 }
843
844 // The algorithm below is constant time with some assumptions. We need there
845 // to be no missing messages in the data stream. This also assumes a queue
846 // hasn't wrapped. That is conservative, but should let us get started.
847 //
848 // TODO(austin): We can break these assumptions pretty easily once we have a
849 // need.
850 CHECK_EQ(
851 data_queue->back().queue_index - data_queue->front().queue_index + 1u,
852 data_queue->size());
853
854 if (remote_queue_index < data_queue->front().queue_index ||
855 remote_queue_index > data_queue->back().queue_index) {
856 return Message{
857 .channel_index = message.channel_index,
858 .queue_index = remote_queue_index,
859 .timestamp = monotonic_remote_time,
860 .data = SizePrefixedFlatbufferVector<MessageHeader>::Empty()};
861 }
862
863 // Pull the data out and confirm that the timestamps match as expected.
864 Message result = std::move(
865 (*data_queue)[remote_queue_index - data_queue->front().queue_index]);
866 CHECK_EQ(result.timestamp, monotonic_remote_time)
867 << ": Queue index matches, but timestamp doesn't. Please investigate!";
868 CHECK_EQ(realtime_clock::time_point(std::chrono::nanoseconds(
869 result.data.message().realtime_sent_time())),
870 realtime_remote_time)
871 << ": Queue index matches, but timestamp doesn't. Please investigate!";
872 // Now drop the data off the front. We have deduplicated timestamps, so we
873 // are done. And all the data is in order.
874 data_queue->erase(data_queue->begin(),
875 data_queue->begin() + (1 + remote_queue_index -
876 data_queue->front().queue_index));
877 return result;
878}
879
880void TimestampMapper::QueueUntil(monotonic_clock::time_point t) {
881 if (queued_until_ > t) {
882 return;
883 }
884 while (true) {
885 if (!messages_.empty() && messages_.back().timestamp > t) {
886 queued_until_ = std::max(queued_until_, messages_.back().timestamp);
887 return;
888 }
889
890 if (!Queue()) {
891 // Found nothing to add, we are out of data!
892 queued_until_ = monotonic_clock::max_time;
893 return;
894 }
895
896 // Now that it has been added (and cannibalized), forget about it upstream.
897 node_merger_.PopFront();
898 }
899}
900
901bool TimestampMapper::Queue() {
902 Message *m = node_merger_.Front();
903 if (m == nullptr) {
904 return false;
905 }
906 for (NodeData &node_data : nodes_data_) {
907 if (!node_data.any_delivered) continue;
908 if (node_data.channels[m->channel_index].delivered) {
909 // TODO(austin): This copies the data... Probably not worth stressing
910 // about yet.
911 // TODO(austin): Bound how big this can get. We tend not to send massive
912 // data, so we can probably ignore this for a bit.
913 node_data.channels[m->channel_index].messages.emplace_back(*m);
914 }
915 }
916
917 messages_.emplace_back(std::move(*m));
918 return true;
919}
920
921std::string TimestampMapper::DebugString() const {
922 std::stringstream ss;
923 ss << "node " << node() << " [\n";
924 for (const Message &message : messages_) {
925 ss << " " << message << "\n";
926 }
927 ss << "] queued_until " << queued_until_;
928 for (const NodeData &ns : nodes_data_) {
929 if (ns.peer == nullptr) continue;
930 ss << "\nnode " << ns.peer->node() << " remote_data [\n";
931 size_t channel_index = 0;
932 for (const NodeData::ChannelData &channel_data :
933 ns.peer->nodes_data_[node()].channels) {
934 if (channel_data.messages.empty()) {
935 continue;
936 }
937
938 ss << " channel " << channel_index << " [\n";
939 for (const Message &m : channel_data.messages) {
940 ss << " " << m << "\n";
941 }
942 ss << " ]\n";
943 ++channel_index;
944 }
945 ss << "] queued_until " << ns.peer->queued_until_;
946 }
947 return ss.str();
948}
949
Austin Schuh6f3babe2020-01-26 20:34:50 -0800950SplitMessageReader::SplitMessageReader(
Austin Schuhfa895892020-01-07 20:07:41 -0800951 const std::vector<std::string> &filenames)
952 : filenames_(filenames),
Austin Schuhadd6eb32020-11-09 21:24:26 -0800953 log_file_header_(SizePrefixedFlatbufferVector<LogFileHeader>::Empty()) {
Austin Schuhfa895892020-01-07 20:07:41 -0800954 CHECK(NextLogFile()) << ": filenames is empty. Need files to read.";
955
Austin Schuh6f3babe2020-01-26 20:34:50 -0800956 // Grab any log file header. They should all match (and we will check as we
957 // open more of them).
Austin Schuh97789fc2020-08-01 14:42:45 -0700958 log_file_header_ = message_reader_->raw_log_file_header();
Austin Schuhfa895892020-01-07 20:07:41 -0800959
Austin Schuh2f8fd752020-09-01 22:38:28 -0700960 for (size_t i = 1; i < filenames_.size(); ++i) {
961 MessageReader message_reader(filenames_[i]);
962
963 const monotonic_clock::time_point new_monotonic_start_time(
964 chrono::nanoseconds(
965 message_reader.log_file_header()->monotonic_start_time()));
966 const realtime_clock::time_point new_realtime_start_time(
967 chrono::nanoseconds(
968 message_reader.log_file_header()->realtime_start_time()));
969
970 // There are 2 types of part files. Part files from before time estimation
971 // has started, and part files after. We don't declare a log file "started"
972 // until time estimation is up. And once a log file starts, it should never
973 // stop again, and should remain constant.
974 // To compare both types of headers, we mutate our saved copy of the header
975 // to match the next chunk by updating time if we detect a stopped ->
976 // started transition.
977 if (monotonic_start_time() == monotonic_clock::min_time) {
978 CHECK_EQ(realtime_start_time(), realtime_clock::min_time);
979 // We should only be missing the monotonic start time when logging data
Brian Silverman87ac0402020-09-17 14:47:01 -0700980 // for remote nodes. We don't have a good way to determine the remote
Austin Schuh2f8fd752020-09-01 22:38:28 -0700981 // realtime offset, so it shouldn't be filled out.
982 // TODO(austin): If we have a good way, feel free to fill it out. It
983 // probably won't be better than we could do in post though with the same
984 // data.
985 CHECK(!log_file_header_.mutable_message()->has_realtime_start_time());
986 if (new_monotonic_start_time != monotonic_clock::min_time) {
987 // If we finally found our start time, update the header. Do this once
988 // because it should never change again.
989 log_file_header_.mutable_message()->mutate_monotonic_start_time(
990 new_monotonic_start_time.time_since_epoch().count());
991 log_file_header_.mutable_message()->mutate_realtime_start_time(
992 new_realtime_start_time.time_since_epoch().count());
993 }
994 }
995
Austin Schuh64fab802020-09-09 22:47:47 -0700996 // We don't have a good way to set the realtime start time on remote nodes.
997 // Confirm it remains consistent.
998 CHECK_EQ(log_file_header_.mutable_message()->has_realtime_start_time(),
999 message_reader.log_file_header()->has_realtime_start_time());
1000
1001 // Parts index will *not* match unless we set them to match. We only want
1002 // to accept the start time and parts mismatching, so set them.
1003 log_file_header_.mutable_message()->mutate_parts_index(
1004 message_reader.log_file_header()->parts_index());
1005
Austin Schuh2f8fd752020-09-01 22:38:28 -07001006 // Now compare that the headers match.
Austin Schuh64fab802020-09-09 22:47:47 -07001007 if (!CompareFlatBuffer(message_reader.raw_log_file_header(),
1008 log_file_header_)) {
Brian Silvermanae7c0332020-09-30 16:58:23 -07001009 if (message_reader.log_file_header()->has_log_event_uuid() &&
1010 log_file_header_.message().has_log_event_uuid() &&
1011 message_reader.log_file_header()->log_event_uuid()->string_view() !=
1012 log_file_header_.message().log_event_uuid()->string_view()) {
Austin Schuh64fab802020-09-09 22:47:47 -07001013 LOG(FATAL) << "Logger UUIDs don't match between log file chunks "
1014 << filenames_[0] << " and " << filenames_[i]
1015 << ", this is not supported.";
1016 }
1017 if (message_reader.log_file_header()->has_parts_uuid() &&
1018 log_file_header_.message().has_parts_uuid() &&
1019 message_reader.log_file_header()->parts_uuid()->string_view() !=
1020 log_file_header_.message().parts_uuid()->string_view()) {
1021 LOG(FATAL) << "Parts UUIDs don't match between log file chunks "
1022 << filenames_[0] << " and " << filenames_[i]
1023 << ", this is not supported.";
1024 }
1025
1026 LOG(FATAL) << "Header is different between log file chunks "
1027 << filenames_[0] << " and " << filenames_[i]
1028 << ", this is not supported.";
1029 }
Austin Schuh2f8fd752020-09-01 22:38:28 -07001030 }
Austin Schuh64fab802020-09-09 22:47:47 -07001031 // Put the parts index back to the first log file chunk.
1032 log_file_header_.mutable_message()->mutate_parts_index(
1033 message_reader_->log_file_header()->parts_index());
Austin Schuh2f8fd752020-09-01 22:38:28 -07001034
Austin Schuh6f3babe2020-01-26 20:34:50 -08001035 // Setup per channel state.
Austin Schuh05b70472020-01-01 17:11:17 -08001036 channels_.resize(configuration()->channels()->size());
Austin Schuh6f3babe2020-01-26 20:34:50 -08001037 for (ChannelData &channel_data : channels_) {
1038 channel_data.data.split_reader = this;
1039 // Build up the timestamp list.
1040 if (configuration::MultiNode(configuration())) {
1041 channel_data.timestamps.resize(configuration()->nodes()->size());
1042 for (MessageHeaderQueue &queue : channel_data.timestamps) {
1043 queue.timestamps = true;
1044 queue.split_reader = this;
1045 }
1046 }
1047 }
Austin Schuh05b70472020-01-01 17:11:17 -08001048
Austin Schuh6f3babe2020-01-26 20:34:50 -08001049 // Build up channels_to_write_ as an optimization to make it fast to figure
1050 // out which datastructure to place any new data from a channel on.
1051 for (const Channel *channel : *configuration()->channels()) {
1052 // This is the main case. We will only see data on this node.
1053 if (configuration::ChannelIsSendableOnNode(channel, node())) {
1054 channels_to_write_.emplace_back(
1055 &channels_[channels_to_write_.size()].data);
1056 } else
1057 // If we can't send, but can receive, we should be able to see
1058 // timestamps here.
1059 if (configuration::ChannelIsReadableOnNode(channel, node())) {
1060 channels_to_write_.emplace_back(
1061 &(channels_[channels_to_write_.size()]
1062 .timestamps[configuration::GetNodeIndex(configuration(),
1063 node())]));
1064 } else {
1065 channels_to_write_.emplace_back(nullptr);
1066 }
1067 }
Austin Schuh05b70472020-01-01 17:11:17 -08001068}
1069
Austin Schuh6f3babe2020-01-26 20:34:50 -08001070bool SplitMessageReader::NextLogFile() {
Austin Schuhfa895892020-01-07 20:07:41 -08001071 if (next_filename_index_ == filenames_.size()) {
1072 return false;
1073 }
1074 message_reader_ =
1075 std::make_unique<MessageReader>(filenames_[next_filename_index_]);
1076
1077 // We can't support the config diverging between two log file headers. See if
1078 // they are the same.
1079 if (next_filename_index_ != 0) {
Austin Schuh64fab802020-09-09 22:47:47 -07001080 // In order for the headers to identically compare, they need to have the
1081 // same parts_index. Rewrite the saved header with the new parts_index,
1082 // compare, and then restore.
1083 const int32_t original_parts_index =
1084 log_file_header_.message().parts_index();
1085 log_file_header_.mutable_message()->mutate_parts_index(
1086 message_reader_->log_file_header()->parts_index());
1087
Austin Schuh97789fc2020-08-01 14:42:45 -07001088 CHECK(CompareFlatBuffer(message_reader_->raw_log_file_header(),
1089 log_file_header_))
Austin Schuhfa895892020-01-07 20:07:41 -08001090 << ": Header is different between log file chunks "
1091 << filenames_[next_filename_index_] << " and "
1092 << filenames_[next_filename_index_ - 1] << ", this is not supported.";
Austin Schuh64fab802020-09-09 22:47:47 -07001093
1094 log_file_header_.mutable_message()->mutate_parts_index(
1095 original_parts_index);
Austin Schuhfa895892020-01-07 20:07:41 -08001096 }
1097
1098 ++next_filename_index_;
1099 return true;
1100}
1101
Austin Schuh6f3babe2020-01-26 20:34:50 -08001102bool SplitMessageReader::QueueMessages(
Austin Schuhcde938c2020-02-02 17:30:07 -08001103 monotonic_clock::time_point last_dequeued_time) {
Austin Schuh6f3babe2020-01-26 20:34:50 -08001104 // TODO(austin): Once we are happy that everything works, read a 256kb chunk
1105 // to reduce the need to re-heap down below.
Austin Schuhcde938c2020-02-02 17:30:07 -08001106
1107 // Special case no more data. Otherwise we blow up on the CHECK statement
1108 // confirming that we have enough data queued.
1109 if (at_end_) {
1110 return false;
1111 }
1112
1113 // If this isn't the first time around, confirm that we had enough data queued
1114 // to follow the contract.
1115 if (time_to_queue_ != monotonic_clock::min_time) {
1116 CHECK_LE(last_dequeued_time,
1117 newest_timestamp() - max_out_of_order_duration())
1118 << " node " << FlatbufferToJson(node()) << " on " << this;
1119
1120 // Bail if there is enough data already queued.
1121 if (last_dequeued_time < time_to_queue_) {
Austin Schuhee711052020-08-24 16:06:09 -07001122 VLOG(1) << MaybeNodeName(target_node_) << "All up to date on " << this
1123 << ", dequeued " << last_dequeued_time << " queue time "
1124 << time_to_queue_;
Austin Schuhcde938c2020-02-02 17:30:07 -08001125 return true;
1126 }
1127 } else {
1128 // Startup takes a special dance. We want to queue up until the start time,
1129 // but we then want to find the next message to read. The conservative
1130 // answer is to immediately trigger a second requeue to get things moving.
1131 time_to_queue_ = monotonic_start_time();
Austin Schuheeba0292020-10-11 16:20:05 -07001132 CHECK_NE(time_to_queue_, monotonic_clock::min_time);
Austin Schuhcde938c2020-02-02 17:30:07 -08001133 QueueMessages(time_to_queue_);
1134 }
1135
1136 // If we are asked to queue, queue for at least max_out_of_order_duration past
1137 // the last known time in the log file (ie the newest timestep read). As long
1138 // as we requeue exactly when time_to_queue_ is dequeued and go no further, we
1139 // are safe. And since we pop in order, that works.
1140 //
1141 // Special case the start of the log file. There should be at most 1 message
1142 // from each channel at the start of the log file. So always force the start
1143 // of the log file to just be read.
1144 time_to_queue_ = std::max(time_to_queue_, newest_timestamp());
Austin Schuhee711052020-08-24 16:06:09 -07001145 VLOG(1) << MaybeNodeName(target_node_) << "Queueing, going until "
1146 << time_to_queue_ << " " << filename();
Austin Schuhcde938c2020-02-02 17:30:07 -08001147
1148 bool was_emplaced = false;
Austin Schuh6f3babe2020-01-26 20:34:50 -08001149 while (true) {
Austin Schuhcde938c2020-02-02 17:30:07 -08001150 // Stop if we have enough.
Brian Silverman98360e22020-04-28 16:51:20 -07001151 if (newest_timestamp() > time_to_queue_ + max_out_of_order_duration() &&
Austin Schuhcde938c2020-02-02 17:30:07 -08001152 was_emplaced) {
Austin Schuhee711052020-08-24 16:06:09 -07001153 VLOG(1) << MaybeNodeName(target_node_) << "Done queueing on " << this
1154 << ", queued to " << newest_timestamp() << " with requeue time "
1155 << time_to_queue_;
Austin Schuh6f3babe2020-01-26 20:34:50 -08001156 return true;
1157 }
Austin Schuh05b70472020-01-01 17:11:17 -08001158
Austin Schuhadd6eb32020-11-09 21:24:26 -08001159 if (std::optional<SizePrefixedFlatbufferVector<MessageHeader>> msg =
Austin Schuh6f3babe2020-01-26 20:34:50 -08001160 message_reader_->ReadMessage()) {
1161 const MessageHeader &header = msg.value().message();
1162
Austin Schuhcde938c2020-02-02 17:30:07 -08001163 const monotonic_clock::time_point timestamp = monotonic_clock::time_point(
1164 chrono::nanoseconds(header.monotonic_sent_time()));
Austin Schuh6f3babe2020-01-26 20:34:50 -08001165
Austin Schuh0b5fd032020-03-28 17:36:49 -07001166 if (VLOG_IS_ON(2)) {
Brian Silvermand90905f2020-09-23 14:42:56 -07001167 LOG(INFO) << MaybeNodeName(target_node_) << "Queued " << this << " "
1168 << filename() << " ttq: " << time_to_queue_ << " now "
Austin Schuhee711052020-08-24 16:06:09 -07001169 << newest_timestamp() << " start time "
1170 << monotonic_start_time() << " " << FlatbufferToJson(&header);
Austin Schuh0b5fd032020-03-28 17:36:49 -07001171 } else if (VLOG_IS_ON(1)) {
Austin Schuhadd6eb32020-11-09 21:24:26 -08001172 SizePrefixedFlatbufferVector<MessageHeader> copy = msg.value();
Austin Schuh0b5fd032020-03-28 17:36:49 -07001173 copy.mutable_message()->clear_data();
Austin Schuhee711052020-08-24 16:06:09 -07001174 LOG(INFO) << MaybeNodeName(target_node_) << "Queued " << this << " "
1175 << filename() << " ttq: " << time_to_queue_ << " now "
1176 << newest_timestamp() << " start time "
1177 << monotonic_start_time() << " " << FlatbufferToJson(copy);
Austin Schuh0b5fd032020-03-28 17:36:49 -07001178 }
Austin Schuhcde938c2020-02-02 17:30:07 -08001179
1180 const int channel_index = header.channel_index();
1181 was_emplaced = channels_to_write_[channel_index]->emplace_back(
1182 std::move(msg.value()));
1183 if (was_emplaced) {
1184 newest_timestamp_ = std::max(newest_timestamp_, timestamp);
1185 }
Austin Schuh6f3babe2020-01-26 20:34:50 -08001186 } else {
1187 if (!NextLogFile()) {
Austin Schuhee711052020-08-24 16:06:09 -07001188 VLOG(1) << MaybeNodeName(target_node_) << "No more files, last was "
1189 << filenames_.back();
Austin Schuhcde938c2020-02-02 17:30:07 -08001190 at_end_ = true;
Austin Schuh8bd96322020-02-13 21:18:22 -08001191 for (MessageHeaderQueue *queue : channels_to_write_) {
1192 if (queue == nullptr || queue->timestamp_merger == nullptr) {
1193 continue;
1194 }
1195 queue->timestamp_merger->NoticeAtEnd();
1196 }
Austin Schuh6f3babe2020-01-26 20:34:50 -08001197 return false;
1198 }
1199 }
Austin Schuh05b70472020-01-01 17:11:17 -08001200 }
Austin Schuh6f3babe2020-01-26 20:34:50 -08001201}
1202
1203void SplitMessageReader::SetTimestampMerger(TimestampMerger *timestamp_merger,
1204 int channel_index,
1205 const Node *target_node) {
1206 const Node *reinterpreted_target_node =
1207 configuration::GetNodeOrDie(configuration(), target_node);
Austin Schuhee711052020-08-24 16:06:09 -07001208 target_node_ = reinterpreted_target_node;
1209
Austin Schuh6f3babe2020-01-26 20:34:50 -08001210 const Channel *const channel =
1211 configuration()->channels()->Get(channel_index);
1212
Austin Schuhcde938c2020-02-02 17:30:07 -08001213 VLOG(1) << " Configuring merger " << this << " for channel " << channel_index
1214 << " "
1215 << configuration::CleanedChannelToString(
1216 configuration()->channels()->Get(channel_index));
1217
Austin Schuh6f3babe2020-01-26 20:34:50 -08001218 MessageHeaderQueue *message_header_queue = nullptr;
1219
1220 // Figure out if this log file is from our point of view, or the other node's
1221 // point of view.
1222 if (node() == reinterpreted_target_node) {
Austin Schuhcde938c2020-02-02 17:30:07 -08001223 VLOG(1) << " Replaying as logged node " << filename();
1224
1225 if (configuration::ChannelIsSendableOnNode(channel, node())) {
1226 VLOG(1) << " Data on node";
1227 message_header_queue = &(channels_[channel_index].data);
1228 } else if (configuration::ChannelIsReadableOnNode(channel, node())) {
1229 VLOG(1) << " Timestamps on node";
1230 message_header_queue =
1231 &(channels_[channel_index].timestamps[configuration::GetNodeIndex(
1232 configuration(), node())]);
Austin Schuh6f3babe2020-01-26 20:34:50 -08001233 } else {
Austin Schuhcde938c2020-02-02 17:30:07 -08001234 VLOG(1) << " Dropping";
Austin Schuh6f3babe2020-01-26 20:34:50 -08001235 }
1236 } else {
Austin Schuhcde938c2020-02-02 17:30:07 -08001237 VLOG(1) << " Replaying as other node " << filename();
Austin Schuh6f3babe2020-01-26 20:34:50 -08001238 // We are replaying from another node's point of view. The only interesting
Austin Schuhcde938c2020-02-02 17:30:07 -08001239 // data is data that is sent from our node and received on theirs.
1240 if (configuration::ChannelIsReadableOnNode(channel,
1241 reinterpreted_target_node) &&
1242 configuration::ChannelIsSendableOnNode(channel, node())) {
1243 VLOG(1) << " Readable on target node";
Austin Schuh6f3babe2020-01-26 20:34:50 -08001244 // Data from another node.
1245 message_header_queue = &(channels_[channel_index].data);
1246 } else {
Austin Schuhcde938c2020-02-02 17:30:07 -08001247 VLOG(1) << " Dropping";
Austin Schuh6f3babe2020-01-26 20:34:50 -08001248 // This is either not sendable on the other node, or is a timestamp and
1249 // therefore not interesting.
1250 }
1251 }
1252
1253 // If we found one, write it down. This will be nullptr when there is nothing
1254 // relevant on this channel on this node for the target node. In that case,
1255 // we want to drop the message instead of queueing it.
1256 if (message_header_queue != nullptr) {
1257 message_header_queue->timestamp_merger = timestamp_merger;
1258 }
1259}
1260
1261std::tuple<monotonic_clock::time_point, uint32_t,
Austin Schuhadd6eb32020-11-09 21:24:26 -08001262 SizePrefixedFlatbufferVector<MessageHeader>>
Austin Schuh6f3babe2020-01-26 20:34:50 -08001263SplitMessageReader::PopOldest(int channel_index) {
1264 CHECK_GT(channels_[channel_index].data.size(), 0u);
Austin Schuhcde938c2020-02-02 17:30:07 -08001265 const std::tuple<monotonic_clock::time_point, uint32_t, const MessageHeader *>
1266 timestamp = channels_[channel_index].data.front_timestamp();
Austin Schuhadd6eb32020-11-09 21:24:26 -08001267 SizePrefixedFlatbufferVector<MessageHeader> front =
Austin Schuh6f3babe2020-01-26 20:34:50 -08001268 std::move(channels_[channel_index].data.front());
Austin Schuh2f8fd752020-09-01 22:38:28 -07001269 channels_[channel_index].data.PopFront();
Austin Schuhcde938c2020-02-02 17:30:07 -08001270
Austin Schuh2f8fd752020-09-01 22:38:28 -07001271 VLOG(1) << MaybeNodeName(target_node_) << "Popped Data " << this << " "
1272 << std::get<0>(timestamp) << " for "
1273 << configuration::StrippedChannelToString(
1274 configuration()->channels()->Get(channel_index))
1275 << " (" << channel_index << ")";
Austin Schuhcde938c2020-02-02 17:30:07 -08001276
1277 QueueMessages(std::get<0>(timestamp));
Austin Schuh6f3babe2020-01-26 20:34:50 -08001278
1279 return std::make_tuple(std::get<0>(timestamp), std::get<1>(timestamp),
1280 std::move(front));
1281}
1282
1283std::tuple<monotonic_clock::time_point, uint32_t,
Austin Schuhadd6eb32020-11-09 21:24:26 -08001284 SizePrefixedFlatbufferVector<MessageHeader>>
Austin Schuh2f8fd752020-09-01 22:38:28 -07001285SplitMessageReader::PopOldestTimestamp(int channel, int node_index) {
Austin Schuh6f3babe2020-01-26 20:34:50 -08001286 CHECK_GT(channels_[channel].timestamps[node_index].size(), 0u);
Austin Schuhcde938c2020-02-02 17:30:07 -08001287 const std::tuple<monotonic_clock::time_point, uint32_t, const MessageHeader *>
1288 timestamp = channels_[channel].timestamps[node_index].front_timestamp();
Austin Schuhadd6eb32020-11-09 21:24:26 -08001289 SizePrefixedFlatbufferVector<MessageHeader> front =
Austin Schuh6f3babe2020-01-26 20:34:50 -08001290 std::move(channels_[channel].timestamps[node_index].front());
Austin Schuh2f8fd752020-09-01 22:38:28 -07001291 channels_[channel].timestamps[node_index].PopFront();
Austin Schuhcde938c2020-02-02 17:30:07 -08001292
Austin Schuh2f8fd752020-09-01 22:38:28 -07001293 VLOG(1) << MaybeNodeName(target_node_) << "Popped timestamp " << this << " "
Austin Schuhee711052020-08-24 16:06:09 -07001294 << std::get<0>(timestamp) << " for "
1295 << configuration::StrippedChannelToString(
1296 configuration()->channels()->Get(channel))
Austin Schuh2f8fd752020-09-01 22:38:28 -07001297 << " on "
1298 << configuration()->nodes()->Get(node_index)->name()->string_view()
1299 << " (" << node_index << ")";
Austin Schuhcde938c2020-02-02 17:30:07 -08001300
1301 QueueMessages(std::get<0>(timestamp));
Austin Schuh6f3babe2020-01-26 20:34:50 -08001302
1303 return std::make_tuple(std::get<0>(timestamp), std::get<1>(timestamp),
1304 std::move(front));
1305}
1306
Austin Schuhcde938c2020-02-02 17:30:07 -08001307bool SplitMessageReader::MessageHeaderQueue::emplace_back(
Austin Schuhadd6eb32020-11-09 21:24:26 -08001308 SizePrefixedFlatbufferVector<MessageHeader> &&msg) {
Austin Schuh6f3babe2020-01-26 20:34:50 -08001309 CHECK(split_reader != nullptr);
1310
1311 // If there is no timestamp merger for this queue, nobody is listening. Drop
1312 // the message. This happens when a log file from another node is replayed,
1313 // and the timestamp mergers down stream just don't care.
1314 if (timestamp_merger == nullptr) {
Austin Schuhcde938c2020-02-02 17:30:07 -08001315 return false;
Austin Schuh6f3babe2020-01-26 20:34:50 -08001316 }
1317
1318 CHECK(timestamps != msg.message().has_data())
1319 << ": Got timestamps and data mixed up on a node. "
1320 << FlatbufferToJson(msg);
1321
1322 data_.emplace_back(std::move(msg));
1323
1324 if (data_.size() == 1u) {
1325 // Yup, new data. Notify.
1326 if (timestamps) {
1327 timestamp_merger->UpdateTimestamp(split_reader, front_timestamp());
1328 } else {
1329 timestamp_merger->Update(split_reader, front_timestamp());
1330 }
1331 }
Austin Schuhcde938c2020-02-02 17:30:07 -08001332
1333 return true;
Austin Schuh6f3babe2020-01-26 20:34:50 -08001334}
1335
Austin Schuh2f8fd752020-09-01 22:38:28 -07001336void SplitMessageReader::MessageHeaderQueue::PopFront() {
Austin Schuh6f3babe2020-01-26 20:34:50 -08001337 data_.pop_front();
1338 if (data_.size() != 0u) {
1339 // Yup, new data.
1340 if (timestamps) {
1341 timestamp_merger->UpdateTimestamp(split_reader, front_timestamp());
1342 } else {
1343 timestamp_merger->Update(split_reader, front_timestamp());
1344 }
Austin Schuh2f8fd752020-09-01 22:38:28 -07001345 } else {
1346 // Poke anyways to update the heap.
1347 if (timestamps) {
1348 timestamp_merger->UpdateTimestamp(
1349 nullptr, std::make_tuple(monotonic_clock::min_time, 0, nullptr));
1350 } else {
1351 timestamp_merger->Update(
1352 nullptr, std::make_tuple(monotonic_clock::min_time, 0, nullptr));
1353 }
Austin Schuh6f3babe2020-01-26 20:34:50 -08001354 }
Austin Schuh05b70472020-01-01 17:11:17 -08001355}
1356
1357namespace {
1358
Austin Schuh6f3babe2020-01-26 20:34:50 -08001359bool SplitMessageReaderHeapCompare(
1360 const std::tuple<monotonic_clock::time_point, uint32_t,
1361 SplitMessageReader *>
1362 first,
1363 const std::tuple<monotonic_clock::time_point, uint32_t,
1364 SplitMessageReader *>
1365 second) {
1366 if (std::get<0>(first) > std::get<0>(second)) {
1367 return true;
1368 } else if (std::get<0>(first) == std::get<0>(second)) {
1369 if (std::get<1>(first) > std::get<1>(second)) {
1370 return true;
1371 } else if (std::get<1>(first) == std::get<1>(second)) {
1372 return std::get<2>(first) > std::get<2>(second);
1373 } else {
1374 return false;
1375 }
1376 } else {
1377 return false;
1378 }
1379}
1380
Austin Schuh05b70472020-01-01 17:11:17 -08001381bool ChannelHeapCompare(
1382 const std::pair<monotonic_clock::time_point, int> first,
1383 const std::pair<monotonic_clock::time_point, int> second) {
1384 if (first.first > second.first) {
1385 return true;
1386 } else if (first.first == second.first) {
1387 return first.second > second.second;
1388 } else {
1389 return false;
1390 }
1391}
1392
1393} // namespace
1394
Austin Schuh6f3babe2020-01-26 20:34:50 -08001395TimestampMerger::TimestampMerger(
1396 const Configuration *configuration,
1397 std::vector<SplitMessageReader *> split_message_readers, int channel_index,
1398 const Node *target_node, ChannelMerger *channel_merger)
1399 : configuration_(configuration),
1400 split_message_readers_(std::move(split_message_readers)),
1401 channel_index_(channel_index),
1402 node_index_(configuration::MultiNode(configuration)
1403 ? configuration::GetNodeIndex(configuration, target_node)
1404 : -1),
1405 channel_merger_(channel_merger) {
1406 // Tell the readers we care so they know who to notify.
Austin Schuhcde938c2020-02-02 17:30:07 -08001407 VLOG(1) << "Configuring channel " << channel_index << " target node "
1408 << FlatbufferToJson(target_node);
Austin Schuh6f3babe2020-01-26 20:34:50 -08001409 for (SplitMessageReader *reader : split_message_readers_) {
1410 reader->SetTimestampMerger(this, channel_index, target_node);
1411 }
1412
1413 // And then determine if we need to track timestamps.
1414 const Channel *channel = configuration->channels()->Get(channel_index);
1415 if (!configuration::ChannelIsSendableOnNode(channel, target_node) &&
1416 configuration::ChannelIsReadableOnNode(channel, target_node)) {
1417 has_timestamps_ = true;
1418 }
1419}
1420
1421void TimestampMerger::PushMessageHeap(
Austin Schuhcde938c2020-02-02 17:30:07 -08001422 std::tuple<monotonic_clock::time_point, uint32_t, const MessageHeader *>
1423 timestamp,
Austin Schuh6f3babe2020-01-26 20:34:50 -08001424 SplitMessageReader *split_message_reader) {
Austin Schuh2f8fd752020-09-01 22:38:28 -07001425 if (split_message_reader != nullptr) {
1426 DCHECK(std::find_if(message_heap_.begin(), message_heap_.end(),
1427 [split_message_reader](
1428 const std::tuple<monotonic_clock::time_point,
1429 uint32_t, SplitMessageReader *>
1430 x) {
1431 return std::get<2>(x) == split_message_reader;
1432 }) == message_heap_.end())
1433 << ": Pushing message when it is already in the heap.";
Austin Schuh6f3babe2020-01-26 20:34:50 -08001434
Austin Schuh2f8fd752020-09-01 22:38:28 -07001435 message_heap_.push_back(std::make_tuple(
1436 std::get<0>(timestamp), std::get<1>(timestamp), split_message_reader));
Austin Schuh6f3babe2020-01-26 20:34:50 -08001437
Austin Schuh2f8fd752020-09-01 22:38:28 -07001438 std::push_heap(message_heap_.begin(), message_heap_.end(),
1439 &SplitMessageReaderHeapCompare);
1440 }
Austin Schuh6f3babe2020-01-26 20:34:50 -08001441
1442 // If we are just a data merger, don't wait for timestamps.
1443 if (!has_timestamps_) {
Austin Schuh2f8fd752020-09-01 22:38:28 -07001444 if (!message_heap_.empty()) {
1445 channel_merger_->Update(std::get<0>(message_heap_[0]), channel_index_);
1446 pushed_ = true;
1447 } else {
1448 // Remove ourselves if we are empty.
1449 channel_merger_->Update(monotonic_clock::min_time, channel_index_);
1450 }
Austin Schuh6f3babe2020-01-26 20:34:50 -08001451 }
1452}
1453
Austin Schuhcde938c2020-02-02 17:30:07 -08001454std::tuple<monotonic_clock::time_point, uint32_t, const MessageHeader *>
1455TimestampMerger::oldest_message() const {
1456 CHECK_GT(message_heap_.size(), 0u);
1457 std::tuple<monotonic_clock::time_point, uint32_t, SplitMessageReader *>
1458 oldest_message_reader = message_heap_.front();
1459 return std::get<2>(oldest_message_reader)->oldest_message(channel_index_);
1460}
1461
1462std::tuple<monotonic_clock::time_point, uint32_t, const MessageHeader *>
1463TimestampMerger::oldest_timestamp() const {
1464 CHECK_GT(timestamp_heap_.size(), 0u);
1465 std::tuple<monotonic_clock::time_point, uint32_t, SplitMessageReader *>
1466 oldest_message_reader = timestamp_heap_.front();
1467 return std::get<2>(oldest_message_reader)
1468 ->oldest_message(channel_index_, node_index_);
1469}
1470
Austin Schuh6f3babe2020-01-26 20:34:50 -08001471void TimestampMerger::PushTimestampHeap(
Austin Schuhcde938c2020-02-02 17:30:07 -08001472 std::tuple<monotonic_clock::time_point, uint32_t, const MessageHeader *>
1473 timestamp,
Austin Schuh6f3babe2020-01-26 20:34:50 -08001474 SplitMessageReader *split_message_reader) {
Austin Schuh2f8fd752020-09-01 22:38:28 -07001475 if (split_message_reader != nullptr) {
1476 DCHECK(std::find_if(timestamp_heap_.begin(), timestamp_heap_.end(),
1477 [split_message_reader](
1478 const std::tuple<monotonic_clock::time_point,
1479 uint32_t, SplitMessageReader *>
1480 x) {
1481 return std::get<2>(x) == split_message_reader;
1482 }) == timestamp_heap_.end())
1483 << ": Pushing timestamp when it is already in the heap.";
Austin Schuh6f3babe2020-01-26 20:34:50 -08001484
Austin Schuh2f8fd752020-09-01 22:38:28 -07001485 timestamp_heap_.push_back(std::make_tuple(
1486 std::get<0>(timestamp), std::get<1>(timestamp), split_message_reader));
Austin Schuh6f3babe2020-01-26 20:34:50 -08001487
Austin Schuh2f8fd752020-09-01 22:38:28 -07001488 std::push_heap(timestamp_heap_.begin(), timestamp_heap_.end(),
1489 SplitMessageReaderHeapCompare);
1490 }
Austin Schuh6f3babe2020-01-26 20:34:50 -08001491
1492 // If we are a timestamp merger, don't wait for data. Missing data will be
1493 // caught at read time.
1494 if (has_timestamps_) {
Austin Schuh2f8fd752020-09-01 22:38:28 -07001495 if (!timestamp_heap_.empty()) {
1496 channel_merger_->Update(std::get<0>(timestamp_heap_[0]), channel_index_);
1497 pushed_ = true;
1498 } else {
1499 // Remove ourselves if we are empty.
1500 channel_merger_->Update(monotonic_clock::min_time, channel_index_);
1501 }
Austin Schuh6f3babe2020-01-26 20:34:50 -08001502 }
1503}
1504
1505std::tuple<monotonic_clock::time_point, uint32_t,
Austin Schuhadd6eb32020-11-09 21:24:26 -08001506 SizePrefixedFlatbufferVector<MessageHeader>>
Austin Schuh6f3babe2020-01-26 20:34:50 -08001507TimestampMerger::PopMessageHeap() {
1508 // Pop the oldest message reader pointer off the heap.
1509 CHECK_GT(message_heap_.size(), 0u);
1510 std::tuple<monotonic_clock::time_point, uint32_t, SplitMessageReader *>
1511 oldest_message_reader = message_heap_.front();
1512
1513 std::pop_heap(message_heap_.begin(), message_heap_.end(),
1514 &SplitMessageReaderHeapCompare);
1515 message_heap_.pop_back();
1516
1517 // Pop the oldest message. This re-pushes any messages from the reader to the
1518 // message heap.
1519 std::tuple<monotonic_clock::time_point, uint32_t,
Austin Schuhadd6eb32020-11-09 21:24:26 -08001520 SizePrefixedFlatbufferVector<MessageHeader>>
Austin Schuh6f3babe2020-01-26 20:34:50 -08001521 oldest_message =
1522 std::get<2>(oldest_message_reader)->PopOldest(channel_index_);
1523
1524 // Confirm that the time and queue_index we have recorded matches.
1525 CHECK_EQ(std::get<0>(oldest_message), std::get<0>(oldest_message_reader));
1526 CHECK_EQ(std::get<1>(oldest_message), std::get<1>(oldest_message_reader));
1527
1528 // Now, keep reading until we have found all duplicates.
Brian Silverman8a32ce62020-08-12 12:02:38 -07001529 while (!message_heap_.empty()) {
Austin Schuh6f3babe2020-01-26 20:34:50 -08001530 // See if it is a duplicate.
1531 std::tuple<monotonic_clock::time_point, uint32_t, SplitMessageReader *>
1532 next_oldest_message_reader = message_heap_.front();
1533
Austin Schuhcde938c2020-02-02 17:30:07 -08001534 std::tuple<monotonic_clock::time_point, uint32_t, const MessageHeader *>
1535 next_oldest_message_time = std::get<2>(next_oldest_message_reader)
1536 ->oldest_message(channel_index_);
Austin Schuh6f3babe2020-01-26 20:34:50 -08001537
1538 if (std::get<0>(next_oldest_message_time) == std::get<0>(oldest_message) &&
1539 std::get<1>(next_oldest_message_time) == std::get<1>(oldest_message)) {
1540 // Pop the message reader pointer.
1541 std::pop_heap(message_heap_.begin(), message_heap_.end(),
1542 &SplitMessageReaderHeapCompare);
1543 message_heap_.pop_back();
1544
1545 // Pop the next oldest message. This re-pushes any messages from the
1546 // reader.
1547 std::tuple<monotonic_clock::time_point, uint32_t,
Austin Schuhadd6eb32020-11-09 21:24:26 -08001548 SizePrefixedFlatbufferVector<MessageHeader>>
Austin Schuh6f3babe2020-01-26 20:34:50 -08001549 next_oldest_message = std::get<2>(next_oldest_message_reader)
1550 ->PopOldest(channel_index_);
1551
1552 // And make sure the message matches in it's entirety.
1553 CHECK(std::get<2>(oldest_message).span() ==
1554 std::get<2>(next_oldest_message).span())
1555 << ": Data at the same timestamp doesn't match.";
1556 } else {
1557 break;
1558 }
1559 }
1560
1561 return oldest_message;
1562}
1563
1564std::tuple<monotonic_clock::time_point, uint32_t,
Austin Schuhadd6eb32020-11-09 21:24:26 -08001565 SizePrefixedFlatbufferVector<MessageHeader>>
Austin Schuh6f3babe2020-01-26 20:34:50 -08001566TimestampMerger::PopTimestampHeap() {
1567 // Pop the oldest message reader pointer off the heap.
1568 CHECK_GT(timestamp_heap_.size(), 0u);
1569
1570 std::tuple<monotonic_clock::time_point, uint32_t, SplitMessageReader *>
1571 oldest_timestamp_reader = timestamp_heap_.front();
1572
1573 std::pop_heap(timestamp_heap_.begin(), timestamp_heap_.end(),
1574 &SplitMessageReaderHeapCompare);
1575 timestamp_heap_.pop_back();
1576
1577 CHECK(node_index_ != -1) << ": Timestamps in a single node environment";
1578
1579 // Pop the oldest message. This re-pushes any timestamps from the reader to
1580 // the timestamp heap.
1581 std::tuple<monotonic_clock::time_point, uint32_t,
Austin Schuhadd6eb32020-11-09 21:24:26 -08001582 SizePrefixedFlatbufferVector<MessageHeader>>
Austin Schuh6f3babe2020-01-26 20:34:50 -08001583 oldest_timestamp = std::get<2>(oldest_timestamp_reader)
Austin Schuh2f8fd752020-09-01 22:38:28 -07001584 ->PopOldestTimestamp(channel_index_, node_index_);
Austin Schuh6f3babe2020-01-26 20:34:50 -08001585
1586 // Confirm that the time we have recorded matches.
1587 CHECK_EQ(std::get<0>(oldest_timestamp), std::get<0>(oldest_timestamp_reader));
1588 CHECK_EQ(std::get<1>(oldest_timestamp), std::get<1>(oldest_timestamp_reader));
1589
Austin Schuh2f8fd752020-09-01 22:38:28 -07001590 // Now, keep reading until we have found all duplicates.
1591 while (!timestamp_heap_.empty()) {
1592 // See if it is a duplicate.
1593 std::tuple<monotonic_clock::time_point, uint32_t, SplitMessageReader *>
1594 next_oldest_timestamp_reader = timestamp_heap_.front();
Austin Schuh6f3babe2020-01-26 20:34:50 -08001595
Austin Schuh2f8fd752020-09-01 22:38:28 -07001596 std::tuple<monotonic_clock::time_point, uint32_t, const MessageHeader *>
1597 next_oldest_timestamp_time =
1598 std::get<2>(next_oldest_timestamp_reader)
1599 ->oldest_message(channel_index_, node_index_);
Austin Schuh6f3babe2020-01-26 20:34:50 -08001600
Austin Schuh2f8fd752020-09-01 22:38:28 -07001601 if (std::get<0>(next_oldest_timestamp_time) ==
1602 std::get<0>(oldest_timestamp) &&
1603 std::get<1>(next_oldest_timestamp_time) ==
1604 std::get<1>(oldest_timestamp)) {
1605 // Pop the timestamp reader pointer.
1606 std::pop_heap(timestamp_heap_.begin(), timestamp_heap_.end(),
1607 &SplitMessageReaderHeapCompare);
1608 timestamp_heap_.pop_back();
1609
1610 // Pop the next oldest timestamp. This re-pushes any messages from the
1611 // reader.
1612 std::tuple<monotonic_clock::time_point, uint32_t,
Austin Schuhadd6eb32020-11-09 21:24:26 -08001613 SizePrefixedFlatbufferVector<MessageHeader>>
Austin Schuh2f8fd752020-09-01 22:38:28 -07001614 next_oldest_timestamp =
1615 std::get<2>(next_oldest_timestamp_reader)
1616 ->PopOldestTimestamp(channel_index_, node_index_);
1617
1618 // And make sure the contents matches in it's entirety.
1619 CHECK(std::get<2>(oldest_timestamp).span() ==
1620 std::get<2>(next_oldest_timestamp).span())
1621 << ": Data at the same timestamp doesn't match, "
1622 << aos::FlatbufferToJson(std::get<2>(oldest_timestamp)) << " vs "
1623 << aos::FlatbufferToJson(std::get<2>(next_oldest_timestamp)) << " "
1624 << absl::BytesToHexString(std::string_view(
1625 reinterpret_cast<const char *>(
1626 std::get<2>(oldest_timestamp).span().data()),
1627 std::get<2>(oldest_timestamp).span().size()))
1628 << " vs "
1629 << absl::BytesToHexString(std::string_view(
1630 reinterpret_cast<const char *>(
1631 std::get<2>(next_oldest_timestamp).span().data()),
1632 std::get<2>(next_oldest_timestamp).span().size()));
1633
1634 } else {
1635 break;
1636 }
Austin Schuh8bd96322020-02-13 21:18:22 -08001637 }
1638
Austin Schuh2f8fd752020-09-01 22:38:28 -07001639 return oldest_timestamp;
Austin Schuh8bd96322020-02-13 21:18:22 -08001640}
1641
Austin Schuhadd6eb32020-11-09 21:24:26 -08001642std::tuple<TimestampMerger::DeliveryTimestamp,
1643 SizePrefixedFlatbufferVector<MessageHeader>>
Austin Schuh6f3babe2020-01-26 20:34:50 -08001644TimestampMerger::PopOldest() {
1645 if (has_timestamps_) {
Austin Schuh2f8fd752020-09-01 22:38:28 -07001646 VLOG(1) << "Looking for matching timestamp for "
1647 << configuration::StrippedChannelToString(
1648 configuration_->channels()->Get(channel_index_))
1649 << " (" << channel_index_ << ") "
1650 << " at " << std::get<0>(oldest_timestamp());
1651
Austin Schuh8bd96322020-02-13 21:18:22 -08001652 // Read the timestamps.
Austin Schuh6f3babe2020-01-26 20:34:50 -08001653 std::tuple<monotonic_clock::time_point, uint32_t,
Austin Schuhadd6eb32020-11-09 21:24:26 -08001654 SizePrefixedFlatbufferVector<MessageHeader>>
Austin Schuh6f3babe2020-01-26 20:34:50 -08001655 oldest_timestamp = PopTimestampHeap();
1656
1657 TimestampMerger::DeliveryTimestamp timestamp;
1658 timestamp.monotonic_event_time =
1659 monotonic_clock::time_point(chrono::nanoseconds(
1660 std::get<2>(oldest_timestamp).message().monotonic_sent_time()));
1661 timestamp.realtime_event_time =
1662 realtime_clock::time_point(chrono::nanoseconds(
1663 std::get<2>(oldest_timestamp).message().realtime_sent_time()));
Austin Schuh8d7e0bb2020-10-02 17:57:00 -07001664 timestamp.queue_index =
1665 std::get<2>(oldest_timestamp).message().queue_index();
Austin Schuh6f3babe2020-01-26 20:34:50 -08001666
1667 // Consistency check.
1668 CHECK_EQ(timestamp.monotonic_event_time, std::get<0>(oldest_timestamp));
1669 CHECK_EQ(std::get<2>(oldest_timestamp).message().queue_index(),
1670 std::get<1>(oldest_timestamp));
1671
1672 monotonic_clock::time_point remote_timestamp_monotonic_time(
1673 chrono::nanoseconds(
1674 std::get<2>(oldest_timestamp).message().monotonic_remote_time()));
1675
Austin Schuh8bd96322020-02-13 21:18:22 -08001676 // See if we have any data. If not, pass the problem up the chain.
Brian Silverman8a32ce62020-08-12 12:02:38 -07001677 if (message_heap_.empty()) {
Austin Schuhee711052020-08-24 16:06:09 -07001678 LOG(WARNING) << MaybeNodeName(configuration_->nodes()->Get(node_index_))
1679 << "No data to match timestamp on "
1680 << configuration::CleanedChannelToString(
1681 configuration_->channels()->Get(channel_index_))
1682 << " (" << channel_index_ << ")";
Austin Schuh8bd96322020-02-13 21:18:22 -08001683 return std::make_tuple(timestamp,
1684 std::move(std::get<2>(oldest_timestamp)));
1685 }
1686
Austin Schuh6f3babe2020-01-26 20:34:50 -08001687 while (true) {
Austin Schuhcde938c2020-02-02 17:30:07 -08001688 {
1689 // Ok, now try grabbing data until we find one which matches.
1690 std::tuple<monotonic_clock::time_point, uint32_t, const MessageHeader *>
1691 oldest_message_ref = oldest_message();
1692
1693 // Time at which the message was sent (this message is written from the
1694 // sending node's perspective.
1695 monotonic_clock::time_point remote_monotonic_time(chrono::nanoseconds(
1696 std::get<2>(oldest_message_ref)->monotonic_sent_time()));
1697
1698 if (remote_monotonic_time < remote_timestamp_monotonic_time) {
Austin Schuhee711052020-08-24 16:06:09 -07001699 LOG(WARNING) << configuration_->nodes()
1700 ->Get(node_index_)
1701 ->name()
1702 ->string_view()
1703 << " Undelivered message, skipping. Remote time is "
1704 << remote_monotonic_time << " timestamp is "
1705 << remote_timestamp_monotonic_time << " on channel "
1706 << configuration::StrippedChannelToString(
1707 configuration_->channels()->Get(channel_index_))
1708 << " (" << channel_index_ << ")";
Austin Schuhcde938c2020-02-02 17:30:07 -08001709 PopMessageHeap();
1710 continue;
1711 } else if (remote_monotonic_time > remote_timestamp_monotonic_time) {
Austin Schuhee711052020-08-24 16:06:09 -07001712 LOG(WARNING) << configuration_->nodes()
1713 ->Get(node_index_)
1714 ->name()
1715 ->string_view()
1716 << " Data not found. Remote time should be "
1717 << remote_timestamp_monotonic_time
1718 << ", message time is " << remote_monotonic_time
1719 << " on channel "
1720 << configuration::StrippedChannelToString(
1721 configuration_->channels()->Get(channel_index_))
Austin Schuh2f8fd752020-09-01 22:38:28 -07001722 << " (" << channel_index_ << ")"
1723 << (VLOG_IS_ON(1) ? DebugString() : "");
Austin Schuhcde938c2020-02-02 17:30:07 -08001724 return std::make_tuple(timestamp,
1725 std::move(std::get<2>(oldest_timestamp)));
1726 }
1727
1728 timestamp.monotonic_remote_time = remote_monotonic_time;
1729 }
1730
Austin Schuh2f8fd752020-09-01 22:38:28 -07001731 VLOG(1) << "Found matching data "
1732 << configuration::StrippedChannelToString(
1733 configuration_->channels()->Get(channel_index_))
1734 << " (" << channel_index_ << ")";
Austin Schuh6f3babe2020-01-26 20:34:50 -08001735 std::tuple<monotonic_clock::time_point, uint32_t,
Austin Schuhadd6eb32020-11-09 21:24:26 -08001736 SizePrefixedFlatbufferVector<MessageHeader>>
Austin Schuh6f3babe2020-01-26 20:34:50 -08001737 oldest_message = PopMessageHeap();
1738
Austin Schuh6f3babe2020-01-26 20:34:50 -08001739 timestamp.realtime_remote_time =
1740 realtime_clock::time_point(chrono::nanoseconds(
1741 std::get<2>(oldest_message).message().realtime_sent_time()));
1742 timestamp.remote_queue_index =
1743 std::get<2>(oldest_message).message().queue_index();
1744
Austin Schuhcde938c2020-02-02 17:30:07 -08001745 CHECK_EQ(timestamp.monotonic_remote_time,
1746 remote_timestamp_monotonic_time);
1747
1748 CHECK_EQ(timestamp.remote_queue_index,
1749 std::get<2>(oldest_timestamp).message().remote_queue_index())
1750 << ": " << FlatbufferToJson(&std::get<2>(oldest_timestamp).message())
1751 << " data "
1752 << FlatbufferToJson(&std::get<2>(oldest_message).message());
Austin Schuh6f3babe2020-01-26 20:34:50 -08001753
Austin Schuh30dd5c52020-08-01 14:43:44 -07001754 return std::make_tuple(timestamp, std::move(std::get<2>(oldest_message)));
Austin Schuh6f3babe2020-01-26 20:34:50 -08001755 }
1756 } else {
1757 std::tuple<monotonic_clock::time_point, uint32_t,
Austin Schuhadd6eb32020-11-09 21:24:26 -08001758 SizePrefixedFlatbufferVector<MessageHeader>>
Austin Schuh6f3babe2020-01-26 20:34:50 -08001759 oldest_message = PopMessageHeap();
1760
1761 TimestampMerger::DeliveryTimestamp timestamp;
1762 timestamp.monotonic_event_time =
1763 monotonic_clock::time_point(chrono::nanoseconds(
1764 std::get<2>(oldest_message).message().monotonic_sent_time()));
1765 timestamp.realtime_event_time =
1766 realtime_clock::time_point(chrono::nanoseconds(
1767 std::get<2>(oldest_message).message().realtime_sent_time()));
Austin Schuh8d7e0bb2020-10-02 17:57:00 -07001768 timestamp.queue_index = std::get<2>(oldest_message).message().queue_index();
Austin Schuh6f3babe2020-01-26 20:34:50 -08001769 timestamp.remote_queue_index = 0xffffffff;
1770
1771 CHECK_EQ(std::get<0>(oldest_message), timestamp.monotonic_event_time);
1772 CHECK_EQ(std::get<1>(oldest_message),
1773 std::get<2>(oldest_message).message().queue_index());
1774
Austin Schuh30dd5c52020-08-01 14:43:44 -07001775 return std::make_tuple(timestamp, std::move(std::get<2>(oldest_message)));
Austin Schuh6f3babe2020-01-26 20:34:50 -08001776 }
1777}
1778
Austin Schuh8bd96322020-02-13 21:18:22 -08001779void TimestampMerger::NoticeAtEnd() { channel_merger_->NoticeAtEnd(); }
1780
Austin Schuh6f3babe2020-01-26 20:34:50 -08001781namespace {
1782std::vector<std::unique_ptr<SplitMessageReader>> MakeSplitMessageReaders(
1783 const std::vector<std::vector<std::string>> &filenames) {
1784 CHECK_GT(filenames.size(), 0u);
1785 // Build up all the SplitMessageReaders.
1786 std::vector<std::unique_ptr<SplitMessageReader>> result;
1787 for (const std::vector<std::string> &filenames : filenames) {
1788 result.emplace_back(std::make_unique<SplitMessageReader>(filenames));
1789 }
1790 return result;
1791}
1792} // namespace
1793
1794ChannelMerger::ChannelMerger(
1795 const std::vector<std::vector<std::string>> &filenames)
1796 : split_message_readers_(MakeSplitMessageReaders(filenames)),
Austin Schuh97789fc2020-08-01 14:42:45 -07001797 log_file_header_(split_message_readers_[0]->raw_log_file_header()) {
Austin Schuh6f3babe2020-01-26 20:34:50 -08001798 // Now, confirm that the configuration matches for each and pick a start time.
1799 // Also return the list of possible nodes.
1800 for (const std::unique_ptr<SplitMessageReader> &reader :
1801 split_message_readers_) {
1802 CHECK(CompareFlatBuffer(log_file_header_.message().configuration(),
1803 reader->log_file_header()->configuration()))
1804 << ": Replaying log files with different configurations isn't "
1805 "supported";
1806 }
1807
1808 nodes_ = configuration::GetNodes(configuration());
1809}
1810
1811bool ChannelMerger::SetNode(const Node *target_node) {
1812 std::vector<SplitMessageReader *> split_message_readers;
1813 for (const std::unique_ptr<SplitMessageReader> &reader :
1814 split_message_readers_) {
1815 split_message_readers.emplace_back(reader.get());
1816 }
1817
1818 // Go find a log_file_header for this node.
1819 {
1820 bool found_node = false;
1821
1822 for (const std::unique_ptr<SplitMessageReader> &reader :
1823 split_message_readers_) {
James Kuszmaulfc273dc2020-05-09 17:56:19 -07001824 // In order to identify which logfile(s) map to the target node, do a
1825 // logical comparison of the nodes, by confirming that we are either in a
1826 // single-node setup (where the nodes will both be nullptr) or that the
1827 // node names match (but the other node fields--e.g., hostname lists--may
1828 // not).
1829 const bool both_null =
1830 reader->node() == nullptr && target_node == nullptr;
1831 const bool both_have_name =
1832 (reader->node() != nullptr) && (target_node != nullptr) &&
1833 (reader->node()->has_name() && target_node->has_name());
1834 const bool node_names_identical =
Brian Silvermand90905f2020-09-23 14:42:56 -07001835 both_have_name && (reader->node()->name()->string_view() ==
1836 target_node->name()->string_view());
James Kuszmaulfc273dc2020-05-09 17:56:19 -07001837 if (both_null || node_names_identical) {
Austin Schuh6f3babe2020-01-26 20:34:50 -08001838 if (!found_node) {
1839 found_node = true;
Austin Schuhadd6eb32020-11-09 21:24:26 -08001840 log_file_header_ = reader->raw_log_file_header();
Austin Schuhcde938c2020-02-02 17:30:07 -08001841 VLOG(1) << "Found log file " << reader->filename() << " with node "
1842 << FlatbufferToJson(reader->node()) << " start_time "
1843 << monotonic_start_time();
Austin Schuh6f3babe2020-01-26 20:34:50 -08001844 } else {
Austin Schuh2f8fd752020-09-01 22:38:28 -07001845 // Find the earliest start time. That way, if we get a full log file
1846 // directly from the node, and a partial later, we start with the
1847 // full. Update our header to match that.
1848 const monotonic_clock::time_point new_monotonic_start_time(
1849 chrono::nanoseconds(
1850 reader->log_file_header()->monotonic_start_time()));
1851 const realtime_clock::time_point new_realtime_start_time(
1852 chrono::nanoseconds(
1853 reader->log_file_header()->realtime_start_time()));
1854
1855 if (monotonic_start_time() == monotonic_clock::min_time ||
1856 (new_monotonic_start_time != monotonic_clock::min_time &&
1857 new_monotonic_start_time < monotonic_start_time())) {
1858 log_file_header_.mutable_message()->mutate_monotonic_start_time(
1859 new_monotonic_start_time.time_since_epoch().count());
1860 log_file_header_.mutable_message()->mutate_realtime_start_time(
1861 new_realtime_start_time.time_since_epoch().count());
1862 VLOG(1) << "Updated log file " << reader->filename()
1863 << " with node " << FlatbufferToJson(reader->node())
1864 << " start_time " << new_monotonic_start_time;
1865 }
Austin Schuh6f3babe2020-01-26 20:34:50 -08001866 }
1867 }
1868 }
1869
1870 if (!found_node) {
1871 LOG(WARNING) << "Failed to find log file for node "
1872 << FlatbufferToJson(target_node);
1873 return false;
1874 }
1875 }
1876
1877 // Build up all the timestamp mergers. This connects up all the
1878 // SplitMessageReaders.
1879 timestamp_mergers_.reserve(configuration()->channels()->size());
1880 for (size_t channel_index = 0;
1881 channel_index < configuration()->channels()->size(); ++channel_index) {
1882 timestamp_mergers_.emplace_back(
1883 configuration(), split_message_readers, channel_index,
1884 configuration::GetNode(configuration(), target_node), this);
1885 }
1886
1887 // And prime everything.
Austin Schuh6f3babe2020-01-26 20:34:50 -08001888 for (std::unique_ptr<SplitMessageReader> &split_message_reader :
1889 split_message_readers_) {
Austin Schuhcde938c2020-02-02 17:30:07 -08001890 split_message_reader->QueueMessages(
1891 split_message_reader->monotonic_start_time());
Austin Schuh6f3babe2020-01-26 20:34:50 -08001892 }
1893
1894 node_ = configuration::GetNodeOrDie(configuration(), target_node);
1895 return true;
1896}
1897
Austin Schuh858c9f32020-08-31 16:56:12 -07001898monotonic_clock::time_point ChannelMerger::OldestMessageTime() const {
Brian Silverman8a32ce62020-08-12 12:02:38 -07001899 if (channel_heap_.empty()) {
Austin Schuh6f3babe2020-01-26 20:34:50 -08001900 return monotonic_clock::max_time;
1901 }
1902 return channel_heap_.front().first;
1903}
1904
1905void ChannelMerger::PushChannelHeap(monotonic_clock::time_point timestamp,
1906 int channel_index) {
1907 // Pop and recreate the heap if it has already been pushed. And since we are
1908 // pushing again, we don't need to clear pushed.
1909 if (timestamp_mergers_[channel_index].pushed()) {
Brian Silverman8a32ce62020-08-12 12:02:38 -07001910 const auto channel_iterator = std::find_if(
Austin Schuh6f3babe2020-01-26 20:34:50 -08001911 channel_heap_.begin(), channel_heap_.end(),
1912 [channel_index](const std::pair<monotonic_clock::time_point, int> x) {
1913 return x.second == channel_index;
Brian Silverman8a32ce62020-08-12 12:02:38 -07001914 });
1915 DCHECK(channel_iterator != channel_heap_.end());
1916 if (std::get<0>(*channel_iterator) == timestamp) {
1917 // It's already in the heap, in the correct spot, so nothing
1918 // more for us to do here.
1919 return;
1920 }
1921 channel_heap_.erase(channel_iterator);
Austin Schuh6f3babe2020-01-26 20:34:50 -08001922 std::make_heap(channel_heap_.begin(), channel_heap_.end(),
1923 ChannelHeapCompare);
1924 }
1925
Austin Schuh2f8fd752020-09-01 22:38:28 -07001926 if (timestamp == monotonic_clock::min_time) {
1927 timestamp_mergers_[channel_index].set_pushed(false);
1928 return;
1929 }
1930
Austin Schuh05b70472020-01-01 17:11:17 -08001931 channel_heap_.push_back(std::make_pair(timestamp, channel_index));
1932
1933 // The default sort puts the newest message first. Use a custom comparator to
1934 // put the oldest message first.
1935 std::push_heap(channel_heap_.begin(), channel_heap_.end(),
1936 ChannelHeapCompare);
1937}
1938
Austin Schuh2f8fd752020-09-01 22:38:28 -07001939void ChannelMerger::VerifyHeaps() {
Austin Schuh661a8d82020-09-13 17:25:56 -07001940 std::vector<std::pair<monotonic_clock::time_point, int>> channel_heap =
1941 channel_heap_;
1942 std::make_heap(channel_heap.begin(), channel_heap.end(), &ChannelHeapCompare);
Austin Schuh2f8fd752020-09-01 22:38:28 -07001943
Austin Schuh661a8d82020-09-13 17:25:56 -07001944 for (size_t i = 0; i < channel_heap_.size(); ++i) {
1945 CHECK(channel_heap_[i] == channel_heap[i]) << ": Heaps diverged...";
1946 CHECK_EQ(
1947 std::get<0>(channel_heap[i]),
1948 timestamp_mergers_[std::get<1>(channel_heap[i])].channel_merger_time());
Austin Schuh2f8fd752020-09-01 22:38:28 -07001949 }
1950}
1951
Austin Schuh6f3babe2020-01-26 20:34:50 -08001952std::tuple<TimestampMerger::DeliveryTimestamp, int,
Austin Schuhadd6eb32020-11-09 21:24:26 -08001953 SizePrefixedFlatbufferVector<MessageHeader>>
Austin Schuh6f3babe2020-01-26 20:34:50 -08001954ChannelMerger::PopOldest() {
Austin Schuh8bd96322020-02-13 21:18:22 -08001955 CHECK_GT(channel_heap_.size(), 0u);
Austin Schuh05b70472020-01-01 17:11:17 -08001956 std::pair<monotonic_clock::time_point, int> oldest_channel_data =
1957 channel_heap_.front();
Austin Schuh6f3babe2020-01-26 20:34:50 -08001958 int channel_index = oldest_channel_data.second;
Austin Schuh05b70472020-01-01 17:11:17 -08001959 std::pop_heap(channel_heap_.begin(), channel_heap_.end(),
1960 &ChannelHeapCompare);
1961 channel_heap_.pop_back();
Austin Schuh8bd96322020-02-13 21:18:22 -08001962
Austin Schuh6f3babe2020-01-26 20:34:50 -08001963 timestamp_mergers_[channel_index].set_pushed(false);
Austin Schuh05b70472020-01-01 17:11:17 -08001964
Austin Schuh6f3babe2020-01-26 20:34:50 -08001965 TimestampMerger *merger = &timestamp_mergers_[channel_index];
Austin Schuh05b70472020-01-01 17:11:17 -08001966
Austin Schuhcde938c2020-02-02 17:30:07 -08001967 // Merger handles any queueing needed from here.
Austin Schuh6f3babe2020-01-26 20:34:50 -08001968 std::tuple<TimestampMerger::DeliveryTimestamp,
Austin Schuhadd6eb32020-11-09 21:24:26 -08001969 SizePrefixedFlatbufferVector<MessageHeader>>
Austin Schuh6f3babe2020-01-26 20:34:50 -08001970 message = merger->PopOldest();
Brian Silverman8a32ce62020-08-12 12:02:38 -07001971 DCHECK_EQ(std::get<0>(message).monotonic_event_time,
1972 oldest_channel_data.first)
1973 << ": channel_heap_ was corrupted for " << channel_index << ": "
1974 << DebugString();
Austin Schuh05b70472020-01-01 17:11:17 -08001975
Austin Schuh2f8fd752020-09-01 22:38:28 -07001976 CHECK_GE(std::get<0>(message).monotonic_event_time, last_popped_time_)
1977 << ": " << MaybeNodeName(log_file_header()->node())
1978 << "Messages came off the queue out of order. " << DebugString();
1979 last_popped_time_ = std::get<0>(message).monotonic_event_time;
1980
1981 VLOG(1) << "Popped " << last_popped_time_ << " "
1982 << configuration::StrippedChannelToString(
1983 configuration()->channels()->Get(channel_index))
1984 << " (" << channel_index << ")";
1985
Austin Schuh6f3babe2020-01-26 20:34:50 -08001986 return std::make_tuple(std::get<0>(message), channel_index,
1987 std::move(std::get<1>(message)));
1988}
1989
Austin Schuhcde938c2020-02-02 17:30:07 -08001990std::string SplitMessageReader::MessageHeaderQueue::DebugString() const {
1991 std::stringstream ss;
1992 for (size_t i = 0; i < data_.size(); ++i) {
Austin Schuh2f8fd752020-09-01 22:38:28 -07001993 if (i < 5 || i + 5 > data_.size()) {
1994 if (timestamps) {
1995 ss << " msg: ";
1996 } else {
1997 ss << " timestamp: ";
1998 }
1999 ss << monotonic_clock::time_point(
2000 chrono::nanoseconds(data_[i].message().monotonic_sent_time()))
Austin Schuhcde938c2020-02-02 17:30:07 -08002001 << " ("
Austin Schuh2f8fd752020-09-01 22:38:28 -07002002 << realtime_clock::time_point(
2003 chrono::nanoseconds(data_[i].message().realtime_sent_time()))
2004 << ") " << data_[i].message().queue_index();
2005 if (timestamps) {
2006 ss << " <- remote "
2007 << monotonic_clock::time_point(chrono::nanoseconds(
2008 data_[i].message().monotonic_remote_time()))
2009 << " ("
2010 << realtime_clock::time_point(chrono::nanoseconds(
2011 data_[i].message().realtime_remote_time()))
2012 << ")";
2013 }
2014 ss << "\n";
2015 } else if (i == 5) {
2016 ss << " ...\n";
Austin Schuh6f3babe2020-01-26 20:34:50 -08002017 }
Austin Schuhcde938c2020-02-02 17:30:07 -08002018 }
Austin Schuh6f3babe2020-01-26 20:34:50 -08002019
Austin Schuhcde938c2020-02-02 17:30:07 -08002020 return ss.str();
2021}
Austin Schuh6f3babe2020-01-26 20:34:50 -08002022
Austin Schuhcde938c2020-02-02 17:30:07 -08002023std::string SplitMessageReader::DebugString(int channel) const {
2024 std::stringstream ss;
2025 ss << "[\n";
2026 ss << channels_[channel].data.DebugString();
2027 ss << " ]";
2028 return ss.str();
2029}
Austin Schuh6f3babe2020-01-26 20:34:50 -08002030
Austin Schuhcde938c2020-02-02 17:30:07 -08002031std::string SplitMessageReader::DebugString(int channel, int node_index) const {
2032 std::stringstream ss;
2033 ss << "[\n";
2034 ss << channels_[channel].timestamps[node_index].DebugString();
2035 ss << " ]";
2036 return ss.str();
2037}
2038
2039std::string TimestampMerger::DebugString() const {
2040 std::stringstream ss;
2041
2042 if (timestamp_heap_.size() > 0) {
2043 ss << " timestamp_heap {\n";
2044 std::vector<
2045 std::tuple<monotonic_clock::time_point, uint32_t, SplitMessageReader *>>
2046 timestamp_heap = timestamp_heap_;
2047 while (timestamp_heap.size() > 0u) {
2048 std::tuple<monotonic_clock::time_point, uint32_t, SplitMessageReader *>
2049 oldest_timestamp_reader = timestamp_heap.front();
2050
2051 ss << " " << std::get<2>(oldest_timestamp_reader) << " "
2052 << std::get<0>(oldest_timestamp_reader) << " queue_index ("
2053 << std::get<1>(oldest_timestamp_reader) << ") ttq "
2054 << std::get<2>(oldest_timestamp_reader)->time_to_queue() << " "
2055 << std::get<2>(oldest_timestamp_reader)->filename() << " -> "
2056 << std::get<2>(oldest_timestamp_reader)
2057 ->DebugString(channel_index_, node_index_)
2058 << "\n";
2059
2060 std::pop_heap(timestamp_heap.begin(), timestamp_heap.end(),
2061 &SplitMessageReaderHeapCompare);
2062 timestamp_heap.pop_back();
2063 }
2064 ss << " }\n";
2065 }
2066
2067 ss << " message_heap {\n";
2068 {
2069 std::vector<
2070 std::tuple<monotonic_clock::time_point, uint32_t, SplitMessageReader *>>
2071 message_heap = message_heap_;
Brian Silverman8a32ce62020-08-12 12:02:38 -07002072 while (!message_heap.empty()) {
Austin Schuhcde938c2020-02-02 17:30:07 -08002073 std::tuple<monotonic_clock::time_point, uint32_t, SplitMessageReader *>
2074 oldest_message_reader = message_heap.front();
2075
2076 ss << " " << std::get<2>(oldest_message_reader) << " "
2077 << std::get<0>(oldest_message_reader) << " queue_index ("
2078 << std::get<1>(oldest_message_reader) << ") ttq "
2079 << std::get<2>(oldest_message_reader)->time_to_queue() << " "
2080 << std::get<2>(oldest_message_reader)->filename() << " -> "
2081 << std::get<2>(oldest_message_reader)->DebugString(channel_index_)
2082 << "\n";
2083
2084 std::pop_heap(message_heap.begin(), message_heap.end(),
2085 &SplitMessageReaderHeapCompare);
2086 message_heap.pop_back();
Austin Schuh6f3babe2020-01-26 20:34:50 -08002087 }
Austin Schuh05b70472020-01-01 17:11:17 -08002088 }
Austin Schuhcde938c2020-02-02 17:30:07 -08002089 ss << " }";
2090
2091 return ss.str();
2092}
2093
2094std::string ChannelMerger::DebugString() const {
2095 std::stringstream ss;
2096 ss << "start_time " << realtime_start_time() << " " << monotonic_start_time()
2097 << "\n";
2098 ss << "channel_heap {\n";
2099 std::vector<std::pair<monotonic_clock::time_point, int>> channel_heap =
2100 channel_heap_;
Brian Silverman8a32ce62020-08-12 12:02:38 -07002101 while (!channel_heap.empty()) {
Austin Schuhcde938c2020-02-02 17:30:07 -08002102 std::tuple<monotonic_clock::time_point, int> channel = channel_heap.front();
2103 ss << " " << std::get<0>(channel) << " (" << std::get<1>(channel) << ") "
2104 << configuration::CleanedChannelToString(
2105 configuration()->channels()->Get(std::get<1>(channel)))
2106 << "\n";
2107
2108 ss << timestamp_mergers_[std::get<1>(channel)].DebugString() << "\n";
2109
2110 std::pop_heap(channel_heap.begin(), channel_heap.end(),
2111 &ChannelHeapCompare);
2112 channel_heap.pop_back();
2113 }
2114 ss << "}";
2115
2116 return ss.str();
Austin Schuh05b70472020-01-01 17:11:17 -08002117}
2118
Austin Schuhee711052020-08-24 16:06:09 -07002119std::string MaybeNodeName(const Node *node) {
2120 if (node != nullptr) {
2121 return node->name()->str() + " ";
2122 }
2123 return "";
2124}
2125
Brian Silvermanf51499a2020-09-21 12:49:08 -07002126} // namespace aos::logger