blob: 3a39803eea4581770e4a4bca53f23e43ef9ee36a [file] [log] [blame]
Austin Schuha36c8902019-12-30 18:07:15 -08001#include "aos/events/logging/logfile_utils.h"
2
3#include <fcntl.h>
Austin Schuha36c8902019-12-30 18:07:15 -08004#include <sys/stat.h>
5#include <sys/types.h>
6#include <sys/uio.h>
7
Brian Silvermanf51499a2020-09-21 12:49:08 -07008#include <algorithm>
9#include <climits>
Austin Schuha36c8902019-12-30 18:07:15 -080010
Austin Schuhe4fca832020-03-07 16:58:53 -080011#include "absl/strings/escaping.h"
Austin Schuh05b70472020-01-01 17:11:17 -080012#include "aos/configuration.h"
Austin Schuhfa895892020-01-07 20:07:41 -080013#include "aos/flatbuffer_merge.h"
Austin Schuh6f3babe2020-01-26 20:34:50 -080014#include "aos/util/file.h"
Austin Schuha36c8902019-12-30 18:07:15 -080015#include "flatbuffers/flatbuffers.h"
Austin Schuh05b70472020-01-01 17:11:17 -080016#include "gflags/gflags.h"
17#include "glog/logging.h"
Austin Schuha36c8902019-12-30 18:07:15 -080018
Brian Silvermanf59fe3f2020-09-22 21:04:09 -070019#if defined(__x86_64__)
20#define ENABLE_LZMA 1
21#elif defined(__aarch64__)
22#define ENABLE_LZMA 1
23#else
24#define ENABLE_LZMA 0
25#endif
26
27#if ENABLE_LZMA
28#include "aos/events/logging/lzma_encoder.h"
29#endif
30
Austin Schuh7fbf5a72020-09-21 16:28:13 -070031DEFINE_int32(flush_size, 128000,
Austin Schuha36c8902019-12-30 18:07:15 -080032 "Number of outstanding bytes to allow before flushing to disk.");
33
Brian Silvermanf51499a2020-09-21 12:49:08 -070034namespace aos::logger {
Austin Schuha36c8902019-12-30 18:07:15 -080035
Austin Schuh05b70472020-01-01 17:11:17 -080036namespace chrono = std::chrono;
37
Brian Silvermanf51499a2020-09-21 12:49:08 -070038DetachedBufferWriter::DetachedBufferWriter(
39 std::string_view filename, std::unique_ptr<DetachedBufferEncoder> encoder)
40 : filename_(filename), encoder_(std::move(encoder)) {
Brian Silvermana9f2ec92020-10-06 18:00:53 -070041 if (!util::MkdirPIfSpace(filename, 0777)) {
42 ran_out_of_space_ = true;
43 } else {
44 fd_ = open(std::string(filename).c_str(),
45 O_RDWR | O_CLOEXEC | O_CREAT | O_EXCL, 0774);
46 if (fd_ == -1 && errno == ENOSPC) {
47 ran_out_of_space_ = true;
48 } else {
49 PCHECK(fd_ != -1) << ": Failed to open " << filename << " for writing";
50 VLOG(1) << "Opened " << filename << " for writing";
51 }
52 }
Austin Schuha36c8902019-12-30 18:07:15 -080053}
54
55DetachedBufferWriter::~DetachedBufferWriter() {
Brian Silverman0465fcf2020-09-24 00:29:18 -070056 Close();
57 if (ran_out_of_space_) {
58 CHECK(acknowledge_ran_out_of_space_)
59 << ": Unacknowledged out of disk space, log file was not completed";
Brian Silvermanf51499a2020-09-21 12:49:08 -070060 }
Austin Schuh2f8fd752020-09-01 22:38:28 -070061}
62
Brian Silvermand90905f2020-09-23 14:42:56 -070063DetachedBufferWriter::DetachedBufferWriter(DetachedBufferWriter &&other) {
Austin Schuh2f8fd752020-09-01 22:38:28 -070064 *this = std::move(other);
65}
66
Brian Silverman87ac0402020-09-17 14:47:01 -070067// When other is destroyed "soon" (which it should be because we're getting an
68// rvalue reference to it), it will flush etc all the data we have queued up
69// (because that data will then be its data).
Austin Schuh2f8fd752020-09-01 22:38:28 -070070DetachedBufferWriter &DetachedBufferWriter::operator=(
71 DetachedBufferWriter &&other) {
Austin Schuh2f8fd752020-09-01 22:38:28 -070072 std::swap(filename_, other.filename_);
Brian Silvermanf51499a2020-09-21 12:49:08 -070073 std::swap(encoder_, other.encoder_);
Austin Schuh2f8fd752020-09-01 22:38:28 -070074 std::swap(fd_, other.fd_);
Brian Silverman0465fcf2020-09-24 00:29:18 -070075 std::swap(ran_out_of_space_, other.ran_out_of_space_);
76 std::swap(acknowledge_ran_out_of_space_, other.acknowledge_ran_out_of_space_);
Austin Schuh2f8fd752020-09-01 22:38:28 -070077 std::swap(iovec_, other.iovec_);
Brian Silvermanf51499a2020-09-21 12:49:08 -070078 std::swap(max_write_time_, other.max_write_time_);
79 std::swap(max_write_time_bytes_, other.max_write_time_bytes_);
80 std::swap(max_write_time_messages_, other.max_write_time_messages_);
81 std::swap(total_write_time_, other.total_write_time_);
82 std::swap(total_write_count_, other.total_write_count_);
83 std::swap(total_write_messages_, other.total_write_messages_);
84 std::swap(total_write_bytes_, other.total_write_bytes_);
Austin Schuh2f8fd752020-09-01 22:38:28 -070085 return *this;
Austin Schuha36c8902019-12-30 18:07:15 -080086}
87
Brian Silvermanf51499a2020-09-21 12:49:08 -070088void DetachedBufferWriter::QueueSpan(absl::Span<const uint8_t> span) {
Brian Silvermana9f2ec92020-10-06 18:00:53 -070089 if (ran_out_of_space_) {
90 // We don't want any later data to be written after space becomes
91 // available, so refuse to write anything more once we've dropped data
92 // because we ran out of space.
93 VLOG(1) << "Ignoring span: " << span.size();
94 return;
95 }
96
Brian Silvermanf51499a2020-09-21 12:49:08 -070097 if (encoder_->may_bypass() && span.size() > 4096u) {
98 // Over this threshold, we'll assume it's cheaper to add an extra
99 // syscall to write the data immediately instead of copying it to
100 // enqueue.
Austin Schuha36c8902019-12-30 18:07:15 -0800101
Brian Silvermanf51499a2020-09-21 12:49:08 -0700102 // First, flush everything.
103 while (encoder_->queue_size() > 0u) {
104 Flush();
105 }
Austin Schuhde031b72020-01-10 19:34:41 -0800106
Brian Silvermanf51499a2020-09-21 12:49:08 -0700107 // Then, write it directly.
108 const auto start = aos::monotonic_clock::now();
109 const ssize_t written = write(fd_, span.data(), span.size());
110 const auto end = aos::monotonic_clock::now();
Brian Silverman0465fcf2020-09-24 00:29:18 -0700111 HandleWriteReturn(written, span.size());
Brian Silvermanf51499a2020-09-21 12:49:08 -0700112 UpdateStatsForWrite(end - start, written, 1);
113 } else {
114 encoder_->Encode(CopySpanAsDetachedBuffer(span));
Austin Schuha36c8902019-12-30 18:07:15 -0800115 }
Brian Silvermanf51499a2020-09-21 12:49:08 -0700116
117 FlushAtThreshold();
Austin Schuha36c8902019-12-30 18:07:15 -0800118}
119
Brian Silverman0465fcf2020-09-24 00:29:18 -0700120void DetachedBufferWriter::Close() {
121 if (fd_ == -1) {
122 return;
123 }
124 encoder_->Finish();
125 while (encoder_->queue_size() > 0) {
126 Flush();
127 }
128 if (close(fd_) == -1) {
129 if (errno == ENOSPC) {
130 ran_out_of_space_ = true;
131 } else {
132 PLOG(ERROR) << "Closing log file failed";
133 }
134 }
135 fd_ = -1;
136 VLOG(1) << "Closed " << filename_;
137}
138
Austin Schuha36c8902019-12-30 18:07:15 -0800139void DetachedBufferWriter::Flush() {
Brian Silvermanf51499a2020-09-21 12:49:08 -0700140 const auto queue = encoder_->queue();
141 if (queue.empty()) {
Austin Schuha36c8902019-12-30 18:07:15 -0800142 return;
143 }
Brian Silverman0465fcf2020-09-24 00:29:18 -0700144 if (ran_out_of_space_) {
145 // We don't want any later data to be written after space becomes available,
146 // so refuse to write anything more once we've dropped data because we ran
147 // out of space.
148 VLOG(1) << "Ignoring queue: " << queue.size();
149 encoder_->Clear(queue.size());
150 return;
151 }
Brian Silvermanf51499a2020-09-21 12:49:08 -0700152
Austin Schuha36c8902019-12-30 18:07:15 -0800153 iovec_.clear();
Brian Silvermanf51499a2020-09-21 12:49:08 -0700154 const size_t iovec_size = std::min<size_t>(queue.size(), IOV_MAX);
155 iovec_.resize(iovec_size);
Austin Schuha36c8902019-12-30 18:07:15 -0800156 size_t counted_size = 0;
Brian Silvermanf51499a2020-09-21 12:49:08 -0700157 for (size_t i = 0; i < iovec_size; ++i) {
158 iovec_[i].iov_base = const_cast<uint8_t *>(queue[i].data());
159 iovec_[i].iov_len = queue[i].size();
160 counted_size += iovec_[i].iov_len;
Austin Schuha36c8902019-12-30 18:07:15 -0800161 }
Brian Silvermanf51499a2020-09-21 12:49:08 -0700162
163 const auto start = aos::monotonic_clock::now();
Austin Schuha36c8902019-12-30 18:07:15 -0800164 const ssize_t written = writev(fd_, iovec_.data(), iovec_.size());
Brian Silvermanf51499a2020-09-21 12:49:08 -0700165 const auto end = aos::monotonic_clock::now();
Brian Silverman0465fcf2020-09-24 00:29:18 -0700166 HandleWriteReturn(written, counted_size);
Brian Silvermanf51499a2020-09-21 12:49:08 -0700167
168 encoder_->Clear(iovec_size);
169
170 UpdateStatsForWrite(end - start, written, iovec_size);
171}
172
Brian Silverman0465fcf2020-09-24 00:29:18 -0700173void DetachedBufferWriter::HandleWriteReturn(ssize_t write_return,
174 size_t write_size) {
175 if (write_return == -1 && errno == ENOSPC) {
176 ran_out_of_space_ = true;
177 return;
178 }
179 PCHECK(write_return >= 0) << ": write failed";
180 if (write_return < static_cast<ssize_t>(write_size)) {
181 // Sometimes this happens instead of ENOSPC. On a real filesystem, this
182 // never seems to happen in any other case. If we ever want to log to a
183 // socket, this will happen more often. However, until we get there, we'll
184 // just assume it means we ran out of space.
185 ran_out_of_space_ = true;
186 return;
187 }
188}
189
Brian Silvermanf51499a2020-09-21 12:49:08 -0700190void DetachedBufferWriter::UpdateStatsForWrite(
191 aos::monotonic_clock::duration duration, ssize_t written, int iovec_size) {
192 if (duration > max_write_time_) {
193 max_write_time_ = duration;
194 max_write_time_bytes_ = written;
195 max_write_time_messages_ = iovec_size;
196 }
197 total_write_time_ += duration;
198 ++total_write_count_;
199 total_write_messages_ += iovec_size;
200 total_write_bytes_ += written;
201}
202
203void DetachedBufferWriter::FlushAtThreshold() {
204 // Flush if we are at the max number of iovs per writev, because there's no
205 // point queueing up any more data in memory. Also flush once we have enough
206 // data queued up.
207 while (encoder_->queued_bytes() > static_cast<size_t>(FLAGS_flush_size) ||
208 encoder_->queue_size() >= IOV_MAX) {
209 Flush();
210 }
Austin Schuha36c8902019-12-30 18:07:15 -0800211}
212
213flatbuffers::Offset<MessageHeader> PackMessage(
214 flatbuffers::FlatBufferBuilder *fbb, const Context &context,
215 int channel_index, LogType log_type) {
216 flatbuffers::Offset<flatbuffers::Vector<uint8_t>> data_offset;
217
218 switch (log_type) {
219 case LogType::kLogMessage:
220 case LogType::kLogMessageAndDeliveryTime:
Austin Schuh6f3babe2020-01-26 20:34:50 -0800221 case LogType::kLogRemoteMessage:
Brian Silvermaneaa41d62020-07-08 19:47:35 -0700222 data_offset = fbb->CreateVector(
223 static_cast<const uint8_t *>(context.data), context.size);
Austin Schuha36c8902019-12-30 18:07:15 -0800224 break;
225
226 case LogType::kLogDeliveryTimeOnly:
227 break;
228 }
229
230 MessageHeader::Builder message_header_builder(*fbb);
231 message_header_builder.add_channel_index(channel_index);
Austin Schuh6f3babe2020-01-26 20:34:50 -0800232
233 switch (log_type) {
234 case LogType::kLogRemoteMessage:
235 message_header_builder.add_queue_index(context.remote_queue_index);
236 message_header_builder.add_monotonic_sent_time(
237 context.monotonic_remote_time.time_since_epoch().count());
238 message_header_builder.add_realtime_sent_time(
239 context.realtime_remote_time.time_since_epoch().count());
240 break;
241
242 case LogType::kLogMessage:
243 case LogType::kLogMessageAndDeliveryTime:
244 case LogType::kLogDeliveryTimeOnly:
245 message_header_builder.add_queue_index(context.queue_index);
246 message_header_builder.add_monotonic_sent_time(
247 context.monotonic_event_time.time_since_epoch().count());
248 message_header_builder.add_realtime_sent_time(
249 context.realtime_event_time.time_since_epoch().count());
250 break;
251 }
Austin Schuha36c8902019-12-30 18:07:15 -0800252
253 switch (log_type) {
254 case LogType::kLogMessage:
Austin Schuh6f3babe2020-01-26 20:34:50 -0800255 case LogType::kLogRemoteMessage:
Austin Schuha36c8902019-12-30 18:07:15 -0800256 message_header_builder.add_data(data_offset);
257 break;
258
259 case LogType::kLogMessageAndDeliveryTime:
260 message_header_builder.add_data(data_offset);
261 [[fallthrough]];
262
263 case LogType::kLogDeliveryTimeOnly:
264 message_header_builder.add_monotonic_remote_time(
265 context.monotonic_remote_time.time_since_epoch().count());
266 message_header_builder.add_realtime_remote_time(
267 context.realtime_remote_time.time_since_epoch().count());
268 message_header_builder.add_remote_queue_index(context.remote_queue_index);
269 break;
270 }
271
272 return message_header_builder.Finish();
273}
274
Brian Silvermanf51499a2020-09-21 12:49:08 -0700275SpanReader::SpanReader(std::string_view filename) : filename_(filename) {
Brian Silvermanf59fe3f2020-09-22 21:04:09 -0700276 static const std::string_view kXz = ".xz";
277 if (filename.substr(filename.size() - kXz.size()) == kXz) {
278#if ENABLE_LZMA
279 decoder_ = std::make_unique<LzmaDecoder>(filename);
280#else
281 LOG(FATAL) << "Reading xz-compressed files not supported on this platform";
282#endif
283 } else {
284 decoder_ = std::make_unique<DummyDecoder>(filename);
285 }
Austin Schuh05b70472020-01-01 17:11:17 -0800286}
287
288absl::Span<const uint8_t> SpanReader::ReadMessage() {
289 // Make sure we have enough for the size.
290 if (data_.size() - consumed_data_ < sizeof(flatbuffers::uoffset_t)) {
291 if (!ReadBlock()) {
292 return absl::Span<const uint8_t>();
293 }
294 }
295
296 // Now make sure we have enough for the message.
297 const size_t data_size =
298 flatbuffers::GetPrefixedSize(data_.data() + consumed_data_) +
299 sizeof(flatbuffers::uoffset_t);
Austin Schuhe4fca832020-03-07 16:58:53 -0800300 if (data_size == sizeof(flatbuffers::uoffset_t)) {
301 LOG(ERROR) << "Size of data is zero. Log file end is corrupted, skipping.";
302 LOG(ERROR) << " Rest of log file is "
303 << absl::BytesToHexString(std::string_view(
304 reinterpret_cast<const char *>(data_.data() +
305 consumed_data_),
306 data_.size() - consumed_data_));
307 return absl::Span<const uint8_t>();
308 }
Austin Schuh05b70472020-01-01 17:11:17 -0800309 while (data_.size() < consumed_data_ + data_size) {
310 if (!ReadBlock()) {
311 return absl::Span<const uint8_t>();
312 }
313 }
314
315 // And return it, consuming the data.
316 const uint8_t *data_ptr = data_.data() + consumed_data_;
317
318 consumed_data_ += data_size;
319
320 return absl::Span<const uint8_t>(data_ptr, data_size);
321}
322
Austin Schuh05b70472020-01-01 17:11:17 -0800323bool SpanReader::ReadBlock() {
Brian Silvermanf51499a2020-09-21 12:49:08 -0700324 // This is the amount of data we grab at a time. Doing larger chunks minimizes
325 // syscalls and helps decompressors batch things more efficiently.
Austin Schuh05b70472020-01-01 17:11:17 -0800326 constexpr size_t kReadSize = 256 * 1024;
327
328 // Strip off any unused data at the front.
329 if (consumed_data_ != 0) {
Brian Silvermanf51499a2020-09-21 12:49:08 -0700330 data_.erase_front(consumed_data_);
Austin Schuh05b70472020-01-01 17:11:17 -0800331 consumed_data_ = 0;
332 }
333
334 const size_t starting_size = data_.size();
335
336 // This should automatically grow the backing store. It won't shrink if we
337 // get a small chunk later. This reduces allocations when we want to append
338 // more data.
Brian Silvermanf51499a2020-09-21 12:49:08 -0700339 data_.resize(starting_size + kReadSize);
Austin Schuh05b70472020-01-01 17:11:17 -0800340
Brian Silvermanf51499a2020-09-21 12:49:08 -0700341 const size_t count =
342 decoder_->Read(data_.begin() + starting_size, data_.end());
343 data_.resize(starting_size + count);
Austin Schuh05b70472020-01-01 17:11:17 -0800344 if (count == 0) {
Austin Schuh05b70472020-01-01 17:11:17 -0800345 return false;
346 }
Austin Schuh05b70472020-01-01 17:11:17 -0800347
348 return true;
349}
350
Austin Schuhadd6eb32020-11-09 21:24:26 -0800351std::optional<SizePrefixedFlatbufferVector<LogFileHeader>> ReadHeader(
Austin Schuh3bd4c402020-11-06 18:19:06 -0800352 std::string_view filename) {
Austin Schuh6f3babe2020-01-26 20:34:50 -0800353 SpanReader span_reader(filename);
Austin Schuh6f3babe2020-01-26 20:34:50 -0800354 absl::Span<const uint8_t> config_data = span_reader.ReadMessage();
355
356 // Make sure something was read.
Austin Schuh3bd4c402020-11-06 18:19:06 -0800357 if (config_data == absl::Span<const uint8_t>()) {
358 return std::nullopt;
359 }
Austin Schuh6f3babe2020-01-26 20:34:50 -0800360
Austin Schuh5212cad2020-09-09 23:12:09 -0700361 // And copy the config so we have it forever, removing the size prefix.
Brian Silverman354697a2020-09-22 21:06:32 -0700362 ResizeableBuffer data;
Austin Schuhadd6eb32020-11-09 21:24:26 -0800363 data.resize(config_data.size());
364 memcpy(data.data(), config_data.begin(), data.size());
365 return SizePrefixedFlatbufferVector<LogFileHeader>(std::move(data));
Austin Schuh6f3babe2020-01-26 20:34:50 -0800366}
367
Austin Schuhadd6eb32020-11-09 21:24:26 -0800368std::optional<SizePrefixedFlatbufferVector<MessageHeader>> ReadNthMessage(
Austin Schuh3bd4c402020-11-06 18:19:06 -0800369 std::string_view filename, size_t n) {
Austin Schuh5212cad2020-09-09 23:12:09 -0700370 SpanReader span_reader(filename);
371 absl::Span<const uint8_t> data_span = span_reader.ReadMessage();
372 for (size_t i = 0; i < n + 1; ++i) {
373 data_span = span_reader.ReadMessage();
374
375 // Make sure something was read.
Austin Schuh3bd4c402020-11-06 18:19:06 -0800376 if (data_span == absl::Span<const uint8_t>()) {
377 return std::nullopt;
378 }
Austin Schuh5212cad2020-09-09 23:12:09 -0700379 }
380
Brian Silverman354697a2020-09-22 21:06:32 -0700381 // And copy the config so we have it forever, removing the size prefix.
382 ResizeableBuffer data;
Austin Schuhadd6eb32020-11-09 21:24:26 -0800383 data.resize(data_span.size());
384 memcpy(data.data(), data_span.begin(), data.size());
385 return SizePrefixedFlatbufferVector<MessageHeader>(std::move(data));
Austin Schuh5212cad2020-09-09 23:12:09 -0700386}
387
Austin Schuh05b70472020-01-01 17:11:17 -0800388MessageReader::MessageReader(std::string_view filename)
Austin Schuh97789fc2020-08-01 14:42:45 -0700389 : span_reader_(filename),
Austin Schuhadd6eb32020-11-09 21:24:26 -0800390 raw_log_file_header_(
391 SizePrefixedFlatbufferVector<LogFileHeader>::Empty()) {
Austin Schuh05b70472020-01-01 17:11:17 -0800392 // Make sure we have enough to read the size.
Austin Schuh97789fc2020-08-01 14:42:45 -0700393 absl::Span<const uint8_t> header_data = span_reader_.ReadMessage();
Austin Schuh05b70472020-01-01 17:11:17 -0800394
395 // Make sure something was read.
Austin Schuh97789fc2020-08-01 14:42:45 -0700396 CHECK(header_data != absl::Span<const uint8_t>())
397 << ": Failed to read header from: " << filename;
Austin Schuh05b70472020-01-01 17:11:17 -0800398
Austin Schuh97789fc2020-08-01 14:42:45 -0700399 // And copy the header data so we have it forever.
Brian Silverman354697a2020-09-22 21:06:32 -0700400 ResizeableBuffer header_data_copy;
Austin Schuhadd6eb32020-11-09 21:24:26 -0800401 header_data_copy.resize(header_data.size());
402 memcpy(header_data_copy.data(), header_data.begin(), header_data_copy.size());
Austin Schuh97789fc2020-08-01 14:42:45 -0700403 raw_log_file_header_ =
Austin Schuhadd6eb32020-11-09 21:24:26 -0800404 SizePrefixedFlatbufferVector<LogFileHeader>(std::move(header_data_copy));
Austin Schuh05b70472020-01-01 17:11:17 -0800405
Austin Schuhcde938c2020-02-02 17:30:07 -0800406 max_out_of_order_duration_ =
Austin Schuh2f8fd752020-09-01 22:38:28 -0700407 chrono::nanoseconds(log_file_header()->max_out_of_order_duration());
Austin Schuhcde938c2020-02-02 17:30:07 -0800408
409 VLOG(1) << "Opened " << filename << " as node "
410 << FlatbufferToJson(log_file_header()->node());
Austin Schuh05b70472020-01-01 17:11:17 -0800411}
412
Austin Schuhadd6eb32020-11-09 21:24:26 -0800413std::optional<SizePrefixedFlatbufferVector<MessageHeader>>
414MessageReader::ReadMessage() {
Austin Schuh05b70472020-01-01 17:11:17 -0800415 absl::Span<const uint8_t> msg_data = span_reader_.ReadMessage();
416 if (msg_data == absl::Span<const uint8_t>()) {
417 return std::nullopt;
418 }
419
Brian Silverman354697a2020-09-22 21:06:32 -0700420 ResizeableBuffer result_buffer;
Austin Schuhadd6eb32020-11-09 21:24:26 -0800421 result_buffer.resize(msg_data.size());
422 memcpy(result_buffer.data(), msg_data.begin(), result_buffer.size());
423 SizePrefixedFlatbufferVector<MessageHeader> result(std::move(result_buffer));
Austin Schuh05b70472020-01-01 17:11:17 -0800424
425 const monotonic_clock::time_point timestamp = monotonic_clock::time_point(
426 chrono::nanoseconds(result.message().monotonic_sent_time()));
427
428 newest_timestamp_ = std::max(newest_timestamp_, timestamp);
Austin Schuh8bd96322020-02-13 21:18:22 -0800429 VLOG(2) << "Read from " << filename() << " data " << FlatbufferToJson(result);
Austin Schuh6f3babe2020-01-26 20:34:50 -0800430 return std::move(result);
Austin Schuh05b70472020-01-01 17:11:17 -0800431}
432
Austin Schuhc41603c2020-10-11 16:17:37 -0700433PartsMessageReader::PartsMessageReader(LogParts log_parts)
434 : parts_(std::move(log_parts)), message_reader_(parts_.parts[0]) {}
435
Austin Schuhadd6eb32020-11-09 21:24:26 -0800436std::optional<SizePrefixedFlatbufferVector<MessageHeader>>
Austin Schuhc41603c2020-10-11 16:17:37 -0700437PartsMessageReader::ReadMessage() {
438 while (!done_) {
Austin Schuhadd6eb32020-11-09 21:24:26 -0800439 std::optional<SizePrefixedFlatbufferVector<MessageHeader>> message =
Austin Schuhc41603c2020-10-11 16:17:37 -0700440 message_reader_.ReadMessage();
441 if (message) {
442 newest_timestamp_ = message_reader_.newest_timestamp();
Austin Schuh32f68492020-11-08 21:45:51 -0800443 const monotonic_clock::time_point monotonic_sent_time(
444 chrono::nanoseconds(message->message().monotonic_sent_time()));
445 CHECK_GE(monotonic_sent_time,
446 newest_timestamp_ - max_out_of_order_duration());
Austin Schuhc41603c2020-10-11 16:17:37 -0700447 return message;
448 }
449 NextLog();
450 }
Austin Schuh32f68492020-11-08 21:45:51 -0800451 newest_timestamp_ = monotonic_clock::max_time;
Austin Schuhc41603c2020-10-11 16:17:37 -0700452 return std::nullopt;
453}
454
455void PartsMessageReader::NextLog() {
456 if (next_part_index_ == parts_.parts.size()) {
457 done_ = true;
458 return;
459 }
460 message_reader_ = MessageReader(parts_.parts[next_part_index_]);
461 ++next_part_index_;
462}
463
Austin Schuh1be0ce42020-11-29 22:43:26 -0800464bool Message::operator<(const Message &m2) const {
465 if (this->timestamp < m2.timestamp) {
466 return true;
467 } else if (this->timestamp > m2.timestamp) {
468 return false;
469 }
470
471 if (this->channel_index < m2.channel_index) {
472 return true;
473 } else if (this->channel_index > m2.channel_index) {
474 return false;
475 }
476
477 return this->queue_index < m2.queue_index;
478}
479
480bool Message::operator>=(const Message &m2) const { return !(*this < m2); }
481
482std::ostream &operator<<(std::ostream &os, const Message &m) {
483 os << "{.channel_index=" << m.channel_index
484 << ", .queue_index=" << m.queue_index << ", .timestamp=" << m.timestamp
485 << ", .data="
486 << aos::FlatbufferToJson(m.data,
487 {.multi_line = false, .max_vector_size = 1})
488 << "}";
489 return os;
490}
491
Austin Schuh6f3babe2020-01-26 20:34:50 -0800492SplitMessageReader::SplitMessageReader(
Austin Schuhfa895892020-01-07 20:07:41 -0800493 const std::vector<std::string> &filenames)
494 : filenames_(filenames),
Austin Schuhadd6eb32020-11-09 21:24:26 -0800495 log_file_header_(SizePrefixedFlatbufferVector<LogFileHeader>::Empty()) {
Austin Schuhfa895892020-01-07 20:07:41 -0800496 CHECK(NextLogFile()) << ": filenames is empty. Need files to read.";
497
Austin Schuh6f3babe2020-01-26 20:34:50 -0800498 // Grab any log file header. They should all match (and we will check as we
499 // open more of them).
Austin Schuh97789fc2020-08-01 14:42:45 -0700500 log_file_header_ = message_reader_->raw_log_file_header();
Austin Schuhfa895892020-01-07 20:07:41 -0800501
Austin Schuh2f8fd752020-09-01 22:38:28 -0700502 for (size_t i = 1; i < filenames_.size(); ++i) {
503 MessageReader message_reader(filenames_[i]);
504
505 const monotonic_clock::time_point new_monotonic_start_time(
506 chrono::nanoseconds(
507 message_reader.log_file_header()->monotonic_start_time()));
508 const realtime_clock::time_point new_realtime_start_time(
509 chrono::nanoseconds(
510 message_reader.log_file_header()->realtime_start_time()));
511
512 // There are 2 types of part files. Part files from before time estimation
513 // has started, and part files after. We don't declare a log file "started"
514 // until time estimation is up. And once a log file starts, it should never
515 // stop again, and should remain constant.
516 // To compare both types of headers, we mutate our saved copy of the header
517 // to match the next chunk by updating time if we detect a stopped ->
518 // started transition.
519 if (monotonic_start_time() == monotonic_clock::min_time) {
520 CHECK_EQ(realtime_start_time(), realtime_clock::min_time);
521 // We should only be missing the monotonic start time when logging data
Brian Silverman87ac0402020-09-17 14:47:01 -0700522 // for remote nodes. We don't have a good way to determine the remote
Austin Schuh2f8fd752020-09-01 22:38:28 -0700523 // realtime offset, so it shouldn't be filled out.
524 // TODO(austin): If we have a good way, feel free to fill it out. It
525 // probably won't be better than we could do in post though with the same
526 // data.
527 CHECK(!log_file_header_.mutable_message()->has_realtime_start_time());
528 if (new_monotonic_start_time != monotonic_clock::min_time) {
529 // If we finally found our start time, update the header. Do this once
530 // because it should never change again.
531 log_file_header_.mutable_message()->mutate_monotonic_start_time(
532 new_monotonic_start_time.time_since_epoch().count());
533 log_file_header_.mutable_message()->mutate_realtime_start_time(
534 new_realtime_start_time.time_since_epoch().count());
535 }
536 }
537
Austin Schuh64fab802020-09-09 22:47:47 -0700538 // We don't have a good way to set the realtime start time on remote nodes.
539 // Confirm it remains consistent.
540 CHECK_EQ(log_file_header_.mutable_message()->has_realtime_start_time(),
541 message_reader.log_file_header()->has_realtime_start_time());
542
543 // Parts index will *not* match unless we set them to match. We only want
544 // to accept the start time and parts mismatching, so set them.
545 log_file_header_.mutable_message()->mutate_parts_index(
546 message_reader.log_file_header()->parts_index());
547
Austin Schuh2f8fd752020-09-01 22:38:28 -0700548 // Now compare that the headers match.
Austin Schuh64fab802020-09-09 22:47:47 -0700549 if (!CompareFlatBuffer(message_reader.raw_log_file_header(),
550 log_file_header_)) {
Brian Silvermanae7c0332020-09-30 16:58:23 -0700551 if (message_reader.log_file_header()->has_log_event_uuid() &&
552 log_file_header_.message().has_log_event_uuid() &&
553 message_reader.log_file_header()->log_event_uuid()->string_view() !=
554 log_file_header_.message().log_event_uuid()->string_view()) {
Austin Schuh64fab802020-09-09 22:47:47 -0700555 LOG(FATAL) << "Logger UUIDs don't match between log file chunks "
556 << filenames_[0] << " and " << filenames_[i]
557 << ", this is not supported.";
558 }
559 if (message_reader.log_file_header()->has_parts_uuid() &&
560 log_file_header_.message().has_parts_uuid() &&
561 message_reader.log_file_header()->parts_uuid()->string_view() !=
562 log_file_header_.message().parts_uuid()->string_view()) {
563 LOG(FATAL) << "Parts UUIDs don't match between log file chunks "
564 << filenames_[0] << " and " << filenames_[i]
565 << ", this is not supported.";
566 }
567
568 LOG(FATAL) << "Header is different between log file chunks "
569 << filenames_[0] << " and " << filenames_[i]
570 << ", this is not supported.";
571 }
Austin Schuh2f8fd752020-09-01 22:38:28 -0700572 }
Austin Schuh64fab802020-09-09 22:47:47 -0700573 // Put the parts index back to the first log file chunk.
574 log_file_header_.mutable_message()->mutate_parts_index(
575 message_reader_->log_file_header()->parts_index());
Austin Schuh2f8fd752020-09-01 22:38:28 -0700576
Austin Schuh6f3babe2020-01-26 20:34:50 -0800577 // Setup per channel state.
Austin Schuh05b70472020-01-01 17:11:17 -0800578 channels_.resize(configuration()->channels()->size());
Austin Schuh6f3babe2020-01-26 20:34:50 -0800579 for (ChannelData &channel_data : channels_) {
580 channel_data.data.split_reader = this;
581 // Build up the timestamp list.
582 if (configuration::MultiNode(configuration())) {
583 channel_data.timestamps.resize(configuration()->nodes()->size());
584 for (MessageHeaderQueue &queue : channel_data.timestamps) {
585 queue.timestamps = true;
586 queue.split_reader = this;
587 }
588 }
589 }
Austin Schuh05b70472020-01-01 17:11:17 -0800590
Austin Schuh6f3babe2020-01-26 20:34:50 -0800591 // Build up channels_to_write_ as an optimization to make it fast to figure
592 // out which datastructure to place any new data from a channel on.
593 for (const Channel *channel : *configuration()->channels()) {
594 // This is the main case. We will only see data on this node.
595 if (configuration::ChannelIsSendableOnNode(channel, node())) {
596 channels_to_write_.emplace_back(
597 &channels_[channels_to_write_.size()].data);
598 } else
599 // If we can't send, but can receive, we should be able to see
600 // timestamps here.
601 if (configuration::ChannelIsReadableOnNode(channel, node())) {
602 channels_to_write_.emplace_back(
603 &(channels_[channels_to_write_.size()]
604 .timestamps[configuration::GetNodeIndex(configuration(),
605 node())]));
606 } else {
607 channels_to_write_.emplace_back(nullptr);
608 }
609 }
Austin Schuh05b70472020-01-01 17:11:17 -0800610}
611
Austin Schuh6f3babe2020-01-26 20:34:50 -0800612bool SplitMessageReader::NextLogFile() {
Austin Schuhfa895892020-01-07 20:07:41 -0800613 if (next_filename_index_ == filenames_.size()) {
614 return false;
615 }
616 message_reader_ =
617 std::make_unique<MessageReader>(filenames_[next_filename_index_]);
618
619 // We can't support the config diverging between two log file headers. See if
620 // they are the same.
621 if (next_filename_index_ != 0) {
Austin Schuh64fab802020-09-09 22:47:47 -0700622 // In order for the headers to identically compare, they need to have the
623 // same parts_index. Rewrite the saved header with the new parts_index,
624 // compare, and then restore.
625 const int32_t original_parts_index =
626 log_file_header_.message().parts_index();
627 log_file_header_.mutable_message()->mutate_parts_index(
628 message_reader_->log_file_header()->parts_index());
629
Austin Schuh97789fc2020-08-01 14:42:45 -0700630 CHECK(CompareFlatBuffer(message_reader_->raw_log_file_header(),
631 log_file_header_))
Austin Schuhfa895892020-01-07 20:07:41 -0800632 << ": Header is different between log file chunks "
633 << filenames_[next_filename_index_] << " and "
634 << filenames_[next_filename_index_ - 1] << ", this is not supported.";
Austin Schuh64fab802020-09-09 22:47:47 -0700635
636 log_file_header_.mutable_message()->mutate_parts_index(
637 original_parts_index);
Austin Schuhfa895892020-01-07 20:07:41 -0800638 }
639
640 ++next_filename_index_;
641 return true;
642}
643
Austin Schuh6f3babe2020-01-26 20:34:50 -0800644bool SplitMessageReader::QueueMessages(
Austin Schuhcde938c2020-02-02 17:30:07 -0800645 monotonic_clock::time_point last_dequeued_time) {
Austin Schuh6f3babe2020-01-26 20:34:50 -0800646 // TODO(austin): Once we are happy that everything works, read a 256kb chunk
647 // to reduce the need to re-heap down below.
Austin Schuhcde938c2020-02-02 17:30:07 -0800648
649 // Special case no more data. Otherwise we blow up on the CHECK statement
650 // confirming that we have enough data queued.
651 if (at_end_) {
652 return false;
653 }
654
655 // If this isn't the first time around, confirm that we had enough data queued
656 // to follow the contract.
657 if (time_to_queue_ != monotonic_clock::min_time) {
658 CHECK_LE(last_dequeued_time,
659 newest_timestamp() - max_out_of_order_duration())
660 << " node " << FlatbufferToJson(node()) << " on " << this;
661
662 // Bail if there is enough data already queued.
663 if (last_dequeued_time < time_to_queue_) {
Austin Schuhee711052020-08-24 16:06:09 -0700664 VLOG(1) << MaybeNodeName(target_node_) << "All up to date on " << this
665 << ", dequeued " << last_dequeued_time << " queue time "
666 << time_to_queue_;
Austin Schuhcde938c2020-02-02 17:30:07 -0800667 return true;
668 }
669 } else {
670 // Startup takes a special dance. We want to queue up until the start time,
671 // but we then want to find the next message to read. The conservative
672 // answer is to immediately trigger a second requeue to get things moving.
673 time_to_queue_ = monotonic_start_time();
Austin Schuheeba0292020-10-11 16:20:05 -0700674 CHECK_NE(time_to_queue_, monotonic_clock::min_time);
Austin Schuhcde938c2020-02-02 17:30:07 -0800675 QueueMessages(time_to_queue_);
676 }
677
678 // If we are asked to queue, queue for at least max_out_of_order_duration past
679 // the last known time in the log file (ie the newest timestep read). As long
680 // as we requeue exactly when time_to_queue_ is dequeued and go no further, we
681 // are safe. And since we pop in order, that works.
682 //
683 // Special case the start of the log file. There should be at most 1 message
684 // from each channel at the start of the log file. So always force the start
685 // of the log file to just be read.
686 time_to_queue_ = std::max(time_to_queue_, newest_timestamp());
Austin Schuhee711052020-08-24 16:06:09 -0700687 VLOG(1) << MaybeNodeName(target_node_) << "Queueing, going until "
688 << time_to_queue_ << " " << filename();
Austin Schuhcde938c2020-02-02 17:30:07 -0800689
690 bool was_emplaced = false;
Austin Schuh6f3babe2020-01-26 20:34:50 -0800691 while (true) {
Austin Schuhcde938c2020-02-02 17:30:07 -0800692 // Stop if we have enough.
Brian Silverman98360e22020-04-28 16:51:20 -0700693 if (newest_timestamp() > time_to_queue_ + max_out_of_order_duration() &&
Austin Schuhcde938c2020-02-02 17:30:07 -0800694 was_emplaced) {
Austin Schuhee711052020-08-24 16:06:09 -0700695 VLOG(1) << MaybeNodeName(target_node_) << "Done queueing on " << this
696 << ", queued to " << newest_timestamp() << " with requeue time "
697 << time_to_queue_;
Austin Schuh6f3babe2020-01-26 20:34:50 -0800698 return true;
699 }
Austin Schuh05b70472020-01-01 17:11:17 -0800700
Austin Schuhadd6eb32020-11-09 21:24:26 -0800701 if (std::optional<SizePrefixedFlatbufferVector<MessageHeader>> msg =
Austin Schuh6f3babe2020-01-26 20:34:50 -0800702 message_reader_->ReadMessage()) {
703 const MessageHeader &header = msg.value().message();
704
Austin Schuhcde938c2020-02-02 17:30:07 -0800705 const monotonic_clock::time_point timestamp = monotonic_clock::time_point(
706 chrono::nanoseconds(header.monotonic_sent_time()));
Austin Schuh6f3babe2020-01-26 20:34:50 -0800707
Austin Schuh0b5fd032020-03-28 17:36:49 -0700708 if (VLOG_IS_ON(2)) {
Brian Silvermand90905f2020-09-23 14:42:56 -0700709 LOG(INFO) << MaybeNodeName(target_node_) << "Queued " << this << " "
710 << filename() << " ttq: " << time_to_queue_ << " now "
Austin Schuhee711052020-08-24 16:06:09 -0700711 << newest_timestamp() << " start time "
712 << monotonic_start_time() << " " << FlatbufferToJson(&header);
Austin Schuh0b5fd032020-03-28 17:36:49 -0700713 } else if (VLOG_IS_ON(1)) {
Austin Schuhadd6eb32020-11-09 21:24:26 -0800714 SizePrefixedFlatbufferVector<MessageHeader> copy = msg.value();
Austin Schuh0b5fd032020-03-28 17:36:49 -0700715 copy.mutable_message()->clear_data();
Austin Schuhee711052020-08-24 16:06:09 -0700716 LOG(INFO) << MaybeNodeName(target_node_) << "Queued " << this << " "
717 << filename() << " ttq: " << time_to_queue_ << " now "
718 << newest_timestamp() << " start time "
719 << monotonic_start_time() << " " << FlatbufferToJson(copy);
Austin Schuh0b5fd032020-03-28 17:36:49 -0700720 }
Austin Schuhcde938c2020-02-02 17:30:07 -0800721
722 const int channel_index = header.channel_index();
723 was_emplaced = channels_to_write_[channel_index]->emplace_back(
724 std::move(msg.value()));
725 if (was_emplaced) {
726 newest_timestamp_ = std::max(newest_timestamp_, timestamp);
727 }
Austin Schuh6f3babe2020-01-26 20:34:50 -0800728 } else {
729 if (!NextLogFile()) {
Austin Schuhee711052020-08-24 16:06:09 -0700730 VLOG(1) << MaybeNodeName(target_node_) << "No more files, last was "
731 << filenames_.back();
Austin Schuhcde938c2020-02-02 17:30:07 -0800732 at_end_ = true;
Austin Schuh8bd96322020-02-13 21:18:22 -0800733 for (MessageHeaderQueue *queue : channels_to_write_) {
734 if (queue == nullptr || queue->timestamp_merger == nullptr) {
735 continue;
736 }
737 queue->timestamp_merger->NoticeAtEnd();
738 }
Austin Schuh6f3babe2020-01-26 20:34:50 -0800739 return false;
740 }
741 }
Austin Schuh05b70472020-01-01 17:11:17 -0800742 }
Austin Schuh6f3babe2020-01-26 20:34:50 -0800743}
744
745void SplitMessageReader::SetTimestampMerger(TimestampMerger *timestamp_merger,
746 int channel_index,
747 const Node *target_node) {
748 const Node *reinterpreted_target_node =
749 configuration::GetNodeOrDie(configuration(), target_node);
Austin Schuhee711052020-08-24 16:06:09 -0700750 target_node_ = reinterpreted_target_node;
751
Austin Schuh6f3babe2020-01-26 20:34:50 -0800752 const Channel *const channel =
753 configuration()->channels()->Get(channel_index);
754
Austin Schuhcde938c2020-02-02 17:30:07 -0800755 VLOG(1) << " Configuring merger " << this << " for channel " << channel_index
756 << " "
757 << configuration::CleanedChannelToString(
758 configuration()->channels()->Get(channel_index));
759
Austin Schuh6f3babe2020-01-26 20:34:50 -0800760 MessageHeaderQueue *message_header_queue = nullptr;
761
762 // Figure out if this log file is from our point of view, or the other node's
763 // point of view.
764 if (node() == reinterpreted_target_node) {
Austin Schuhcde938c2020-02-02 17:30:07 -0800765 VLOG(1) << " Replaying as logged node " << filename();
766
767 if (configuration::ChannelIsSendableOnNode(channel, node())) {
768 VLOG(1) << " Data on node";
769 message_header_queue = &(channels_[channel_index].data);
770 } else if (configuration::ChannelIsReadableOnNode(channel, node())) {
771 VLOG(1) << " Timestamps on node";
772 message_header_queue =
773 &(channels_[channel_index].timestamps[configuration::GetNodeIndex(
774 configuration(), node())]);
Austin Schuh6f3babe2020-01-26 20:34:50 -0800775 } else {
Austin Schuhcde938c2020-02-02 17:30:07 -0800776 VLOG(1) << " Dropping";
Austin Schuh6f3babe2020-01-26 20:34:50 -0800777 }
778 } else {
Austin Schuhcde938c2020-02-02 17:30:07 -0800779 VLOG(1) << " Replaying as other node " << filename();
Austin Schuh6f3babe2020-01-26 20:34:50 -0800780 // We are replaying from another node's point of view. The only interesting
Austin Schuhcde938c2020-02-02 17:30:07 -0800781 // data is data that is sent from our node and received on theirs.
782 if (configuration::ChannelIsReadableOnNode(channel,
783 reinterpreted_target_node) &&
784 configuration::ChannelIsSendableOnNode(channel, node())) {
785 VLOG(1) << " Readable on target node";
Austin Schuh6f3babe2020-01-26 20:34:50 -0800786 // Data from another node.
787 message_header_queue = &(channels_[channel_index].data);
788 } else {
Austin Schuhcde938c2020-02-02 17:30:07 -0800789 VLOG(1) << " Dropping";
Austin Schuh6f3babe2020-01-26 20:34:50 -0800790 // This is either not sendable on the other node, or is a timestamp and
791 // therefore not interesting.
792 }
793 }
794
795 // If we found one, write it down. This will be nullptr when there is nothing
796 // relevant on this channel on this node for the target node. In that case,
797 // we want to drop the message instead of queueing it.
798 if (message_header_queue != nullptr) {
799 message_header_queue->timestamp_merger = timestamp_merger;
800 }
801}
802
803std::tuple<monotonic_clock::time_point, uint32_t,
Austin Schuhadd6eb32020-11-09 21:24:26 -0800804 SizePrefixedFlatbufferVector<MessageHeader>>
Austin Schuh6f3babe2020-01-26 20:34:50 -0800805SplitMessageReader::PopOldest(int channel_index) {
806 CHECK_GT(channels_[channel_index].data.size(), 0u);
Austin Schuhcde938c2020-02-02 17:30:07 -0800807 const std::tuple<monotonic_clock::time_point, uint32_t, const MessageHeader *>
808 timestamp = channels_[channel_index].data.front_timestamp();
Austin Schuhadd6eb32020-11-09 21:24:26 -0800809 SizePrefixedFlatbufferVector<MessageHeader> front =
Austin Schuh6f3babe2020-01-26 20:34:50 -0800810 std::move(channels_[channel_index].data.front());
Austin Schuh2f8fd752020-09-01 22:38:28 -0700811 channels_[channel_index].data.PopFront();
Austin Schuhcde938c2020-02-02 17:30:07 -0800812
Austin Schuh2f8fd752020-09-01 22:38:28 -0700813 VLOG(1) << MaybeNodeName(target_node_) << "Popped Data " << this << " "
814 << std::get<0>(timestamp) << " for "
815 << configuration::StrippedChannelToString(
816 configuration()->channels()->Get(channel_index))
817 << " (" << channel_index << ")";
Austin Schuhcde938c2020-02-02 17:30:07 -0800818
819 QueueMessages(std::get<0>(timestamp));
Austin Schuh6f3babe2020-01-26 20:34:50 -0800820
821 return std::make_tuple(std::get<0>(timestamp), std::get<1>(timestamp),
822 std::move(front));
823}
824
825std::tuple<monotonic_clock::time_point, uint32_t,
Austin Schuhadd6eb32020-11-09 21:24:26 -0800826 SizePrefixedFlatbufferVector<MessageHeader>>
Austin Schuh2f8fd752020-09-01 22:38:28 -0700827SplitMessageReader::PopOldestTimestamp(int channel, int node_index) {
Austin Schuh6f3babe2020-01-26 20:34:50 -0800828 CHECK_GT(channels_[channel].timestamps[node_index].size(), 0u);
Austin Schuhcde938c2020-02-02 17:30:07 -0800829 const std::tuple<monotonic_clock::time_point, uint32_t, const MessageHeader *>
830 timestamp = channels_[channel].timestamps[node_index].front_timestamp();
Austin Schuhadd6eb32020-11-09 21:24:26 -0800831 SizePrefixedFlatbufferVector<MessageHeader> front =
Austin Schuh6f3babe2020-01-26 20:34:50 -0800832 std::move(channels_[channel].timestamps[node_index].front());
Austin Schuh2f8fd752020-09-01 22:38:28 -0700833 channels_[channel].timestamps[node_index].PopFront();
Austin Schuhcde938c2020-02-02 17:30:07 -0800834
Austin Schuh2f8fd752020-09-01 22:38:28 -0700835 VLOG(1) << MaybeNodeName(target_node_) << "Popped timestamp " << this << " "
Austin Schuhee711052020-08-24 16:06:09 -0700836 << std::get<0>(timestamp) << " for "
837 << configuration::StrippedChannelToString(
838 configuration()->channels()->Get(channel))
Austin Schuh2f8fd752020-09-01 22:38:28 -0700839 << " on "
840 << configuration()->nodes()->Get(node_index)->name()->string_view()
841 << " (" << node_index << ")";
Austin Schuhcde938c2020-02-02 17:30:07 -0800842
843 QueueMessages(std::get<0>(timestamp));
Austin Schuh6f3babe2020-01-26 20:34:50 -0800844
845 return std::make_tuple(std::get<0>(timestamp), std::get<1>(timestamp),
846 std::move(front));
847}
848
Austin Schuhcde938c2020-02-02 17:30:07 -0800849bool SplitMessageReader::MessageHeaderQueue::emplace_back(
Austin Schuhadd6eb32020-11-09 21:24:26 -0800850 SizePrefixedFlatbufferVector<MessageHeader> &&msg) {
Austin Schuh6f3babe2020-01-26 20:34:50 -0800851 CHECK(split_reader != nullptr);
852
853 // If there is no timestamp merger for this queue, nobody is listening. Drop
854 // the message. This happens when a log file from another node is replayed,
855 // and the timestamp mergers down stream just don't care.
856 if (timestamp_merger == nullptr) {
Austin Schuhcde938c2020-02-02 17:30:07 -0800857 return false;
Austin Schuh6f3babe2020-01-26 20:34:50 -0800858 }
859
860 CHECK(timestamps != msg.message().has_data())
861 << ": Got timestamps and data mixed up on a node. "
862 << FlatbufferToJson(msg);
863
864 data_.emplace_back(std::move(msg));
865
866 if (data_.size() == 1u) {
867 // Yup, new data. Notify.
868 if (timestamps) {
869 timestamp_merger->UpdateTimestamp(split_reader, front_timestamp());
870 } else {
871 timestamp_merger->Update(split_reader, front_timestamp());
872 }
873 }
Austin Schuhcde938c2020-02-02 17:30:07 -0800874
875 return true;
Austin Schuh6f3babe2020-01-26 20:34:50 -0800876}
877
Austin Schuh2f8fd752020-09-01 22:38:28 -0700878void SplitMessageReader::MessageHeaderQueue::PopFront() {
Austin Schuh6f3babe2020-01-26 20:34:50 -0800879 data_.pop_front();
880 if (data_.size() != 0u) {
881 // Yup, new data.
882 if (timestamps) {
883 timestamp_merger->UpdateTimestamp(split_reader, front_timestamp());
884 } else {
885 timestamp_merger->Update(split_reader, front_timestamp());
886 }
Austin Schuh2f8fd752020-09-01 22:38:28 -0700887 } else {
888 // Poke anyways to update the heap.
889 if (timestamps) {
890 timestamp_merger->UpdateTimestamp(
891 nullptr, std::make_tuple(monotonic_clock::min_time, 0, nullptr));
892 } else {
893 timestamp_merger->Update(
894 nullptr, std::make_tuple(monotonic_clock::min_time, 0, nullptr));
895 }
Austin Schuh6f3babe2020-01-26 20:34:50 -0800896 }
Austin Schuh05b70472020-01-01 17:11:17 -0800897}
898
899namespace {
900
Austin Schuh6f3babe2020-01-26 20:34:50 -0800901bool SplitMessageReaderHeapCompare(
902 const std::tuple<monotonic_clock::time_point, uint32_t,
903 SplitMessageReader *>
904 first,
905 const std::tuple<monotonic_clock::time_point, uint32_t,
906 SplitMessageReader *>
907 second) {
908 if (std::get<0>(first) > std::get<0>(second)) {
909 return true;
910 } else if (std::get<0>(first) == std::get<0>(second)) {
911 if (std::get<1>(first) > std::get<1>(second)) {
912 return true;
913 } else if (std::get<1>(first) == std::get<1>(second)) {
914 return std::get<2>(first) > std::get<2>(second);
915 } else {
916 return false;
917 }
918 } else {
919 return false;
920 }
921}
922
Austin Schuh05b70472020-01-01 17:11:17 -0800923bool ChannelHeapCompare(
924 const std::pair<monotonic_clock::time_point, int> first,
925 const std::pair<monotonic_clock::time_point, int> second) {
926 if (first.first > second.first) {
927 return true;
928 } else if (first.first == second.first) {
929 return first.second > second.second;
930 } else {
931 return false;
932 }
933}
934
935} // namespace
936
Austin Schuh6f3babe2020-01-26 20:34:50 -0800937TimestampMerger::TimestampMerger(
938 const Configuration *configuration,
939 std::vector<SplitMessageReader *> split_message_readers, int channel_index,
940 const Node *target_node, ChannelMerger *channel_merger)
941 : configuration_(configuration),
942 split_message_readers_(std::move(split_message_readers)),
943 channel_index_(channel_index),
944 node_index_(configuration::MultiNode(configuration)
945 ? configuration::GetNodeIndex(configuration, target_node)
946 : -1),
947 channel_merger_(channel_merger) {
948 // Tell the readers we care so they know who to notify.
Austin Schuhcde938c2020-02-02 17:30:07 -0800949 VLOG(1) << "Configuring channel " << channel_index << " target node "
950 << FlatbufferToJson(target_node);
Austin Schuh6f3babe2020-01-26 20:34:50 -0800951 for (SplitMessageReader *reader : split_message_readers_) {
952 reader->SetTimestampMerger(this, channel_index, target_node);
953 }
954
955 // And then determine if we need to track timestamps.
956 const Channel *channel = configuration->channels()->Get(channel_index);
957 if (!configuration::ChannelIsSendableOnNode(channel, target_node) &&
958 configuration::ChannelIsReadableOnNode(channel, target_node)) {
959 has_timestamps_ = true;
960 }
961}
962
963void TimestampMerger::PushMessageHeap(
Austin Schuhcde938c2020-02-02 17:30:07 -0800964 std::tuple<monotonic_clock::time_point, uint32_t, const MessageHeader *>
965 timestamp,
Austin Schuh6f3babe2020-01-26 20:34:50 -0800966 SplitMessageReader *split_message_reader) {
Austin Schuh2f8fd752020-09-01 22:38:28 -0700967 if (split_message_reader != nullptr) {
968 DCHECK(std::find_if(message_heap_.begin(), message_heap_.end(),
969 [split_message_reader](
970 const std::tuple<monotonic_clock::time_point,
971 uint32_t, SplitMessageReader *>
972 x) {
973 return std::get<2>(x) == split_message_reader;
974 }) == message_heap_.end())
975 << ": Pushing message when it is already in the heap.";
Austin Schuh6f3babe2020-01-26 20:34:50 -0800976
Austin Schuh2f8fd752020-09-01 22:38:28 -0700977 message_heap_.push_back(std::make_tuple(
978 std::get<0>(timestamp), std::get<1>(timestamp), split_message_reader));
Austin Schuh6f3babe2020-01-26 20:34:50 -0800979
Austin Schuh2f8fd752020-09-01 22:38:28 -0700980 std::push_heap(message_heap_.begin(), message_heap_.end(),
981 &SplitMessageReaderHeapCompare);
982 }
Austin Schuh6f3babe2020-01-26 20:34:50 -0800983
984 // If we are just a data merger, don't wait for timestamps.
985 if (!has_timestamps_) {
Austin Schuh2f8fd752020-09-01 22:38:28 -0700986 if (!message_heap_.empty()) {
987 channel_merger_->Update(std::get<0>(message_heap_[0]), channel_index_);
988 pushed_ = true;
989 } else {
990 // Remove ourselves if we are empty.
991 channel_merger_->Update(monotonic_clock::min_time, channel_index_);
992 }
Austin Schuh6f3babe2020-01-26 20:34:50 -0800993 }
994}
995
Austin Schuhcde938c2020-02-02 17:30:07 -0800996std::tuple<monotonic_clock::time_point, uint32_t, const MessageHeader *>
997TimestampMerger::oldest_message() const {
998 CHECK_GT(message_heap_.size(), 0u);
999 std::tuple<monotonic_clock::time_point, uint32_t, SplitMessageReader *>
1000 oldest_message_reader = message_heap_.front();
1001 return std::get<2>(oldest_message_reader)->oldest_message(channel_index_);
1002}
1003
1004std::tuple<monotonic_clock::time_point, uint32_t, const MessageHeader *>
1005TimestampMerger::oldest_timestamp() const {
1006 CHECK_GT(timestamp_heap_.size(), 0u);
1007 std::tuple<monotonic_clock::time_point, uint32_t, SplitMessageReader *>
1008 oldest_message_reader = timestamp_heap_.front();
1009 return std::get<2>(oldest_message_reader)
1010 ->oldest_message(channel_index_, node_index_);
1011}
1012
Austin Schuh6f3babe2020-01-26 20:34:50 -08001013void TimestampMerger::PushTimestampHeap(
Austin Schuhcde938c2020-02-02 17:30:07 -08001014 std::tuple<monotonic_clock::time_point, uint32_t, const MessageHeader *>
1015 timestamp,
Austin Schuh6f3babe2020-01-26 20:34:50 -08001016 SplitMessageReader *split_message_reader) {
Austin Schuh2f8fd752020-09-01 22:38:28 -07001017 if (split_message_reader != nullptr) {
1018 DCHECK(std::find_if(timestamp_heap_.begin(), timestamp_heap_.end(),
1019 [split_message_reader](
1020 const std::tuple<monotonic_clock::time_point,
1021 uint32_t, SplitMessageReader *>
1022 x) {
1023 return std::get<2>(x) == split_message_reader;
1024 }) == timestamp_heap_.end())
1025 << ": Pushing timestamp when it is already in the heap.";
Austin Schuh6f3babe2020-01-26 20:34:50 -08001026
Austin Schuh2f8fd752020-09-01 22:38:28 -07001027 timestamp_heap_.push_back(std::make_tuple(
1028 std::get<0>(timestamp), std::get<1>(timestamp), split_message_reader));
Austin Schuh6f3babe2020-01-26 20:34:50 -08001029
Austin Schuh2f8fd752020-09-01 22:38:28 -07001030 std::push_heap(timestamp_heap_.begin(), timestamp_heap_.end(),
1031 SplitMessageReaderHeapCompare);
1032 }
Austin Schuh6f3babe2020-01-26 20:34:50 -08001033
1034 // If we are a timestamp merger, don't wait for data. Missing data will be
1035 // caught at read time.
1036 if (has_timestamps_) {
Austin Schuh2f8fd752020-09-01 22:38:28 -07001037 if (!timestamp_heap_.empty()) {
1038 channel_merger_->Update(std::get<0>(timestamp_heap_[0]), channel_index_);
1039 pushed_ = true;
1040 } else {
1041 // Remove ourselves if we are empty.
1042 channel_merger_->Update(monotonic_clock::min_time, channel_index_);
1043 }
Austin Schuh6f3babe2020-01-26 20:34:50 -08001044 }
1045}
1046
1047std::tuple<monotonic_clock::time_point, uint32_t,
Austin Schuhadd6eb32020-11-09 21:24:26 -08001048 SizePrefixedFlatbufferVector<MessageHeader>>
Austin Schuh6f3babe2020-01-26 20:34:50 -08001049TimestampMerger::PopMessageHeap() {
1050 // Pop the oldest message reader pointer off the heap.
1051 CHECK_GT(message_heap_.size(), 0u);
1052 std::tuple<monotonic_clock::time_point, uint32_t, SplitMessageReader *>
1053 oldest_message_reader = message_heap_.front();
1054
1055 std::pop_heap(message_heap_.begin(), message_heap_.end(),
1056 &SplitMessageReaderHeapCompare);
1057 message_heap_.pop_back();
1058
1059 // Pop the oldest message. This re-pushes any messages from the reader to the
1060 // message heap.
1061 std::tuple<monotonic_clock::time_point, uint32_t,
Austin Schuhadd6eb32020-11-09 21:24:26 -08001062 SizePrefixedFlatbufferVector<MessageHeader>>
Austin Schuh6f3babe2020-01-26 20:34:50 -08001063 oldest_message =
1064 std::get<2>(oldest_message_reader)->PopOldest(channel_index_);
1065
1066 // Confirm that the time and queue_index we have recorded matches.
1067 CHECK_EQ(std::get<0>(oldest_message), std::get<0>(oldest_message_reader));
1068 CHECK_EQ(std::get<1>(oldest_message), std::get<1>(oldest_message_reader));
1069
1070 // Now, keep reading until we have found all duplicates.
Brian Silverman8a32ce62020-08-12 12:02:38 -07001071 while (!message_heap_.empty()) {
Austin Schuh6f3babe2020-01-26 20:34:50 -08001072 // See if it is a duplicate.
1073 std::tuple<monotonic_clock::time_point, uint32_t, SplitMessageReader *>
1074 next_oldest_message_reader = message_heap_.front();
1075
Austin Schuhcde938c2020-02-02 17:30:07 -08001076 std::tuple<monotonic_clock::time_point, uint32_t, const MessageHeader *>
1077 next_oldest_message_time = std::get<2>(next_oldest_message_reader)
1078 ->oldest_message(channel_index_);
Austin Schuh6f3babe2020-01-26 20:34:50 -08001079
1080 if (std::get<0>(next_oldest_message_time) == std::get<0>(oldest_message) &&
1081 std::get<1>(next_oldest_message_time) == std::get<1>(oldest_message)) {
1082 // Pop the message reader pointer.
1083 std::pop_heap(message_heap_.begin(), message_heap_.end(),
1084 &SplitMessageReaderHeapCompare);
1085 message_heap_.pop_back();
1086
1087 // Pop the next oldest message. This re-pushes any messages from the
1088 // reader.
1089 std::tuple<monotonic_clock::time_point, uint32_t,
Austin Schuhadd6eb32020-11-09 21:24:26 -08001090 SizePrefixedFlatbufferVector<MessageHeader>>
Austin Schuh6f3babe2020-01-26 20:34:50 -08001091 next_oldest_message = std::get<2>(next_oldest_message_reader)
1092 ->PopOldest(channel_index_);
1093
1094 // And make sure the message matches in it's entirety.
1095 CHECK(std::get<2>(oldest_message).span() ==
1096 std::get<2>(next_oldest_message).span())
1097 << ": Data at the same timestamp doesn't match.";
1098 } else {
1099 break;
1100 }
1101 }
1102
1103 return oldest_message;
1104}
1105
1106std::tuple<monotonic_clock::time_point, uint32_t,
Austin Schuhadd6eb32020-11-09 21:24:26 -08001107 SizePrefixedFlatbufferVector<MessageHeader>>
Austin Schuh6f3babe2020-01-26 20:34:50 -08001108TimestampMerger::PopTimestampHeap() {
1109 // Pop the oldest message reader pointer off the heap.
1110 CHECK_GT(timestamp_heap_.size(), 0u);
1111
1112 std::tuple<monotonic_clock::time_point, uint32_t, SplitMessageReader *>
1113 oldest_timestamp_reader = timestamp_heap_.front();
1114
1115 std::pop_heap(timestamp_heap_.begin(), timestamp_heap_.end(),
1116 &SplitMessageReaderHeapCompare);
1117 timestamp_heap_.pop_back();
1118
1119 CHECK(node_index_ != -1) << ": Timestamps in a single node environment";
1120
1121 // Pop the oldest message. This re-pushes any timestamps from the reader to
1122 // the timestamp heap.
1123 std::tuple<monotonic_clock::time_point, uint32_t,
Austin Schuhadd6eb32020-11-09 21:24:26 -08001124 SizePrefixedFlatbufferVector<MessageHeader>>
Austin Schuh6f3babe2020-01-26 20:34:50 -08001125 oldest_timestamp = std::get<2>(oldest_timestamp_reader)
Austin Schuh2f8fd752020-09-01 22:38:28 -07001126 ->PopOldestTimestamp(channel_index_, node_index_);
Austin Schuh6f3babe2020-01-26 20:34:50 -08001127
1128 // Confirm that the time we have recorded matches.
1129 CHECK_EQ(std::get<0>(oldest_timestamp), std::get<0>(oldest_timestamp_reader));
1130 CHECK_EQ(std::get<1>(oldest_timestamp), std::get<1>(oldest_timestamp_reader));
1131
Austin Schuh2f8fd752020-09-01 22:38:28 -07001132 // Now, keep reading until we have found all duplicates.
1133 while (!timestamp_heap_.empty()) {
1134 // See if it is a duplicate.
1135 std::tuple<monotonic_clock::time_point, uint32_t, SplitMessageReader *>
1136 next_oldest_timestamp_reader = timestamp_heap_.front();
Austin Schuh6f3babe2020-01-26 20:34:50 -08001137
Austin Schuh2f8fd752020-09-01 22:38:28 -07001138 std::tuple<monotonic_clock::time_point, uint32_t, const MessageHeader *>
1139 next_oldest_timestamp_time =
1140 std::get<2>(next_oldest_timestamp_reader)
1141 ->oldest_message(channel_index_, node_index_);
Austin Schuh6f3babe2020-01-26 20:34:50 -08001142
Austin Schuh2f8fd752020-09-01 22:38:28 -07001143 if (std::get<0>(next_oldest_timestamp_time) ==
1144 std::get<0>(oldest_timestamp) &&
1145 std::get<1>(next_oldest_timestamp_time) ==
1146 std::get<1>(oldest_timestamp)) {
1147 // Pop the timestamp reader pointer.
1148 std::pop_heap(timestamp_heap_.begin(), timestamp_heap_.end(),
1149 &SplitMessageReaderHeapCompare);
1150 timestamp_heap_.pop_back();
1151
1152 // Pop the next oldest timestamp. This re-pushes any messages from the
1153 // reader.
1154 std::tuple<monotonic_clock::time_point, uint32_t,
Austin Schuhadd6eb32020-11-09 21:24:26 -08001155 SizePrefixedFlatbufferVector<MessageHeader>>
Austin Schuh2f8fd752020-09-01 22:38:28 -07001156 next_oldest_timestamp =
1157 std::get<2>(next_oldest_timestamp_reader)
1158 ->PopOldestTimestamp(channel_index_, node_index_);
1159
1160 // And make sure the contents matches in it's entirety.
1161 CHECK(std::get<2>(oldest_timestamp).span() ==
1162 std::get<2>(next_oldest_timestamp).span())
1163 << ": Data at the same timestamp doesn't match, "
1164 << aos::FlatbufferToJson(std::get<2>(oldest_timestamp)) << " vs "
1165 << aos::FlatbufferToJson(std::get<2>(next_oldest_timestamp)) << " "
1166 << absl::BytesToHexString(std::string_view(
1167 reinterpret_cast<const char *>(
1168 std::get<2>(oldest_timestamp).span().data()),
1169 std::get<2>(oldest_timestamp).span().size()))
1170 << " vs "
1171 << absl::BytesToHexString(std::string_view(
1172 reinterpret_cast<const char *>(
1173 std::get<2>(next_oldest_timestamp).span().data()),
1174 std::get<2>(next_oldest_timestamp).span().size()));
1175
1176 } else {
1177 break;
1178 }
Austin Schuh8bd96322020-02-13 21:18:22 -08001179 }
1180
Austin Schuh2f8fd752020-09-01 22:38:28 -07001181 return oldest_timestamp;
Austin Schuh8bd96322020-02-13 21:18:22 -08001182}
1183
Austin Schuhadd6eb32020-11-09 21:24:26 -08001184std::tuple<TimestampMerger::DeliveryTimestamp,
1185 SizePrefixedFlatbufferVector<MessageHeader>>
Austin Schuh6f3babe2020-01-26 20:34:50 -08001186TimestampMerger::PopOldest() {
1187 if (has_timestamps_) {
Austin Schuh2f8fd752020-09-01 22:38:28 -07001188 VLOG(1) << "Looking for matching timestamp for "
1189 << configuration::StrippedChannelToString(
1190 configuration_->channels()->Get(channel_index_))
1191 << " (" << channel_index_ << ") "
1192 << " at " << std::get<0>(oldest_timestamp());
1193
Austin Schuh8bd96322020-02-13 21:18:22 -08001194 // Read the timestamps.
Austin Schuh6f3babe2020-01-26 20:34:50 -08001195 std::tuple<monotonic_clock::time_point, uint32_t,
Austin Schuhadd6eb32020-11-09 21:24:26 -08001196 SizePrefixedFlatbufferVector<MessageHeader>>
Austin Schuh6f3babe2020-01-26 20:34:50 -08001197 oldest_timestamp = PopTimestampHeap();
1198
1199 TimestampMerger::DeliveryTimestamp timestamp;
1200 timestamp.monotonic_event_time =
1201 monotonic_clock::time_point(chrono::nanoseconds(
1202 std::get<2>(oldest_timestamp).message().monotonic_sent_time()));
1203 timestamp.realtime_event_time =
1204 realtime_clock::time_point(chrono::nanoseconds(
1205 std::get<2>(oldest_timestamp).message().realtime_sent_time()));
Austin Schuh8d7e0bb2020-10-02 17:57:00 -07001206 timestamp.queue_index =
1207 std::get<2>(oldest_timestamp).message().queue_index();
Austin Schuh6f3babe2020-01-26 20:34:50 -08001208
1209 // Consistency check.
1210 CHECK_EQ(timestamp.monotonic_event_time, std::get<0>(oldest_timestamp));
1211 CHECK_EQ(std::get<2>(oldest_timestamp).message().queue_index(),
1212 std::get<1>(oldest_timestamp));
1213
1214 monotonic_clock::time_point remote_timestamp_monotonic_time(
1215 chrono::nanoseconds(
1216 std::get<2>(oldest_timestamp).message().monotonic_remote_time()));
1217
Austin Schuh8bd96322020-02-13 21:18:22 -08001218 // See if we have any data. If not, pass the problem up the chain.
Brian Silverman8a32ce62020-08-12 12:02:38 -07001219 if (message_heap_.empty()) {
Austin Schuhee711052020-08-24 16:06:09 -07001220 LOG(WARNING) << MaybeNodeName(configuration_->nodes()->Get(node_index_))
1221 << "No data to match timestamp on "
1222 << configuration::CleanedChannelToString(
1223 configuration_->channels()->Get(channel_index_))
1224 << " (" << channel_index_ << ")";
Austin Schuh8bd96322020-02-13 21:18:22 -08001225 return std::make_tuple(timestamp,
1226 std::move(std::get<2>(oldest_timestamp)));
1227 }
1228
Austin Schuh6f3babe2020-01-26 20:34:50 -08001229 while (true) {
Austin Schuhcde938c2020-02-02 17:30:07 -08001230 {
1231 // Ok, now try grabbing data until we find one which matches.
1232 std::tuple<monotonic_clock::time_point, uint32_t, const MessageHeader *>
1233 oldest_message_ref = oldest_message();
1234
1235 // Time at which the message was sent (this message is written from the
1236 // sending node's perspective.
1237 monotonic_clock::time_point remote_monotonic_time(chrono::nanoseconds(
1238 std::get<2>(oldest_message_ref)->monotonic_sent_time()));
1239
1240 if (remote_monotonic_time < remote_timestamp_monotonic_time) {
Austin Schuhee711052020-08-24 16:06:09 -07001241 LOG(WARNING) << configuration_->nodes()
1242 ->Get(node_index_)
1243 ->name()
1244 ->string_view()
1245 << " Undelivered message, skipping. Remote time is "
1246 << remote_monotonic_time << " timestamp is "
1247 << remote_timestamp_monotonic_time << " on channel "
1248 << configuration::StrippedChannelToString(
1249 configuration_->channels()->Get(channel_index_))
1250 << " (" << channel_index_ << ")";
Austin Schuhcde938c2020-02-02 17:30:07 -08001251 PopMessageHeap();
1252 continue;
1253 } else if (remote_monotonic_time > remote_timestamp_monotonic_time) {
Austin Schuhee711052020-08-24 16:06:09 -07001254 LOG(WARNING) << configuration_->nodes()
1255 ->Get(node_index_)
1256 ->name()
1257 ->string_view()
1258 << " Data not found. Remote time should be "
1259 << remote_timestamp_monotonic_time
1260 << ", message time is " << remote_monotonic_time
1261 << " on channel "
1262 << configuration::StrippedChannelToString(
1263 configuration_->channels()->Get(channel_index_))
Austin Schuh2f8fd752020-09-01 22:38:28 -07001264 << " (" << channel_index_ << ")"
1265 << (VLOG_IS_ON(1) ? DebugString() : "");
Austin Schuhcde938c2020-02-02 17:30:07 -08001266 return std::make_tuple(timestamp,
1267 std::move(std::get<2>(oldest_timestamp)));
1268 }
1269
1270 timestamp.monotonic_remote_time = remote_monotonic_time;
1271 }
1272
Austin Schuh2f8fd752020-09-01 22:38:28 -07001273 VLOG(1) << "Found matching data "
1274 << configuration::StrippedChannelToString(
1275 configuration_->channels()->Get(channel_index_))
1276 << " (" << channel_index_ << ")";
Austin Schuh6f3babe2020-01-26 20:34:50 -08001277 std::tuple<monotonic_clock::time_point, uint32_t,
Austin Schuhadd6eb32020-11-09 21:24:26 -08001278 SizePrefixedFlatbufferVector<MessageHeader>>
Austin Schuh6f3babe2020-01-26 20:34:50 -08001279 oldest_message = PopMessageHeap();
1280
Austin Schuh6f3babe2020-01-26 20:34:50 -08001281 timestamp.realtime_remote_time =
1282 realtime_clock::time_point(chrono::nanoseconds(
1283 std::get<2>(oldest_message).message().realtime_sent_time()));
1284 timestamp.remote_queue_index =
1285 std::get<2>(oldest_message).message().queue_index();
1286
Austin Schuhcde938c2020-02-02 17:30:07 -08001287 CHECK_EQ(timestamp.monotonic_remote_time,
1288 remote_timestamp_monotonic_time);
1289
1290 CHECK_EQ(timestamp.remote_queue_index,
1291 std::get<2>(oldest_timestamp).message().remote_queue_index())
1292 << ": " << FlatbufferToJson(&std::get<2>(oldest_timestamp).message())
1293 << " data "
1294 << FlatbufferToJson(&std::get<2>(oldest_message).message());
Austin Schuh6f3babe2020-01-26 20:34:50 -08001295
Austin Schuh30dd5c52020-08-01 14:43:44 -07001296 return std::make_tuple(timestamp, std::move(std::get<2>(oldest_message)));
Austin Schuh6f3babe2020-01-26 20:34:50 -08001297 }
1298 } else {
1299 std::tuple<monotonic_clock::time_point, uint32_t,
Austin Schuhadd6eb32020-11-09 21:24:26 -08001300 SizePrefixedFlatbufferVector<MessageHeader>>
Austin Schuh6f3babe2020-01-26 20:34:50 -08001301 oldest_message = PopMessageHeap();
1302
1303 TimestampMerger::DeliveryTimestamp timestamp;
1304 timestamp.monotonic_event_time =
1305 monotonic_clock::time_point(chrono::nanoseconds(
1306 std::get<2>(oldest_message).message().monotonic_sent_time()));
1307 timestamp.realtime_event_time =
1308 realtime_clock::time_point(chrono::nanoseconds(
1309 std::get<2>(oldest_message).message().realtime_sent_time()));
Austin Schuh8d7e0bb2020-10-02 17:57:00 -07001310 timestamp.queue_index = std::get<2>(oldest_message).message().queue_index();
Austin Schuh6f3babe2020-01-26 20:34:50 -08001311 timestamp.remote_queue_index = 0xffffffff;
1312
1313 CHECK_EQ(std::get<0>(oldest_message), timestamp.monotonic_event_time);
1314 CHECK_EQ(std::get<1>(oldest_message),
1315 std::get<2>(oldest_message).message().queue_index());
1316
Austin Schuh30dd5c52020-08-01 14:43:44 -07001317 return std::make_tuple(timestamp, std::move(std::get<2>(oldest_message)));
Austin Schuh6f3babe2020-01-26 20:34:50 -08001318 }
1319}
1320
Austin Schuh8bd96322020-02-13 21:18:22 -08001321void TimestampMerger::NoticeAtEnd() { channel_merger_->NoticeAtEnd(); }
1322
Austin Schuh6f3babe2020-01-26 20:34:50 -08001323namespace {
1324std::vector<std::unique_ptr<SplitMessageReader>> MakeSplitMessageReaders(
1325 const std::vector<std::vector<std::string>> &filenames) {
1326 CHECK_GT(filenames.size(), 0u);
1327 // Build up all the SplitMessageReaders.
1328 std::vector<std::unique_ptr<SplitMessageReader>> result;
1329 for (const std::vector<std::string> &filenames : filenames) {
1330 result.emplace_back(std::make_unique<SplitMessageReader>(filenames));
1331 }
1332 return result;
1333}
1334} // namespace
1335
1336ChannelMerger::ChannelMerger(
1337 const std::vector<std::vector<std::string>> &filenames)
1338 : split_message_readers_(MakeSplitMessageReaders(filenames)),
Austin Schuh97789fc2020-08-01 14:42:45 -07001339 log_file_header_(split_message_readers_[0]->raw_log_file_header()) {
Austin Schuh6f3babe2020-01-26 20:34:50 -08001340 // Now, confirm that the configuration matches for each and pick a start time.
1341 // Also return the list of possible nodes.
1342 for (const std::unique_ptr<SplitMessageReader> &reader :
1343 split_message_readers_) {
1344 CHECK(CompareFlatBuffer(log_file_header_.message().configuration(),
1345 reader->log_file_header()->configuration()))
1346 << ": Replaying log files with different configurations isn't "
1347 "supported";
1348 }
1349
1350 nodes_ = configuration::GetNodes(configuration());
1351}
1352
1353bool ChannelMerger::SetNode(const Node *target_node) {
1354 std::vector<SplitMessageReader *> split_message_readers;
1355 for (const std::unique_ptr<SplitMessageReader> &reader :
1356 split_message_readers_) {
1357 split_message_readers.emplace_back(reader.get());
1358 }
1359
1360 // Go find a log_file_header for this node.
1361 {
1362 bool found_node = false;
1363
1364 for (const std::unique_ptr<SplitMessageReader> &reader :
1365 split_message_readers_) {
James Kuszmaulfc273dc2020-05-09 17:56:19 -07001366 // In order to identify which logfile(s) map to the target node, do a
1367 // logical comparison of the nodes, by confirming that we are either in a
1368 // single-node setup (where the nodes will both be nullptr) or that the
1369 // node names match (but the other node fields--e.g., hostname lists--may
1370 // not).
1371 const bool both_null =
1372 reader->node() == nullptr && target_node == nullptr;
1373 const bool both_have_name =
1374 (reader->node() != nullptr) && (target_node != nullptr) &&
1375 (reader->node()->has_name() && target_node->has_name());
1376 const bool node_names_identical =
Brian Silvermand90905f2020-09-23 14:42:56 -07001377 both_have_name && (reader->node()->name()->string_view() ==
1378 target_node->name()->string_view());
James Kuszmaulfc273dc2020-05-09 17:56:19 -07001379 if (both_null || node_names_identical) {
Austin Schuh6f3babe2020-01-26 20:34:50 -08001380 if (!found_node) {
1381 found_node = true;
Austin Schuhadd6eb32020-11-09 21:24:26 -08001382 log_file_header_ = reader->raw_log_file_header();
Austin Schuhcde938c2020-02-02 17:30:07 -08001383 VLOG(1) << "Found log file " << reader->filename() << " with node "
1384 << FlatbufferToJson(reader->node()) << " start_time "
1385 << monotonic_start_time();
Austin Schuh6f3babe2020-01-26 20:34:50 -08001386 } else {
Austin Schuh2f8fd752020-09-01 22:38:28 -07001387 // Find the earliest start time. That way, if we get a full log file
1388 // directly from the node, and a partial later, we start with the
1389 // full. Update our header to match that.
1390 const monotonic_clock::time_point new_monotonic_start_time(
1391 chrono::nanoseconds(
1392 reader->log_file_header()->monotonic_start_time()));
1393 const realtime_clock::time_point new_realtime_start_time(
1394 chrono::nanoseconds(
1395 reader->log_file_header()->realtime_start_time()));
1396
1397 if (monotonic_start_time() == monotonic_clock::min_time ||
1398 (new_monotonic_start_time != monotonic_clock::min_time &&
1399 new_monotonic_start_time < monotonic_start_time())) {
1400 log_file_header_.mutable_message()->mutate_monotonic_start_time(
1401 new_monotonic_start_time.time_since_epoch().count());
1402 log_file_header_.mutable_message()->mutate_realtime_start_time(
1403 new_realtime_start_time.time_since_epoch().count());
1404 VLOG(1) << "Updated log file " << reader->filename()
1405 << " with node " << FlatbufferToJson(reader->node())
1406 << " start_time " << new_monotonic_start_time;
1407 }
Austin Schuh6f3babe2020-01-26 20:34:50 -08001408 }
1409 }
1410 }
1411
1412 if (!found_node) {
1413 LOG(WARNING) << "Failed to find log file for node "
1414 << FlatbufferToJson(target_node);
1415 return false;
1416 }
1417 }
1418
1419 // Build up all the timestamp mergers. This connects up all the
1420 // SplitMessageReaders.
1421 timestamp_mergers_.reserve(configuration()->channels()->size());
1422 for (size_t channel_index = 0;
1423 channel_index < configuration()->channels()->size(); ++channel_index) {
1424 timestamp_mergers_.emplace_back(
1425 configuration(), split_message_readers, channel_index,
1426 configuration::GetNode(configuration(), target_node), this);
1427 }
1428
1429 // And prime everything.
Austin Schuh6f3babe2020-01-26 20:34:50 -08001430 for (std::unique_ptr<SplitMessageReader> &split_message_reader :
1431 split_message_readers_) {
Austin Schuhcde938c2020-02-02 17:30:07 -08001432 split_message_reader->QueueMessages(
1433 split_message_reader->monotonic_start_time());
Austin Schuh6f3babe2020-01-26 20:34:50 -08001434 }
1435
1436 node_ = configuration::GetNodeOrDie(configuration(), target_node);
1437 return true;
1438}
1439
Austin Schuh858c9f32020-08-31 16:56:12 -07001440monotonic_clock::time_point ChannelMerger::OldestMessageTime() const {
Brian Silverman8a32ce62020-08-12 12:02:38 -07001441 if (channel_heap_.empty()) {
Austin Schuh6f3babe2020-01-26 20:34:50 -08001442 return monotonic_clock::max_time;
1443 }
1444 return channel_heap_.front().first;
1445}
1446
1447void ChannelMerger::PushChannelHeap(monotonic_clock::time_point timestamp,
1448 int channel_index) {
1449 // Pop and recreate the heap if it has already been pushed. And since we are
1450 // pushing again, we don't need to clear pushed.
1451 if (timestamp_mergers_[channel_index].pushed()) {
Brian Silverman8a32ce62020-08-12 12:02:38 -07001452 const auto channel_iterator = std::find_if(
Austin Schuh6f3babe2020-01-26 20:34:50 -08001453 channel_heap_.begin(), channel_heap_.end(),
1454 [channel_index](const std::pair<monotonic_clock::time_point, int> x) {
1455 return x.second == channel_index;
Brian Silverman8a32ce62020-08-12 12:02:38 -07001456 });
1457 DCHECK(channel_iterator != channel_heap_.end());
1458 if (std::get<0>(*channel_iterator) == timestamp) {
1459 // It's already in the heap, in the correct spot, so nothing
1460 // more for us to do here.
1461 return;
1462 }
1463 channel_heap_.erase(channel_iterator);
Austin Schuh6f3babe2020-01-26 20:34:50 -08001464 std::make_heap(channel_heap_.begin(), channel_heap_.end(),
1465 ChannelHeapCompare);
1466 }
1467
Austin Schuh2f8fd752020-09-01 22:38:28 -07001468 if (timestamp == monotonic_clock::min_time) {
1469 timestamp_mergers_[channel_index].set_pushed(false);
1470 return;
1471 }
1472
Austin Schuh05b70472020-01-01 17:11:17 -08001473 channel_heap_.push_back(std::make_pair(timestamp, channel_index));
1474
1475 // The default sort puts the newest message first. Use a custom comparator to
1476 // put the oldest message first.
1477 std::push_heap(channel_heap_.begin(), channel_heap_.end(),
1478 ChannelHeapCompare);
1479}
1480
Austin Schuh2f8fd752020-09-01 22:38:28 -07001481void ChannelMerger::VerifyHeaps() {
Austin Schuh661a8d82020-09-13 17:25:56 -07001482 std::vector<std::pair<monotonic_clock::time_point, int>> channel_heap =
1483 channel_heap_;
1484 std::make_heap(channel_heap.begin(), channel_heap.end(), &ChannelHeapCompare);
Austin Schuh2f8fd752020-09-01 22:38:28 -07001485
Austin Schuh661a8d82020-09-13 17:25:56 -07001486 for (size_t i = 0; i < channel_heap_.size(); ++i) {
1487 CHECK(channel_heap_[i] == channel_heap[i]) << ": Heaps diverged...";
1488 CHECK_EQ(
1489 std::get<0>(channel_heap[i]),
1490 timestamp_mergers_[std::get<1>(channel_heap[i])].channel_merger_time());
Austin Schuh2f8fd752020-09-01 22:38:28 -07001491 }
1492}
1493
Austin Schuh6f3babe2020-01-26 20:34:50 -08001494std::tuple<TimestampMerger::DeliveryTimestamp, int,
Austin Schuhadd6eb32020-11-09 21:24:26 -08001495 SizePrefixedFlatbufferVector<MessageHeader>>
Austin Schuh6f3babe2020-01-26 20:34:50 -08001496ChannelMerger::PopOldest() {
Austin Schuh8bd96322020-02-13 21:18:22 -08001497 CHECK_GT(channel_heap_.size(), 0u);
Austin Schuh05b70472020-01-01 17:11:17 -08001498 std::pair<monotonic_clock::time_point, int> oldest_channel_data =
1499 channel_heap_.front();
Austin Schuh6f3babe2020-01-26 20:34:50 -08001500 int channel_index = oldest_channel_data.second;
Austin Schuh05b70472020-01-01 17:11:17 -08001501 std::pop_heap(channel_heap_.begin(), channel_heap_.end(),
1502 &ChannelHeapCompare);
1503 channel_heap_.pop_back();
Austin Schuh8bd96322020-02-13 21:18:22 -08001504
Austin Schuh6f3babe2020-01-26 20:34:50 -08001505 timestamp_mergers_[channel_index].set_pushed(false);
Austin Schuh05b70472020-01-01 17:11:17 -08001506
Austin Schuh6f3babe2020-01-26 20:34:50 -08001507 TimestampMerger *merger = &timestamp_mergers_[channel_index];
Austin Schuh05b70472020-01-01 17:11:17 -08001508
Austin Schuhcde938c2020-02-02 17:30:07 -08001509 // Merger handles any queueing needed from here.
Austin Schuh6f3babe2020-01-26 20:34:50 -08001510 std::tuple<TimestampMerger::DeliveryTimestamp,
Austin Schuhadd6eb32020-11-09 21:24:26 -08001511 SizePrefixedFlatbufferVector<MessageHeader>>
Austin Schuh6f3babe2020-01-26 20:34:50 -08001512 message = merger->PopOldest();
Brian Silverman8a32ce62020-08-12 12:02:38 -07001513 DCHECK_EQ(std::get<0>(message).monotonic_event_time,
1514 oldest_channel_data.first)
1515 << ": channel_heap_ was corrupted for " << channel_index << ": "
1516 << DebugString();
Austin Schuh05b70472020-01-01 17:11:17 -08001517
Austin Schuh2f8fd752020-09-01 22:38:28 -07001518 CHECK_GE(std::get<0>(message).monotonic_event_time, last_popped_time_)
1519 << ": " << MaybeNodeName(log_file_header()->node())
1520 << "Messages came off the queue out of order. " << DebugString();
1521 last_popped_time_ = std::get<0>(message).monotonic_event_time;
1522
1523 VLOG(1) << "Popped " << last_popped_time_ << " "
1524 << configuration::StrippedChannelToString(
1525 configuration()->channels()->Get(channel_index))
1526 << " (" << channel_index << ")";
1527
Austin Schuh6f3babe2020-01-26 20:34:50 -08001528 return std::make_tuple(std::get<0>(message), channel_index,
1529 std::move(std::get<1>(message)));
1530}
1531
Austin Schuhcde938c2020-02-02 17:30:07 -08001532std::string SplitMessageReader::MessageHeaderQueue::DebugString() const {
1533 std::stringstream ss;
1534 for (size_t i = 0; i < data_.size(); ++i) {
Austin Schuh2f8fd752020-09-01 22:38:28 -07001535 if (i < 5 || i + 5 > data_.size()) {
1536 if (timestamps) {
1537 ss << " msg: ";
1538 } else {
1539 ss << " timestamp: ";
1540 }
1541 ss << monotonic_clock::time_point(
1542 chrono::nanoseconds(data_[i].message().monotonic_sent_time()))
Austin Schuhcde938c2020-02-02 17:30:07 -08001543 << " ("
Austin Schuh2f8fd752020-09-01 22:38:28 -07001544 << realtime_clock::time_point(
1545 chrono::nanoseconds(data_[i].message().realtime_sent_time()))
1546 << ") " << data_[i].message().queue_index();
1547 if (timestamps) {
1548 ss << " <- remote "
1549 << monotonic_clock::time_point(chrono::nanoseconds(
1550 data_[i].message().monotonic_remote_time()))
1551 << " ("
1552 << realtime_clock::time_point(chrono::nanoseconds(
1553 data_[i].message().realtime_remote_time()))
1554 << ")";
1555 }
1556 ss << "\n";
1557 } else if (i == 5) {
1558 ss << " ...\n";
Austin Schuh6f3babe2020-01-26 20:34:50 -08001559 }
Austin Schuhcde938c2020-02-02 17:30:07 -08001560 }
Austin Schuh6f3babe2020-01-26 20:34:50 -08001561
Austin Schuhcde938c2020-02-02 17:30:07 -08001562 return ss.str();
1563}
Austin Schuh6f3babe2020-01-26 20:34:50 -08001564
Austin Schuhcde938c2020-02-02 17:30:07 -08001565std::string SplitMessageReader::DebugString(int channel) const {
1566 std::stringstream ss;
1567 ss << "[\n";
1568 ss << channels_[channel].data.DebugString();
1569 ss << " ]";
1570 return ss.str();
1571}
Austin Schuh6f3babe2020-01-26 20:34:50 -08001572
Austin Schuhcde938c2020-02-02 17:30:07 -08001573std::string SplitMessageReader::DebugString(int channel, int node_index) const {
1574 std::stringstream ss;
1575 ss << "[\n";
1576 ss << channels_[channel].timestamps[node_index].DebugString();
1577 ss << " ]";
1578 return ss.str();
1579}
1580
1581std::string TimestampMerger::DebugString() const {
1582 std::stringstream ss;
1583
1584 if (timestamp_heap_.size() > 0) {
1585 ss << " timestamp_heap {\n";
1586 std::vector<
1587 std::tuple<monotonic_clock::time_point, uint32_t, SplitMessageReader *>>
1588 timestamp_heap = timestamp_heap_;
1589 while (timestamp_heap.size() > 0u) {
1590 std::tuple<monotonic_clock::time_point, uint32_t, SplitMessageReader *>
1591 oldest_timestamp_reader = timestamp_heap.front();
1592
1593 ss << " " << std::get<2>(oldest_timestamp_reader) << " "
1594 << std::get<0>(oldest_timestamp_reader) << " queue_index ("
1595 << std::get<1>(oldest_timestamp_reader) << ") ttq "
1596 << std::get<2>(oldest_timestamp_reader)->time_to_queue() << " "
1597 << std::get<2>(oldest_timestamp_reader)->filename() << " -> "
1598 << std::get<2>(oldest_timestamp_reader)
1599 ->DebugString(channel_index_, node_index_)
1600 << "\n";
1601
1602 std::pop_heap(timestamp_heap.begin(), timestamp_heap.end(),
1603 &SplitMessageReaderHeapCompare);
1604 timestamp_heap.pop_back();
1605 }
1606 ss << " }\n";
1607 }
1608
1609 ss << " message_heap {\n";
1610 {
1611 std::vector<
1612 std::tuple<monotonic_clock::time_point, uint32_t, SplitMessageReader *>>
1613 message_heap = message_heap_;
Brian Silverman8a32ce62020-08-12 12:02:38 -07001614 while (!message_heap.empty()) {
Austin Schuhcde938c2020-02-02 17:30:07 -08001615 std::tuple<monotonic_clock::time_point, uint32_t, SplitMessageReader *>
1616 oldest_message_reader = message_heap.front();
1617
1618 ss << " " << std::get<2>(oldest_message_reader) << " "
1619 << std::get<0>(oldest_message_reader) << " queue_index ("
1620 << std::get<1>(oldest_message_reader) << ") ttq "
1621 << std::get<2>(oldest_message_reader)->time_to_queue() << " "
1622 << std::get<2>(oldest_message_reader)->filename() << " -> "
1623 << std::get<2>(oldest_message_reader)->DebugString(channel_index_)
1624 << "\n";
1625
1626 std::pop_heap(message_heap.begin(), message_heap.end(),
1627 &SplitMessageReaderHeapCompare);
1628 message_heap.pop_back();
Austin Schuh6f3babe2020-01-26 20:34:50 -08001629 }
Austin Schuh05b70472020-01-01 17:11:17 -08001630 }
Austin Schuhcde938c2020-02-02 17:30:07 -08001631 ss << " }";
1632
1633 return ss.str();
1634}
1635
1636std::string ChannelMerger::DebugString() const {
1637 std::stringstream ss;
1638 ss << "start_time " << realtime_start_time() << " " << monotonic_start_time()
1639 << "\n";
1640 ss << "channel_heap {\n";
1641 std::vector<std::pair<monotonic_clock::time_point, int>> channel_heap =
1642 channel_heap_;
Brian Silverman8a32ce62020-08-12 12:02:38 -07001643 while (!channel_heap.empty()) {
Austin Schuhcde938c2020-02-02 17:30:07 -08001644 std::tuple<monotonic_clock::time_point, int> channel = channel_heap.front();
1645 ss << " " << std::get<0>(channel) << " (" << std::get<1>(channel) << ") "
1646 << configuration::CleanedChannelToString(
1647 configuration()->channels()->Get(std::get<1>(channel)))
1648 << "\n";
1649
1650 ss << timestamp_mergers_[std::get<1>(channel)].DebugString() << "\n";
1651
1652 std::pop_heap(channel_heap.begin(), channel_heap.end(),
1653 &ChannelHeapCompare);
1654 channel_heap.pop_back();
1655 }
1656 ss << "}";
1657
1658 return ss.str();
Austin Schuh05b70472020-01-01 17:11:17 -08001659}
1660
Austin Schuhee711052020-08-24 16:06:09 -07001661std::string MaybeNodeName(const Node *node) {
1662 if (node != nullptr) {
1663 return node->name()->str() + " ";
1664 }
1665 return "";
1666}
1667
Brian Silvermanf51499a2020-09-21 12:49:08 -07001668} // namespace aos::logger