Blame - aos/events/logging/logfile_utils.cc - RealtimeRoboticsGroup/test

2019-12-30 18:07:15 -0800

[diff] [blame]

1

#include "aos/events/logging/logfile_utils.h"

2

3

#include <fcntl.h>

Austin Schuh

2019-12-30 18:07:15 -0800

[diff] [blame]

4

#include <sys/stat.h>

5

#include <sys/types.h>

6

#include <sys/uio.h>

7

Brian Silverman

2020-09-21 12:49:08 -0700

[diff] [blame]

8

#include <algorithm>

9

#include <climits>

Austin Schuh

2019-12-30 18:07:15 -0800

[diff] [blame]

10

Austin Schuh

e4fca83

2020-03-07 16:58:53 -0800

[diff] [blame]

11

#include "absl/strings/escaping.h"

Austin Schuh

2020-01-01 17:11:17 -0800

[diff] [blame]

12

#include "aos/configuration.h"

Austin Schuh

2020-01-07 20:07:41 -0800

[diff] [blame]

13

#include "aos/flatbuffer_merge.h"

Austin Schuh

2020-01-26 20:34:50 -0800

[diff] [blame]

14

#include "aos/util/file.h"

Austin Schuh

2019-12-30 18:07:15 -0800

[diff] [blame]

15

#include "flatbuffers/flatbuffers.h"

Austin Schuh

2020-01-01 17:11:17 -0800

[diff] [blame]

16

#include "gflags/gflags.h"

17

#include "glog/logging.h"

Austin Schuh

2019-12-30 18:07:15 -0800

[diff] [blame]

18

Brian Silverman

f59fe3f

2020-09-22 21:04:09 -0700

[diff] [blame]

19

#if defined(__x86_64__)

20

#define ENABLE_LZMA 1

21

#elif defined(__aarch64__)

22

#define ENABLE_LZMA 1

23

#else

24

#define ENABLE_LZMA 0

#endif

#if ENABLE_LZMA

#include "aos/events/logging/lzma_encoder.h"

29

#endif

30

Austin Schuh

7fbf5a7

2020-09-21 16:28:13 -0700

[diff] [blame]

31

DEFINE_int32(flush_size, 128000,

Austin Schuh

2019-12-30 18:07:15 -0800

[diff] [blame]

32

"Number of outstanding bytes to allow before flushing to disk.");

33

Brian Silverman

2020-09-21 12:49:08 -0700

[diff] [blame]

34

namespace aos::logger {

Austin Schuh

2019-12-30 18:07:15 -0800

[diff] [blame]

35

Austin Schuh

2020-01-01 17:11:17 -0800

[diff] [blame]

36

namespace chrono = std::chrono;

37

Brian Silverman

2020-09-21 12:49:08 -0700

[diff] [blame]

38

// Opens `filename` for writing, creating it (and, via MkdirPIfSpace, whatever
// that helper sets up for the path) as needed.  `encoder` is moved into the
// writer and used for all subsequently queued data.
//
// A false return from MkdirPIfSpace, or open() failing with ENOSPC, marks the
// writer as out of space instead of crashing.  Any other open() failure is
// fatal.
DetachedBufferWriter::DetachedBufferWriter(
    std::string_view filename, std::unique_ptr<DetachedBufferEncoder> encoder)
    : filename_(filename), encoder_(std::move(encoder)) {
  if (!util::MkdirPIfSpace(filename, 0777)) {
    ran_out_of_space_ = true;
    return;
  }

  // open() needs a NUL-terminated path, which a string_view doesn't guarantee.
  const std::string path(filename);
  fd_ = open(path.c_str(), O_RDWR | O_CLOEXEC | O_CREAT | O_EXCL, 0774);
  if (fd_ == -1 && errno == ENOSPC) {
    ran_out_of_space_ = true;
    return;
  }

  PCHECK(fd_ != -1) << ": Failed to open " << filename << " for writing";
  VLOG(1) << "Opened " << filename << " for writing";
}

54

55

// Closes the file (flushing anything still queued), then insists that an
// out-of-space condition, if one occurred, has been acknowledged.
DetachedBufferWriter::~DetachedBufferWriter() {
  Close();
  if (!ran_out_of_space_) {
    return;
  }
  CHECK(acknowledge_ran_out_of_space_)
      << ": Unacknowledged out of disk space, log file was not completed";
}

62

Brian Silverman

d90905f

2020-09-23 14:42:56 -0700

[diff] [blame]

63

// Move constructor.  Implemented in terms of move assignment, which swaps
// this object's freshly initialized members with those of `other`.
DetachedBufferWriter::DetachedBufferWriter(DetachedBufferWriter &&other) {
  this->operator=(std::move(other));
}

66

Brian Silverman

87ac040

2020-09-17 14:47:01 -0700

[diff] [blame]

67

// When other is destroyed "soon" (which it should be because we're getting an

68

// rvalue reference to it), it will flush etc all the data we have queued up

69

// (because that data will then be its data).

Austin Schuh

2020-09-01 22:38:28 -0700

[diff] [blame]

70

// Move assignment.  Every member is exchanged with `other` rather than
// overwritten, so whatever this object held before ends up owned by `other`.
DetachedBufferWriter &DetachedBufferWriter::operator=(
    DetachedBufferWriter &&other) {
  using std::swap;

  // File identity and encoder.
  swap(filename_, other.filename_);
  swap(encoder_, other.encoder_);
  swap(fd_, other.fd_);

  // Out-of-space bookkeeping.
  swap(ran_out_of_space_, other.ran_out_of_space_);
  swap(acknowledge_ran_out_of_space_, other.acknowledge_ran_out_of_space_);

  // Scratch buffer for writev.
  swap(iovec_, other.iovec_);

  // Write statistics.
  swap(max_write_time_, other.max_write_time_);
  swap(max_write_time_bytes_, other.max_write_time_bytes_);
  swap(max_write_time_messages_, other.max_write_time_messages_);
  swap(total_write_time_, other.total_write_time_);
  swap(total_write_count_, other.total_write_count_);
  swap(total_write_messages_, other.total_write_messages_);
  swap(total_write_bytes_, other.total_write_bytes_);

  return *this;
}

87

Brian Silverman

2020-09-21 12:49:08 -0700

[diff] [blame]

88

// Queues `span` to be written to the log file.
//
// Small spans (or any span when the encoder cannot be bypassed) are copied and
// handed to the encoder.  Large spans on a bypass-capable encoder are written
// straight to the file after draining everything already queued, so that
// ordering is preserved.
//
// Once the writer has run out of space, all further spans are dropped.
void DetachedBufferWriter::QueueSpan(absl::Span<const uint8_t> span) {
  if (ran_out_of_space_) {
    // We don't want any later data to be written after space becomes
    // available, so refuse to write anything more once we've dropped data
    // because we ran out of space.
    VLOG(1) << "Ignoring span: " << span.size();
    return;
  }

  if (encoder_->may_bypass() && span.size() > 4096u) {
    // Over this threshold, we'll assume it's cheaper to add an extra
    // syscall to write the data immediately instead of copying it to
    // enqueue.

    // First, flush everything.
    while (encoder_->queue_size() > 0u) {
      Flush();
    }

    if (ran_out_of_space_) {
      // Draining the queue just hit the out-of-space condition, which means
      // queued data was dropped.  Writing this span now would put later data
      // after the gap, so drop it too.
      VLOG(1) << "Ignoring span: " << span.size();
      return;
    }

    // Then, write it directly.
    const auto start = aos::monotonic_clock::now();
    const ssize_t written = write(fd_, span.data(), span.size());
    const auto end = aos::monotonic_clock::now();
    HandleWriteReturn(written, span.size());
    UpdateStatsForWrite(end - start, written, 1);
  } else {
    encoder_->Encode(CopySpanAsDetachedBuffer(span));
  }

  FlushAtThreshold();
}

119

Brian Silverman

2020-09-24 00:29:18 -0700

[diff] [blame]

120

void DetachedBufferWriter::Close() {

if (fd_ == -1) {

return;

}

encoder_->Finish();

while (encoder_->queue_size() > 0) {

126

Flush();

127

}

128

if (close(fd_) == -1) {

129

if (errno == ENOSPC) {

130

ran_out_of_space_ = true;

131

} else {

132

PLOG(ERROR) << "Closing log file failed";

}

}

fd_ = -1;

VLOG(1) << "Closed " << filename_;

137

}

138

Austin Schuh

2019-12-30 18:07:15 -0800

[diff] [blame]

139

void DetachedBufferWriter::Flush() {

Brian Silverman

2020-09-21 12:49:08 -0700

[diff] [blame]

140

const auto queue = encoder_->queue();

141

if (queue.empty()) {

Austin Schuh

2019-12-30 18:07:15 -0800

[diff] [blame]

142

return;

143

}

Brian Silverman

2020-09-24 00:29:18 -0700

[diff] [blame]

144

if (ran_out_of_space_) {

145

// We don't want any later data to be written after space becomes available,

146

// so refuse to write anything more once we've dropped data because we ran

147

// out of space.

148

VLOG(1) << "Ignoring queue: " << queue.size();

149

encoder_->Clear(queue.size());

150

return;

151

}

Brian Silverman

2020-09-21 12:49:08 -0700

[diff] [blame]

152

Austin Schuh

2019-12-30 18:07:15 -0800

[diff] [blame]

153

iovec_.clear();

Brian Silverman

2020-09-21 12:49:08 -0700

[diff] [blame]

154

const size_t iovec_size = std::min<size_t>(queue.size(), IOV_MAX);

155

iovec_.resize(iovec_size);

Austin Schuh

2019-12-30 18:07:15 -0800

[diff] [blame]

156

size_t counted_size = 0;

Brian Silverman

2020-09-21 12:49:08 -0700

[diff] [blame]

157

for (size_t i = 0; i < iovec_size; ++i) {

158

iovec_[i].iov_base = const_cast<uint8_t *>(queue[i].data());

159

iovec_[i].iov_len = queue[i].size();

160

counted_size += iovec_[i].iov_len;

Austin Schuh

2019-12-30 18:07:15 -0800

[diff] [blame]

161

}

Brian Silverman

2020-09-21 12:49:08 -0700

[diff] [blame]

162

163

const auto start = aos::monotonic_clock::now();

Austin Schuh

2019-12-30 18:07:15 -0800

[diff] [blame]

164

const ssize_t written = writev(fd_, iovec_.data(), iovec_.size());

Brian Silverman

2020-09-21 12:49:08 -0700

[diff] [blame]

165

const auto end = aos::monotonic_clock::now();

Brian Silverman

2020-09-24 00:29:18 -0700

[diff] [blame]

166

HandleWriteReturn(written, counted_size);

Brian Silverman

2020-09-21 12:49:08 -0700

[diff] [blame]

167

168

encoder_->Clear(iovec_size);

169

170

UpdateStatsForWrite(end - start, written, iovec_size);

171

}

172

Brian Silverman

2020-09-24 00:29:18 -0700

[diff] [blame]

173

void DetachedBufferWriter::HandleWriteReturn(ssize_t write_return,

174

size_t write_size) {

175

if (write_return == -1 && errno == ENOSPC) {

176

ran_out_of_space_ = true;

177

return;

178

}

179

PCHECK(write_return >= 0) << ": write failed";

180

if (write_return < static_cast<ssize_t>(write_size)) {

181

// Sometimes this happens instead of ENOSPC. On a real filesystem, this

182

// never seems to happen in any other case. If we ever want to log to a

183

// socket, this will happen more often. However, until we get there, we'll

184

// just assume it means we ran out of space.

185

ran_out_of_space_ = true;

return;

}

}

Brian Silverman

2020-09-21 12:49:08 -0700

[diff] [blame]

190

void DetachedBufferWriter::UpdateStatsForWrite(

191

aos::monotonic_clock::duration duration, ssize_t written, int iovec_size) {

192

if (duration > max_write_time_) {

193

max_write_time_ = duration;

194

max_write_time_bytes_ = written;

195

max_write_time_messages_ = iovec_size;

196

}

197

total_write_time_ += duration;

198

++total_write_count_;

199

total_write_messages_ += iovec_size;

200

total_write_bytes_ += written;

201

}

202

203

void DetachedBufferWriter::FlushAtThreshold() {

204

// Flush if we are at the max number of iovs per writev, because there's no

205

// point queueing up any more data in memory. Also flush once we have enough

206

// data queued up.

207

while (encoder_->queued_bytes() > static_cast<size_t>(FLAGS_flush_size) ||

208

encoder_->queue_size() >= IOV_MAX) {

209

Flush();

210

}

Austin Schuh

2019-12-30 18:07:15 -0800

[diff] [blame]

211

}

212

213

// Serializes one message from `context` into `fbb` as a MessageHeader and
// returns the offset of the finished table.
//
// `log_type` selects which fields are populated:
//   * kLogMessage:                 local sent info + data.
//   * kLogRemoteMessage:           remote sent info + data.
//   * kLogMessageAndDeliveryTime:  local sent info + data + remote times/index.
//   * kLogDeliveryTimeOnly:        local sent info + remote times/index.
flatbuffers::Offset<MessageHeader> PackMessage(
    flatbuffers::FlatBufferBuilder *fbb, const Context &context,
    int channel_index, LogType log_type) {
  const bool is_remote_message = (log_type == LogType::kLogRemoteMessage);
  const bool is_local_type =
      (log_type == LogType::kLogMessage ||
       log_type == LogType::kLogMessageAndDeliveryTime ||
       log_type == LogType::kLogDeliveryTimeOnly);
  const bool logs_data = (log_type == LogType::kLogMessage ||
                          log_type == LogType::kLogMessageAndDeliveryTime ||
                          log_type == LogType::kLogRemoteMessage);
  const bool logs_delivery_time =
      (log_type == LogType::kLogMessageAndDeliveryTime ||
       log_type == LogType::kLogDeliveryTimeOnly);

  // The payload vector is created before the table builder is constructed.
  flatbuffers::Offset<flatbuffers::Vector<uint8_t>> data_offset;
  if (logs_data) {
    data_offset = fbb->CreateVector(
        static_cast<const uint8_t *>(context.data), context.size);
  }

  MessageHeader::Builder message_header_builder(*fbb);
  message_header_builder.add_channel_index(channel_index);

  // Queue index and sent times come from the remote side for remote messages
  // and from the local event otherwise.
  if (is_remote_message) {
    message_header_builder.add_queue_index(context.remote_queue_index);
    message_header_builder.add_monotonic_sent_time(
        context.monotonic_remote_time.time_since_epoch().count());
    message_header_builder.add_realtime_sent_time(
        context.realtime_remote_time.time_since_epoch().count());
  } else if (is_local_type) {
    message_header_builder.add_queue_index(context.queue_index);
    message_header_builder.add_monotonic_sent_time(
        context.monotonic_event_time.time_since_epoch().count());
    message_header_builder.add_realtime_sent_time(
        context.realtime_event_time.time_since_epoch().count());
  }

  if (logs_data) {
    message_header_builder.add_data(data_offset);
  }

  if (logs_delivery_time) {
    message_header_builder.add_monotonic_remote_time(
        context.monotonic_remote_time.time_since_epoch().count());
    message_header_builder.add_realtime_remote_time(
        context.realtime_remote_time.time_since_epoch().count());
    message_header_builder.add_remote_queue_index(context.remote_queue_index);
  }

  return message_header_builder.Finish();
}

274

Brian Silverman

2020-09-21 12:49:08 -0700

[diff] [blame]

275

// Opens `filename` for reading, picking the decoder from the file extension:
// names ending in ".xz" use the LZMA decoder (fatal on platforms built without
// it), everything else is read through DummyDecoder.
SpanReader::SpanReader(std::string_view filename) : filename_(filename) {
  static const std::string_view kXz = ".xz";
  // Check the length first: for names shorter than the suffix,
  // filename.size() - kXz.size() would wrap around and substr() would throw.
  const bool is_xz = filename.size() >= kXz.size() &&
                     filename.substr(filename.size() - kXz.size()) == kXz;
  if (is_xz) {
#if ENABLE_LZMA
    decoder_ = std::make_unique<LzmaDecoder>(filename);
#else
    LOG(FATAL) << "Reading xz-compressed files not supported on this platform";
#endif
  } else {
    decoder_ = std::make_unique<DummyDecoder>(filename);
  }
}

287

288

// Reads the next size-prefixed message and returns a span covering the size
// prefix plus the message, consuming it from the buffer.  The span points
// into the reader's internal buffer.
//
// Returns an empty span when no more complete messages can be read, or when a
// zero-length message is found (treated as a corrupted end of file).
absl::Span<const uint8_t> SpanReader::ReadMessage() {
  constexpr size_t kPrefixSize = sizeof(flatbuffers::uoffset_t);

  // Make sure we have enough for the size.  A single ReadBlock() may deliver
  // fewer bytes than the prefix needs, so keep reading until the whole prefix
  // is buffered or the input is exhausted.
  while (data_.size() - consumed_data_ < kPrefixSize) {
    if (!ReadBlock()) {
      return absl::Span<const uint8_t>();
    }
  }

  // Now make sure we have enough for the message.
  const size_t data_size =
      flatbuffers::GetPrefixedSize(data_.data() + consumed_data_) +
      kPrefixSize;
  if (data_size == kPrefixSize) {
    LOG(ERROR) << "Size of data is zero. Log file end is corrupted, skipping.";
    LOG(ERROR) << " Rest of log file is "
               << absl::BytesToHexString(std::string_view(
                      reinterpret_cast<const char *>(data_.data() +
                                                     consumed_data_),
                      data_.size() - consumed_data_));
    return absl::Span<const uint8_t>();
  }
  while (data_.size() < consumed_data_ + data_size) {
    if (!ReadBlock()) {
      return absl::Span<const uint8_t>();
    }
  }

  // And return it, consuming the data.  The pointer is taken only after all
  // reads are done because ReadBlock() resizes the buffer.
  const uint8_t *data_ptr = data_.data() + consumed_data_;

  consumed_data_ += data_size;

  return absl::Span<const uint8_t>(data_ptr, data_size);
}

322

323

bool SpanReader::MessageAvailable() {

324

// Are we big enough to read the size?

325

if (data_.size() - consumed_data_ < sizeof(flatbuffers::uoffset_t)) {

return false;

}

// Then, are we big enough to read the full message?

330

const size_t data_size =

331

flatbuffers::GetPrefixedSize(data_.data() + consumed_data_) +

332

sizeof(flatbuffers::uoffset_t);

333

if (data_.size() < consumed_data_ + data_size) {

return false;

}

return true;

}

bool SpanReader::ReadBlock() {

Brian Silverman

2020-09-21 12:49:08 -0700

[diff] [blame]

341

// This is the amount of data we grab at a time. Doing larger chunks minimizes

342

// syscalls and helps decompressors batch things more efficiently.

Austin Schuh

2020-01-01 17:11:17 -0800

[diff] [blame]

343

constexpr size_t kReadSize = 256 * 1024;

344

345

// Strip off any unused data at the front.

346

if (consumed_data_ != 0) {

Brian Silverman

2020-09-21 12:49:08 -0700

[diff] [blame]

347

data_.erase_front(consumed_data_);

Austin Schuh

2020-01-01 17:11:17 -0800

[diff] [blame]

consumed_data_ = 0;

}

const size_t starting_size = data_.size();

352

353

// This should automatically grow the backing store. It won't shrink if we

354

// get a small chunk later. This reduces allocations when we want to append

355

// more data.

Brian Silverman

2020-09-21 12:49:08 -0700

[diff] [blame]

356

data_.resize(starting_size + kReadSize);

Austin Schuh

2020-01-01 17:11:17 -0800

[diff] [blame]

357

Brian Silverman

2020-09-21 12:49:08 -0700

[diff] [blame]

358

const size_t count =

359

decoder_->Read(data_.begin() + starting_size, data_.end());

360

data_.resize(starting_size + count);

Austin Schuh

2020-01-01 17:11:17 -0800

[diff] [blame]

361

if (count == 0) {

Austin Schuh

2020-01-01 17:11:17 -0800

[diff] [blame]

362

return false;

363

}

Austin Schuh

2020-01-01 17:11:17 -0800

[diff] [blame]

return true;

}

Austin Schuh

2020-01-26 20:34:50 -0800

[diff] [blame]

368

// Reads the first message of `filename` and returns an owned copy of it as a
// LogFileHeader, with the size prefix stripped.  Dies if nothing can be read.
FlatbufferVector<LogFileHeader> ReadHeader(std::string_view filename) {
  constexpr size_t kPrefixSize = sizeof(flatbuffers::uoffset_t);

  SpanReader reader(filename);
  const absl::Span<const uint8_t> header_span = reader.ReadMessage();

  // Make sure something was read.
  CHECK(header_span != absl::Span<const uint8_t>())
      << ": Failed to read header from: " << filename;

  // The span points into the reader's buffer, so copy the payload (everything
  // after the size prefix) into storage we own.
  ResizeableBuffer owned;
  owned.resize(header_span.size() - kPrefixSize);
  memcpy(owned.data(), header_span.begin() + kPrefixSize, owned.size());
  return FlatbufferVector<LogFileHeader>(std::move(owned));
}

383

Austin Schuh

5212cad

2020-09-09 23:12:09 -0700

[diff] [blame]

384

// Returns an owned copy of the n-th (0-indexed) message following the first
// message in `filename`, with the size prefix stripped.  Dies if the file
// ends before that message.
FlatbufferVector<MessageHeader> ReadNthMessage(std::string_view filename,
                                               size_t n) {
  constexpr size_t kPrefixSize = sizeof(flatbuffers::uoffset_t);

  SpanReader reader(filename);

  // The first message read is skipped; it is overwritten below.
  absl::Span<const uint8_t> message_span = reader.ReadMessage();
  for (size_t remaining = n + 1; remaining > 0; --remaining) {
    message_span = reader.ReadMessage();

    // Make sure something was read.
    CHECK(message_span != absl::Span<const uint8_t>())
        << ": Failed to read data from: " << filename;
  }

  // Copy the message payload (without the size prefix) into storage we own,
  // since the span only points into the reader's buffer.
  ResizeableBuffer owned;
  owned.resize(message_span.size() - kPrefixSize);
  memcpy(owned.data(), message_span.begin() + kPrefixSize, owned.size());
  return FlatbufferVector<MessageHeader>(std::move(owned));
}

403

Austin Schuh

2020-01-01 17:11:17 -0800

[diff] [blame]

404

// Opens `filename`, reads its first message as the log file header, and keeps
// an owned copy of it.  Also caches max_out_of_order_duration from that
// header.  Dies if the header cannot be read.
MessageReader::MessageReader(std::string_view filename)
    : span_reader_(filename),
      raw_log_file_header_(FlatbufferVector<LogFileHeader>::Empty()) {
  constexpr size_t kPrefixSize = sizeof(flatbuffers::uoffset_t);

  // The header is the first size-prefixed message in the file.
  const absl::Span<const uint8_t> header_span = span_reader_.ReadMessage();

  // Make sure something was read.
  CHECK(header_span != absl::Span<const uint8_t>())
      << ": Failed to read header from: " << filename;

  // And copy the header data (minus the size prefix) so we have it forever.
  ResizeableBuffer owned_header;
  owned_header.resize(header_span.size() - kPrefixSize);
  memcpy(owned_header.data(), header_span.begin() + kPrefixSize,
         owned_header.size());
  raw_log_file_header_ =
      FlatbufferVector<LogFileHeader>(std::move(owned_header));

  max_out_of_order_duration_ =
      chrono::nanoseconds(log_file_header()->max_out_of_order_duration());

  VLOG(1) << "Opened " << filename << " as node "
          << FlatbufferToJson(log_file_header()->node());
}

429

430

// Reads the next message from the file and returns an owned copy with the
// size prefix stripped, or std::nullopt when the underlying reader has nothing
// more to give.  Also tracks the newest monotonic sent time seen so far.
std::optional<FlatbufferVector<MessageHeader>> MessageReader::ReadMessage() {
  constexpr size_t kPrefixSize = sizeof(flatbuffers::uoffset_t);

  const absl::Span<const uint8_t> raw = span_reader_.ReadMessage();
  if (raw == absl::Span<const uint8_t>()) {
    return std::nullopt;
  }

  // Copy the payload out of the reader's buffer so the result owns its data.
  ResizeableBuffer owned;
  owned.resize(raw.size() - kPrefixSize);
  memcpy(owned.data(), raw.begin() + kPrefixSize, owned.size());
  FlatbufferVector<MessageHeader> result(std::move(owned));

  const monotonic_clock::time_point timestamp(
      chrono::nanoseconds(result.message().monotonic_sent_time()));
  if (newest_timestamp_ < timestamp) {
    newest_timestamp_ = timestamp;
  }

  VLOG(2) << "Read from " << filename() << " data " << FlatbufferToJson(result);
  return std::move(result);
}

450

Austin Schuh

2020-01-26 20:34:50 -0800

[diff] [blame]

451

// Opens the first file in `filenames`, verifies that the headers of all the
// remaining files are compatible with it, and sets up the per-channel queues.
// Dies if `filenames` is empty or if any header differs in anything other
// than the start times and parts index.
SplitMessageReader::SplitMessageReader(
    const std::vector<std::string> &filenames)
    : filenames_(filenames),
      log_file_header_(FlatbufferVector<LogFileHeader>::Empty()) {
  CHECK(NextLogFile()) << ": filenames is empty. Need files to read.";

  // Grab any log file header.  They should all match (and we will check as we
  // open more of them).
  log_file_header_ = message_reader_->raw_log_file_header();

  for (size_t i = 1; i < filenames_.size(); ++i) {
    MessageReader message_reader(filenames_[i]);
    const auto *const part_header = message_reader.log_file_header();

    const monotonic_clock::time_point new_monotonic_start_time(
        chrono::nanoseconds(part_header->monotonic_start_time()));
    const realtime_clock::time_point new_realtime_start_time(
        chrono::nanoseconds(part_header->realtime_start_time()));

    // There are 2 types of part files.  Part files from before time estimation
    // has started, and part files after.  We don't declare a log file "started"
    // until time estimation is up.  And once a log file starts, it should never
    // stop again, and should remain constant.
    // To compare both types of headers, we mutate our saved copy of the header
    // to match the next chunk by updating time if we detect a stopped ->
    // started transition.
    if (monotonic_start_time() == monotonic_clock::min_time) {
      CHECK_EQ(realtime_start_time(), realtime_clock::min_time);
      // We should only be missing the monotonic start time when logging data
      // for remote nodes.  We don't have a good way to determine the remote
      // realtime offset, so it shouldn't be filled out.
      // TODO(austin): If we have a good way, feel free to fill it out.  It
      // probably won't be better than we could do in post though with the same
      // data.
      CHECK(!log_file_header_.mutable_message()->has_realtime_start_time());
      if (new_monotonic_start_time != monotonic_clock::min_time) {
        // If we finally found our start time, update the header.  Do this once
        // because it should never change again.
        log_file_header_.mutable_message()->mutate_monotonic_start_time(
            new_monotonic_start_time.time_since_epoch().count());
        log_file_header_.mutable_message()->mutate_realtime_start_time(
            new_realtime_start_time.time_since_epoch().count());
      }
    }

    // We don't have a good way to set the realtime start time on remote nodes.
    // Confirm it remains consistent.
    CHECK_EQ(log_file_header_.mutable_message()->has_realtime_start_time(),
             part_header->has_realtime_start_time());

    // Parts index will *not* match unless we set them to match.  We only want
    // to accept the start time and parts mismatching, so set them.
    log_file_header_.mutable_message()->mutate_parts_index(
        part_header->parts_index());

    // Now compare that the headers match.
    if (CompareFlatBuffer(message_reader.raw_log_file_header(),
                          log_file_header_)) {
      continue;
    }

    // They differ.  Report the most specific reason we can find before falling
    // back to the generic message.
    const auto &saved_header = log_file_header_.message();

    const bool log_event_uuid_differs =
        part_header->has_log_event_uuid() &&
        saved_header.has_log_event_uuid() &&
        part_header->log_event_uuid()->string_view() !=
            saved_header.log_event_uuid()->string_view();
    if (log_event_uuid_differs) {
      LOG(FATAL) << "Logger UUIDs don't match between log file chunks "
                 << filenames_[0] << " and " << filenames_[i]
                 << ", this is not supported.";
    }

    const bool parts_uuid_differs =
        part_header->has_parts_uuid() && saved_header.has_parts_uuid() &&
        part_header->parts_uuid()->string_view() !=
            saved_header.parts_uuid()->string_view();
    if (parts_uuid_differs) {
      LOG(FATAL) << "Parts UUIDs don't match between log file chunks "
                 << filenames_[0] << " and " << filenames_[i]
                 << ", this is not supported.";
    }

    LOG(FATAL) << "Header is different between log file chunks "
               << filenames_[0] << " and " << filenames_[i]
               << ", this is not supported.";
  }

  // Put the parts index back to the first log file chunk.
  log_file_header_.mutable_message()->mutate_parts_index(
      message_reader_->log_file_header()->parts_index());

  // Setup per channel state.
  channels_.resize(configuration()->channels()->size());
  for (ChannelData &channel_data : channels_) {
    channel_data.data.split_reader = this;

    // Timestamp queues only exist for multi-node configurations.
    if (!configuration::MultiNode(configuration())) {
      continue;
    }

    // Build up the timestamp list, one queue per node.
    channel_data.timestamps.resize(configuration()->nodes()->size());
    for (MessageHeaderQueue &queue : channel_data.timestamps) {
      queue.timestamps = true;
      queue.split_reader = this;
    }
  }

  // Build up channels_to_write_ as an optimization to make it fast to figure
  // out which datastructure to place any new data from a channel on.
  for (const Channel *channel : *configuration()->channels()) {
    // Exactly one entry is appended per channel, so the current size is this
    // channel's index.
    const size_t channel_index = channels_to_write_.size();

    if (configuration::ChannelIsSendableOnNode(channel, node())) {
      // This is the main case.  We will only see data on this node.
      channels_to_write_.emplace_back(&channels_[channel_index].data);
    } else if (configuration::ChannelIsReadableOnNode(channel, node())) {
      // If we can't send, but can receive, we should be able to see
      // timestamps here.
      channels_to_write_.emplace_back(
          &(channels_[channel_index].timestamps[configuration::GetNodeIndex(
              configuration(), node())]));
    } else {
      channels_to_write_.emplace_back(nullptr);
    }
  }
}

570

Austin Schuh

2020-01-26 20:34:50 -0800

[diff] [blame]

571

bool SplitMessageReader::NextLogFile() {

Austin Schuh

2020-01-07 20:07:41 -0800

[diff] [blame]

572

if (next_filename_index_ == filenames_.size()) {

return false;

}

message_reader_ =

std::make_unique<MessageReader>(filenames_[next_filename_index_]);

577

578

// We can't support the config diverging between two log file headers. See if

579

// they are the same.

580

if (next_filename_index_ != 0) {

Austin Schuh

2020-09-09 22:47:47 -0700

[diff] [blame]

581

// In order for the headers to identically compare, they need to have the

582

// same parts_index. Rewrite the saved header with the new parts_index,

583

// compare, and then restore.

584

const int32_t original_parts_index =

585

log_file_header_.message().parts_index();

586

log_file_header_.mutable_message()->mutate_parts_index(

587

message_reader_->log_file_header()->parts_index());

588

Austin Schuh

2020-08-01 14:42:45 -0700

[diff] [blame]

589

CHECK(CompareFlatBuffer(message_reader_->raw_log_file_header(),

590

log_file_header_))

Austin Schuh

2020-01-07 20:07:41 -0800

[diff] [blame]

591

<< ": Header is different between log file chunks "

592

<< filenames_[next_filename_index_] << " and "

593

<< filenames_[next_filename_index_ - 1] << ", this is not supported.";

Austin Schuh

2020-09-09 22:47:47 -0700

[diff] [blame]

594

595

log_file_header_.mutable_message()->mutate_parts_index(

596

original_parts_index);

Austin Schuh

2020-01-07 20:07:41 -0800

[diff] [blame]

597

}

598

599

++next_filename_index_;

return true;

}

Austin Schuh

2020-01-26 20:34:50 -0800

[diff] [blame]

603

bool SplitMessageReader::QueueMessages(

Austin Schuh

2020-02-02 17:30:07 -0800

[diff] [blame]

604

monotonic_clock::time_point last_dequeued_time) {

Austin Schuh

2020-01-26 20:34:50 -0800

[diff] [blame]

605

// TODO(austin): Once we are happy that everything works, read a 256kb chunk

606

// to reduce the need to re-heap down below.

Austin Schuh

2020-02-02 17:30:07 -0800

[diff] [blame]

607

608

// Special case no more data. Otherwise we blow up on the CHECK statement

609

// confirming that we have enough data queued.

if (at_end_) {

return false;

}

// If this isn't the first time around, confirm that we had enough data queued

615

// to follow the contract.

616

if (time_to_queue_ != monotonic_clock::min_time) {

617

CHECK_LE(last_dequeued_time,

618

newest_timestamp() - max_out_of_order_duration())

619

<< " node " << FlatbufferToJson(node()) << " on " << this;

620

621

// Bail if there is enough data already queued.

622

if (last_dequeued_time < time_to_queue_) {

Austin Schuh

2020-08-24 16:06:09 -0700

[diff] [blame]

623

VLOG(1) << MaybeNodeName(target_node_) << "All up to date on " << this

624

<< ", dequeued " << last_dequeued_time << " queue time "

625

<< time_to_queue_;

Austin Schuh

2020-02-02 17:30:07 -0800

[diff] [blame]

return true;

}

} else {

// Startup takes a special dance. We want to queue up until the start time,

630

// but we then want to find the next message to read. The conservative

631

// answer is to immediately trigger a second requeue to get things moving.

632

time_to_queue_ = monotonic_start_time();

633

QueueMessages(time_to_queue_);

634

}

635

636

// If we are asked to queue, queue for at least max_out_of_order_duration past

637

// the last known time in the log file (ie the newest timestep read). As long

638

// as we requeue exactly when time_to_queue_ is dequeued and go no further, we

639

// are safe. And since we pop in order, that works.

640

//

641

// Special case the start of the log file. There should be at most 1 message

642

// from each channel at the start of the log file. So always force the start

643

// of the log file to just be read.

644

time_to_queue_ = std::max(time_to_queue_, newest_timestamp());

Austin Schuh

2020-08-24 16:06:09 -0700

[diff] [blame]

645

VLOG(1) << MaybeNodeName(target_node_) << "Queueing, going until "

646

<< time_to_queue_ << " " << filename();

Austin Schuh

2020-02-02 17:30:07 -0800

[diff] [blame]

647

648

bool was_emplaced = false;

Austin Schuh

2020-01-26 20:34:50 -0800

[diff] [blame]

649

while (true) {

Austin Schuh

2020-02-02 17:30:07 -0800

[diff] [blame]

650

// Stop if we have enough.

Brian Silverman

98360e2

2020-04-28 16:51:20 -0700

[diff] [blame]

651

if (newest_timestamp() > time_to_queue_ + max_out_of_order_duration() &&

Austin Schuh

2020-02-02 17:30:07 -0800

[diff] [blame]

652

was_emplaced) {

Austin Schuh

2020-08-24 16:06:09 -0700

[diff] [blame]

653

VLOG(1) << MaybeNodeName(target_node_) << "Done queueing on " << this

654

<< ", queued to " << newest_timestamp() << " with requeue time "

655

<< time_to_queue_;

Austin Schuh

2020-01-26 20:34:50 -0800

[diff] [blame]

656

return true;

657

}

Austin Schuh

2020-01-01 17:11:17 -0800

[diff] [blame]

658

Austin Schuh

2020-01-26 20:34:50 -0800

[diff] [blame]

659

if (std::optional<FlatbufferVector<MessageHeader>> msg =

660

message_reader_->ReadMessage()) {

661

const MessageHeader &header = msg.value().message();

662

Austin Schuh

2020-02-02 17:30:07 -0800

[diff] [blame]

663

const monotonic_clock::time_point timestamp = monotonic_clock::time_point(

664

chrono::nanoseconds(header.monotonic_sent_time()));

Austin Schuh

2020-01-26 20:34:50 -0800

[diff] [blame]

665

Austin Schuh

0b5fd03

2020-03-28 17:36:49 -0700

[diff] [blame]

666

if (VLOG_IS_ON(2)) {

Brian Silverman

d90905f

2020-09-23 14:42:56 -0700

[diff] [blame]

667

LOG(INFO) << MaybeNodeName(target_node_) << "Queued " << this << " "

668

<< filename() << " ttq: " << time_to_queue_ << " now "

Austin Schuh

2020-08-24 16:06:09 -0700

[diff] [blame]

669

<< newest_timestamp() << " start time "

670

<< monotonic_start_time() << " " << FlatbufferToJson(&header);

Austin Schuh

0b5fd03

2020-03-28 17:36:49 -0700

[diff] [blame]

671

} else if (VLOG_IS_ON(1)) {

672

FlatbufferVector<MessageHeader> copy = msg.value();

673

copy.mutable_message()->clear_data();

Austin Schuh

2020-08-24 16:06:09 -0700

[diff] [blame]

674

LOG(INFO) << MaybeNodeName(target_node_) << "Queued " << this << " "

675

<< filename() << " ttq: " << time_to_queue_ << " now "

676

<< newest_timestamp() << " start time "

677

<< monotonic_start_time() << " " << FlatbufferToJson(copy);

Austin Schuh

0b5fd03

2020-03-28 17:36:49 -0700

[diff] [blame]

678

}

Austin Schuh

2020-02-02 17:30:07 -0800

[diff] [blame]

679

680

const int channel_index = header.channel_index();

681

was_emplaced = channels_to_write_[channel_index]->emplace_back(

682

std::move(msg.value()));

683

if (was_emplaced) {

684

newest_timestamp_ = std::max(newest_timestamp_, timestamp);

685

}

Austin Schuh

2020-01-26 20:34:50 -0800

[diff] [blame]

686

} else {

687

if (!NextLogFile()) {

Austin Schuh

2020-08-24 16:06:09 -0700

[diff] [blame]

688

VLOG(1) << MaybeNodeName(target_node_) << "No more files, last was "

689

<< filenames_.back();

Austin Schuh

2020-02-02 17:30:07 -0800

[diff] [blame]

690

at_end_ = true;

Austin Schuh

2020-02-13 21:18:22 -0800

[diff] [blame]

691

for (MessageHeaderQueue *queue : channels_to_write_) {

692

if (queue == nullptr || queue->timestamp_merger == nullptr) {

693

continue;

694

}

695

queue->timestamp_merger->NoticeAtEnd();

696

}

Austin Schuh

2020-01-26 20:34:50 -0800

[diff] [blame]

697

return false;

698

}

699

}

Austin Schuh

2020-01-01 17:11:17 -0800

[diff] [blame]

700

}

Austin Schuh

2020-01-26 20:34:50 -0800

[diff] [blame]

701

}

702

703

void SplitMessageReader::SetTimestampMerger(TimestampMerger *timestamp_merger,

704

int channel_index,

705

const Node *target_node) {

706

const Node *reinterpreted_target_node =

707

configuration::GetNodeOrDie(configuration(), target_node);

Austin Schuh

2020-08-24 16:06:09 -0700

[diff] [blame]

708

target_node_ = reinterpreted_target_node;

709

Austin Schuh

2020-01-26 20:34:50 -0800

[diff] [blame]

710

const Channel *const channel =

711

configuration()->channels()->Get(channel_index);

712

Austin Schuh

2020-02-02 17:30:07 -0800

[diff] [blame]

713

VLOG(1) << " Configuring merger " << this << " for channel " << channel_index

714

<< " "

715

<< configuration::CleanedChannelToString(

716

configuration()->channels()->Get(channel_index));

717

Austin Schuh

2020-01-26 20:34:50 -0800

[diff] [blame]

718

MessageHeaderQueue *message_header_queue = nullptr;

719

720

// Figure out if this log file is from our point of view, or the other node's

721

// point of view.

722

if (node() == reinterpreted_target_node) {

Austin Schuh

2020-02-02 17:30:07 -0800

[diff] [blame]

723

VLOG(1) << " Replaying as logged node " << filename();

724

725

if (configuration::ChannelIsSendableOnNode(channel, node())) {

726

VLOG(1) << " Data on node";

727

message_header_queue = &(channels_[channel_index].data);

728

} else if (configuration::ChannelIsReadableOnNode(channel, node())) {

729

VLOG(1) << " Timestamps on node";

730

message_header_queue =

731

&(channels_[channel_index].timestamps[configuration::GetNodeIndex(

732

configuration(), node())]);

Austin Schuh

2020-01-26 20:34:50 -0800

[diff] [blame]

733

} else {

Austin Schuh

2020-02-02 17:30:07 -0800

[diff] [blame]

734

VLOG(1) << " Dropping";

Austin Schuh

2020-01-26 20:34:50 -0800

[diff] [blame]

735

}

736

} else {

Austin Schuh

2020-02-02 17:30:07 -0800

[diff] [blame]

737

VLOG(1) << " Replaying as other node " << filename();

Austin Schuh

2020-01-26 20:34:50 -0800

[diff] [blame]

738

// We are replaying from another node's point of view. The only interesting

Austin Schuh

2020-02-02 17:30:07 -0800

[diff] [blame]

739

// data is data that is sent from our node and received on theirs.

740

if (configuration::ChannelIsReadableOnNode(channel,

741

reinterpreted_target_node) &&

742

configuration::ChannelIsSendableOnNode(channel, node())) {

743

VLOG(1) << " Readable on target node";

Austin Schuh

2020-01-26 20:34:50 -0800

[diff] [blame]

744

// Data from another node.

745

message_header_queue = &(channels_[channel_index].data);

746

} else {

Austin Schuh

2020-02-02 17:30:07 -0800

[diff] [blame]

747

VLOG(1) << " Dropping";

Austin Schuh

2020-01-26 20:34:50 -0800

[diff] [blame]

748

// This is either not sendable on the other node, or is a timestamp and

749

// therefore not interesting.

}

}

// If we found one, write it down. This will be nullptr when there is nothing

754

// relevant on this channel on this node for the target node. In that case,

755

// we want to drop the message instead of queueing it.

756

if (message_header_queue != nullptr) {

757

message_header_queue->timestamp_merger = timestamp_merger;

}

}

std::tuple<monotonic_clock::time_point, uint32_t,

762

FlatbufferVector<MessageHeader>>

763

SplitMessageReader::PopOldest(int channel_index) {

764

CHECK_GT(channels_[channel_index].data.size(), 0u);

Austin Schuh

2020-02-02 17:30:07 -0800

[diff] [blame]

765

const std::tuple<monotonic_clock::time_point, uint32_t, const MessageHeader *>

766

timestamp = channels_[channel_index].data.front_timestamp();

Austin Schuh

2020-01-26 20:34:50 -0800

[diff] [blame]

767

FlatbufferVector<MessageHeader> front =

768

std::move(channels_[channel_index].data.front());

Austin Schuh

2020-09-01 22:38:28 -0700

[diff] [blame]

769

channels_[channel_index].data.PopFront();

Austin Schuh

2020-02-02 17:30:07 -0800

[diff] [blame]

770

Austin Schuh

2020-09-01 22:38:28 -0700

[diff] [blame]

771

VLOG(1) << MaybeNodeName(target_node_) << "Popped Data " << this << " "

772

<< std::get<0>(timestamp) << " for "

773

<< configuration::StrippedChannelToString(

774

configuration()->channels()->Get(channel_index))

775

<< " (" << channel_index << ")";

Austin Schuh

2020-02-02 17:30:07 -0800

[diff] [blame]

776

777

QueueMessages(std::get<0>(timestamp));

Austin Schuh

2020-01-26 20:34:50 -0800

[diff] [blame]

778

779

return std::make_tuple(std::get<0>(timestamp), std::get<1>(timestamp),

std::move(front));

}

std::tuple<monotonic_clock::time_point, uint32_t,

784

FlatbufferVector<MessageHeader>>

Austin Schuh

2020-09-01 22:38:28 -0700

[diff] [blame]

785

SplitMessageReader::PopOldestTimestamp(int channel, int node_index) {

Austin Schuh

2020-01-26 20:34:50 -0800

[diff] [blame]

786

CHECK_GT(channels_[channel].timestamps[node_index].size(), 0u);

Austin Schuh

2020-02-02 17:30:07 -0800

[diff] [blame]

787

const std::tuple<monotonic_clock::time_point, uint32_t, const MessageHeader *>

788

timestamp = channels_[channel].timestamps[node_index].front_timestamp();

Austin Schuh

2020-01-26 20:34:50 -0800

[diff] [blame]

789

FlatbufferVector<MessageHeader> front =

790

std::move(channels_[channel].timestamps[node_index].front());

Austin Schuh

2020-09-01 22:38:28 -0700

[diff] [blame]

791

channels_[channel].timestamps[node_index].PopFront();

Austin Schuh

2020-02-02 17:30:07 -0800

[diff] [blame]

792

Austin Schuh

2020-09-01 22:38:28 -0700

[diff] [blame]

793

VLOG(1) << MaybeNodeName(target_node_) << "Popped timestamp " << this << " "

Austin Schuh

2020-08-24 16:06:09 -0700

[diff] [blame]

794

<< std::get<0>(timestamp) << " for "

795

<< configuration::StrippedChannelToString(

796

configuration()->channels()->Get(channel))

Austin Schuh

2020-09-01 22:38:28 -0700

[diff] [blame]

797

<< " on "

798

<< configuration()->nodes()->Get(node_index)->name()->string_view()

799

<< " (" << node_index << ")";

Austin Schuh

2020-02-02 17:30:07 -0800

[diff] [blame]

800

801

QueueMessages(std::get<0>(timestamp));

Austin Schuh

2020-01-26 20:34:50 -0800

[diff] [blame]

802

803

return std::make_tuple(std::get<0>(timestamp), std::get<1>(timestamp),

std::move(front));

}

Austin Schuh

2020-02-02 17:30:07 -0800

[diff] [blame]

807

bool SplitMessageReader::MessageHeaderQueue::emplace_back(

Austin Schuh

2020-01-26 20:34:50 -0800

[diff] [blame]

808

FlatbufferVector<MessageHeader> &&msg) {

809

CHECK(split_reader != nullptr);

810

811

// If there is no timestamp merger for this queue, nobody is listening. Drop

812

// the message. This happens when a log file from another node is replayed,

813

// and the timestamp mergers down stream just don't care.

814

if (timestamp_merger == nullptr) {

Austin Schuh

2020-02-02 17:30:07 -0800

[diff] [blame]

815

return false;

Austin Schuh

2020-01-26 20:34:50 -0800

[diff] [blame]

816

}

817

818

CHECK(timestamps != msg.message().has_data())

819

<< ": Got timestamps and data mixed up on a node. "

820

<< FlatbufferToJson(msg);

821

822

data_.emplace_back(std::move(msg));

823

824

if (data_.size() == 1u) {

825

// Yup, new data. Notify.

826

if (timestamps) {

827

timestamp_merger->UpdateTimestamp(split_reader, front_timestamp());

828

} else {

829

timestamp_merger->Update(split_reader, front_timestamp());

830

}

831

}

Austin Schuh

2020-02-02 17:30:07 -0800

[diff] [blame]

832

833

return true;

Austin Schuh

2020-01-26 20:34:50 -0800

[diff] [blame]

834

}

835

Austin Schuh

2020-09-01 22:38:28 -0700

[diff] [blame]

836

void SplitMessageReader::MessageHeaderQueue::PopFront() {

Austin Schuh

2020-01-26 20:34:50 -0800

[diff] [blame]

837

data_.pop_front();

838

if (data_.size() != 0u) {

839

// Yup, new data.

840

if (timestamps) {

841

timestamp_merger->UpdateTimestamp(split_reader, front_timestamp());

842

} else {

843

timestamp_merger->Update(split_reader, front_timestamp());

844

}

Austin Schuh

2020-09-01 22:38:28 -0700

[diff] [blame]

845

} else {

846

// Poke anyways to update the heap.

847

if (timestamps) {

848

timestamp_merger->UpdateTimestamp(

849

nullptr, std::make_tuple(monotonic_clock::min_time, 0, nullptr));

850

} else {

851

timestamp_merger->Update(

852

nullptr, std::make_tuple(monotonic_clock::min_time, 0, nullptr));

853

}

Austin Schuh

2020-01-26 20:34:50 -0800

[diff] [blame]

854

}

Austin Schuh

2020-01-01 17:11:17 -0800

[diff] [blame]

}

namespace {

Austin Schuh

2020-01-26 20:34:50 -0800

[diff] [blame]

859

bool SplitMessageReaderHeapCompare(

860

const std::tuple<monotonic_clock::time_point, uint32_t,

861

SplitMessageReader *>

862

first,

863

const std::tuple<monotonic_clock::time_point, uint32_t,

864

SplitMessageReader *>

865

second) {

866

if (std::get<0>(first) > std::get<0>(second)) {

867

return true;

868

} else if (std::get<0>(first) == std::get<0>(second)) {

869

if (std::get<1>(first) > std::get<1>(second)) {

870

return true;

871

} else if (std::get<1>(first) == std::get<1>(second)) {

872

return std::get<2>(first) > std::get<2>(second);

} else {

return false;

}

} else {

return false;

}

}

Austin Schuh

2020-01-01 17:11:17 -0800

[diff] [blame]

881

bool ChannelHeapCompare(

882

const std::pair<monotonic_clock::time_point, int> first,

883

const std::pair<monotonic_clock::time_point, int> second) {

884

if (first.first > second.first) {

885

return true;

886

} else if (first.first == second.first) {

887

return first.second > second.second;

} else {

return false;

}

}

} // namespace

Austin Schuh

2020-01-26 20:34:50 -0800

[diff] [blame]

895

// Builds the merger for channel_index as seen from target_node.
//
// node_index_ is target_node's index in a multi-node configuration and -1
// otherwise.  Every reader in split_message_readers is told to notify this
// merger, and has_timestamps_ is set when the channel is readable but not
// sendable on target_node.
TimestampMerger::TimestampMerger(
    const Configuration *configuration,
    std::vector<SplitMessageReader *> split_message_readers, int channel_index,
    const Node *target_node, ChannelMerger *channel_merger)
    : configuration_(configuration),
      split_message_readers_(std::move(split_message_readers)),
      channel_index_(channel_index),
      node_index_(configuration::MultiNode(configuration)
                      ? configuration::GetNodeIndex(configuration, target_node)
                      : -1),
      channel_merger_(channel_merger) {
  VLOG(1) << "Configuring channel " << channel_index << " target node "
          << FlatbufferToJson(target_node);

  // Tell the readers we care so they know who to notify.
  for (SplitMessageReader *const split_reader : split_message_readers_) {
    split_reader->SetTimestampMerger(this, channel_index, target_node);
  }

  // And then determine if we need to track timestamps.
  const Channel *const channel = configuration->channels()->Get(channel_index);
  if (configuration::ChannelIsSendableOnNode(channel, target_node)) {
    return;
  }
  if (configuration::ChannelIsReadableOnNode(channel, target_node)) {
    has_timestamps_ = true;
  }
}

void TimestampMerger::PushMessageHeap(

Austin Schuh

2020-02-02 17:30:07 -0800

[diff] [blame]

922

std::tuple<monotonic_clock::time_point, uint32_t, const MessageHeader *>

923

timestamp,

Austin Schuh

2020-01-26 20:34:50 -0800

[diff] [blame]

924

SplitMessageReader *split_message_reader) {

Austin Schuh

2020-09-01 22:38:28 -0700

[diff] [blame]

925

if (split_message_reader != nullptr) {

926

DCHECK(std::find_if(message_heap_.begin(), message_heap_.end(),

927

[split_message_reader](

928

const std::tuple<monotonic_clock::time_point,

929

uint32_t, SplitMessageReader *>

930

x) {

931

return std::get<2>(x) == split_message_reader;

932

}) == message_heap_.end())

933

<< ": Pushing message when it is already in the heap.";

Austin Schuh

2020-01-26 20:34:50 -0800

[diff] [blame]

934

Austin Schuh

2020-09-01 22:38:28 -0700

[diff] [blame]

935

message_heap_.push_back(std::make_tuple(

936

std::get<0>(timestamp), std::get<1>(timestamp), split_message_reader));

Austin Schuh

2020-01-26 20:34:50 -0800

[diff] [blame]

937

Austin Schuh

2020-09-01 22:38:28 -0700

[diff] [blame]

938

std::push_heap(message_heap_.begin(), message_heap_.end(),

939

&SplitMessageReaderHeapCompare);

940

}

Austin Schuh

2020-01-26 20:34:50 -0800

[diff] [blame]

941

942

// If we are just a data merger, don't wait for timestamps.

943

if (!has_timestamps_) {

Austin Schuh

2020-09-01 22:38:28 -0700

[diff] [blame]

944

if (!message_heap_.empty()) {

945

channel_merger_->Update(std::get<0>(message_heap_[0]), channel_index_);

946

pushed_ = true;

947

} else {

948

// Remove ourselves if we are empty.

949

channel_merger_->Update(monotonic_clock::min_time, channel_index_);

950

}

Austin Schuh

2020-01-26 20:34:50 -0800

[diff] [blame]

}

}

Austin Schuh

2020-02-02 17:30:07 -0800

[diff] [blame]

954

std::tuple<monotonic_clock::time_point, uint32_t, const MessageHeader *>

955

TimestampMerger::oldest_message() const {

956

CHECK_GT(message_heap_.size(), 0u);

957

std::tuple<monotonic_clock::time_point, uint32_t, SplitMessageReader *>

958

oldest_message_reader = message_heap_.front();

959

return std::get<2>(oldest_message_reader)->oldest_message(channel_index_);

960

}

961

962

std::tuple<monotonic_clock::time_point, uint32_t, const MessageHeader *>

963

TimestampMerger::oldest_timestamp() const {

964

CHECK_GT(timestamp_heap_.size(), 0u);

965

std::tuple<monotonic_clock::time_point, uint32_t, SplitMessageReader *>

966

oldest_message_reader = timestamp_heap_.front();

967

return std::get<2>(oldest_message_reader)

968

->oldest_message(channel_index_, node_index_);

969

}

970

Austin Schuh

2020-01-26 20:34:50 -0800

[diff] [blame]

971

void TimestampMerger::PushTimestampHeap(

Austin Schuh

2020-02-02 17:30:07 -0800

[diff] [blame]

972

std::tuple<monotonic_clock::time_point, uint32_t, const MessageHeader *>

973

timestamp,

Austin Schuh

2020-01-26 20:34:50 -0800

[diff] [blame]

974

SplitMessageReader *split_message_reader) {

Austin Schuh

2020-09-01 22:38:28 -0700

[diff] [blame]

975

if (split_message_reader != nullptr) {

976

DCHECK(std::find_if(timestamp_heap_.begin(), timestamp_heap_.end(),

977

[split_message_reader](

978

const std::tuple<monotonic_clock::time_point,

979

uint32_t, SplitMessageReader *>

980

x) {

981

return std::get<2>(x) == split_message_reader;

982

}) == timestamp_heap_.end())

983

<< ": Pushing timestamp when it is already in the heap.";

Austin Schuh

2020-01-26 20:34:50 -0800

[diff] [blame]

984

Austin Schuh

2020-09-01 22:38:28 -0700

[diff] [blame]

985

timestamp_heap_.push_back(std::make_tuple(

986

std::get<0>(timestamp), std::get<1>(timestamp), split_message_reader));

Austin Schuh

2020-01-26 20:34:50 -0800

[diff] [blame]

987

Austin Schuh

2020-09-01 22:38:28 -0700

[diff] [blame]

988

std::push_heap(timestamp_heap_.begin(), timestamp_heap_.end(),

989

SplitMessageReaderHeapCompare);

990

}

Austin Schuh

2020-01-26 20:34:50 -0800

[diff] [blame]

991

992

// If we are a timestamp merger, don't wait for data. Missing data will be

993

// caught at read time.

994

if (has_timestamps_) {

Austin Schuh

2020-09-01 22:38:28 -0700

[diff] [blame]

995

if (!timestamp_heap_.empty()) {

996

channel_merger_->Update(std::get<0>(timestamp_heap_[0]), channel_index_);

997

pushed_ = true;

998

} else {

999

// Remove ourselves if we are empty.

1000

channel_merger_->Update(monotonic_clock::min_time, channel_index_);

1001

}

Austin Schuh

2020-01-26 20:34:50 -0800

[diff] [blame]

}

}

std::tuple<monotonic_clock::time_point, uint32_t,

1006

FlatbufferVector<MessageHeader>>

1007

TimestampMerger::PopMessageHeap() {

1008

// Pop the oldest message reader pointer off the heap.

1009

CHECK_GT(message_heap_.size(), 0u);

1010

std::tuple<monotonic_clock::time_point, uint32_t, SplitMessageReader *>

1011

oldest_message_reader = message_heap_.front();

1012

1013

std::pop_heap(message_heap_.begin(), message_heap_.end(),

1014

&SplitMessageReaderHeapCompare);

1015

message_heap_.pop_back();

1016

1017

// Pop the oldest message. This re-pushes any messages from the reader to the

1018

// message heap.

1019

std::tuple<monotonic_clock::time_point, uint32_t,

1020

FlatbufferVector<MessageHeader>>

1021

oldest_message =

1022

std::get<2>(oldest_message_reader)->PopOldest(channel_index_);

1023

1024

// Confirm that the time and queue_index we have recorded matches.

1025

CHECK_EQ(std::get<0>(oldest_message), std::get<0>(oldest_message_reader));

1026

CHECK_EQ(std::get<1>(oldest_message), std::get<1>(oldest_message_reader));

1027

1028

// Now, keep reading until we have found all duplicates.

Brian Silverman

2020-08-12 12:02:38 -0700

[diff] [blame]

1029

while (!message_heap_.empty()) {

Austin Schuh

2020-01-26 20:34:50 -0800

[diff] [blame]

1030

// See if it is a duplicate.

1031

std::tuple<monotonic_clock::time_point, uint32_t, SplitMessageReader *>

1032

next_oldest_message_reader = message_heap_.front();

1033

Austin Schuh

2020-02-02 17:30:07 -0800

[diff] [blame]

1034

std::tuple<monotonic_clock::time_point, uint32_t, const MessageHeader *>

1035

next_oldest_message_time = std::get<2>(next_oldest_message_reader)

1036

->oldest_message(channel_index_);

Austin Schuh

2020-01-26 20:34:50 -0800

[diff] [blame]

1037

1038

if (std::get<0>(next_oldest_message_time) == std::get<0>(oldest_message) &&

1039

std::get<1>(next_oldest_message_time) == std::get<1>(oldest_message)) {

1040

// Pop the message reader pointer.

1041

std::pop_heap(message_heap_.begin(), message_heap_.end(),

1042

&SplitMessageReaderHeapCompare);

1043

message_heap_.pop_back();

1044

1045

// Pop the next oldest message. This re-pushes any messages from the

1046

// reader.

1047

std::tuple<monotonic_clock::time_point, uint32_t,

1048

FlatbufferVector<MessageHeader>>

1049

next_oldest_message = std::get<2>(next_oldest_message_reader)

1050

->PopOldest(channel_index_);

1051

1052

// And make sure the message matches in it's entirety.

1053

CHECK(std::get<2>(oldest_message).span() ==

1054

std::get<2>(next_oldest_message).span())

1055

<< ": Data at the same timestamp doesn't match.";

} else {

break;

}

}

return oldest_message;

1062

}

1063

1064

std::tuple<monotonic_clock::time_point, uint32_t,

1065

FlatbufferVector<MessageHeader>>

1066

TimestampMerger::PopTimestampHeap() {

1067

// Pop the oldest message reader pointer off the heap.

1068

CHECK_GT(timestamp_heap_.size(), 0u);

1069

1070

std::tuple<monotonic_clock::time_point, uint32_t, SplitMessageReader *>

1071

oldest_timestamp_reader = timestamp_heap_.front();

1072

1073

std::pop_heap(timestamp_heap_.begin(), timestamp_heap_.end(),

1074

&SplitMessageReaderHeapCompare);

1075

timestamp_heap_.pop_back();

1076

1077

CHECK(node_index_ != -1) << ": Timestamps in a single node environment";

1078

1079

// Pop the oldest message. This re-pushes any timestamps from the reader to

1080

// the timestamp heap.

1081

std::tuple<monotonic_clock::time_point, uint32_t,

1082

FlatbufferVector<MessageHeader>>

1083

oldest_timestamp = std::get<2>(oldest_timestamp_reader)

Austin Schuh

2020-09-01 22:38:28 -0700

[diff] [blame]

1084

->PopOldestTimestamp(channel_index_, node_index_);

Austin Schuh

2020-01-26 20:34:50 -0800

[diff] [blame]

1085

1086

// Confirm that the time we have recorded matches.

1087

CHECK_EQ(std::get<0>(oldest_timestamp), std::get<0>(oldest_timestamp_reader));

1088

CHECK_EQ(std::get<1>(oldest_timestamp), std::get<1>(oldest_timestamp_reader));

1089

Austin Schuh

2020-09-01 22:38:28 -0700

[diff] [blame]

1090

// Now, keep reading until we have found all duplicates.

1091

while (!timestamp_heap_.empty()) {

1092

// See if it is a duplicate.

1093

std::tuple<monotonic_clock::time_point, uint32_t, SplitMessageReader *>

1094

next_oldest_timestamp_reader = timestamp_heap_.front();

Austin Schuh

2020-01-26 20:34:50 -0800

[diff] [blame]

1095

Austin Schuh

2020-09-01 22:38:28 -0700

[diff] [blame]

1096

std::tuple<monotonic_clock::time_point, uint32_t, const MessageHeader *>

1097

next_oldest_timestamp_time =

1098

std::get<2>(next_oldest_timestamp_reader)

1099

->oldest_message(channel_index_, node_index_);

Austin Schuh

2020-01-26 20:34:50 -0800

[diff] [blame]

1100

Austin Schuh

2020-09-01 22:38:28 -0700

[diff] [blame]

1101

if (std::get<0>(next_oldest_timestamp_time) ==

1102

std::get<0>(oldest_timestamp) &&

1103

std::get<1>(next_oldest_timestamp_time) ==

1104

std::get<1>(oldest_timestamp)) {

1105

// Pop the timestamp reader pointer.

1106

std::pop_heap(timestamp_heap_.begin(), timestamp_heap_.end(),

1107

&SplitMessageReaderHeapCompare);

1108

timestamp_heap_.pop_back();

1109

1110

// Pop the next oldest timestamp. This re-pushes any messages from the

1111

// reader.

1112

std::tuple<monotonic_clock::time_point, uint32_t,

1113

FlatbufferVector<MessageHeader>>

1114

next_oldest_timestamp =

1115

std::get<2>(next_oldest_timestamp_reader)

1116

->PopOldestTimestamp(channel_index_, node_index_);

1117

1118

// And make sure the contents matches in it's entirety.

1119

CHECK(std::get<2>(oldest_timestamp).span() ==

1120

std::get<2>(next_oldest_timestamp).span())

1121

<< ": Data at the same timestamp doesn't match, "

1122

<< aos::FlatbufferToJson(std::get<2>(oldest_timestamp)) << " vs "

1123

<< aos::FlatbufferToJson(std::get<2>(next_oldest_timestamp)) << " "

1124

<< absl::BytesToHexString(std::string_view(

1125

reinterpret_cast<const char *>(

1126

std::get<2>(oldest_timestamp).span().data()),

1127

std::get<2>(oldest_timestamp).span().size()))

1128

<< " vs "

1129

<< absl::BytesToHexString(std::string_view(

1130

reinterpret_cast<const char *>(

1131

std::get<2>(next_oldest_timestamp).span().data()),

1132

std::get<2>(next_oldest_timestamp).span().size()));

} else {

break;

}

Austin Schuh

2020-02-13 21:18:22 -0800

[diff] [blame]

1137

}

1138

Austin Schuh

2020-09-01 22:38:28 -0700

[diff] [blame]

1139

return oldest_timestamp;

Austin Schuh

2020-02-13 21:18:22 -0800

[diff] [blame]

1140

}

1141

Austin Schuh

2020-01-26 20:34:50 -0800

[diff] [blame]

1142

std::tuple<TimestampMerger::DeliveryTimestamp, FlatbufferVector<MessageHeader>>

1143

TimestampMerger::PopOldest() {

1144

if (has_timestamps_) {

Austin Schuh

2020-09-01 22:38:28 -0700

[diff] [blame]

1145

VLOG(1) << "Looking for matching timestamp for "

1146

<< configuration::StrippedChannelToString(

1147

configuration_->channels()->Get(channel_index_))

1148

<< " (" << channel_index_ << ") "

1149

<< " at " << std::get<0>(oldest_timestamp());

1150

Austin Schuh

2020-02-13 21:18:22 -0800

[diff] [blame]

1151

// Read the timestamps.

Austin Schuh

2020-01-26 20:34:50 -0800

[diff] [blame]

1152

std::tuple<monotonic_clock::time_point, uint32_t,

1153

FlatbufferVector<MessageHeader>>

1154

oldest_timestamp = PopTimestampHeap();

1155

1156

TimestampMerger::DeliveryTimestamp timestamp;

1157

timestamp.monotonic_event_time =

1158

monotonic_clock::time_point(chrono::nanoseconds(

1159

std::get<2>(oldest_timestamp).message().monotonic_sent_time()));

1160

timestamp.realtime_event_time =

1161

realtime_clock::time_point(chrono::nanoseconds(

1162

std::get<2>(oldest_timestamp).message().realtime_sent_time()));

Austin Schuh

8d7e0bb

2020-10-02 17:57:00 -0700

[diff] [blame]

1163

timestamp.queue_index =

1164

std::get<2>(oldest_timestamp).message().queue_index();

Austin Schuh

2020-01-26 20:34:50 -0800

[diff] [blame]

1165

1166

// Consistency check.

1167

CHECK_EQ(timestamp.monotonic_event_time, std::get<0>(oldest_timestamp));

1168

CHECK_EQ(std::get<2>(oldest_timestamp).message().queue_index(),

1169

std::get<1>(oldest_timestamp));

1170

1171

monotonic_clock::time_point remote_timestamp_monotonic_time(

1172

chrono::nanoseconds(

1173

std::get<2>(oldest_timestamp).message().monotonic_remote_time()));

1174

Austin Schuh

2020-02-13 21:18:22 -0800

[diff] [blame]

1175

// See if we have any data. If not, pass the problem up the chain.

Brian Silverman

2020-08-12 12:02:38 -0700

[diff] [blame]

1176

if (message_heap_.empty()) {

Austin Schuh

2020-08-24 16:06:09 -0700

[diff] [blame]

1177

LOG(WARNING) << MaybeNodeName(configuration_->nodes()->Get(node_index_))

1178

<< "No data to match timestamp on "

1179

<< configuration::CleanedChannelToString(

1180

configuration_->channels()->Get(channel_index_))

1181

<< " (" << channel_index_ << ")";

Austin Schuh

2020-02-13 21:18:22 -0800

[diff] [blame]

1182

return std::make_tuple(timestamp,

1183

std::move(std::get<2>(oldest_timestamp)));

1184

}

1185

Austin Schuh

2020-01-26 20:34:50 -0800

[diff] [blame]

1186

while (true) {

Austin Schuh

2020-02-02 17:30:07 -0800

[diff] [blame]

1187

{

1188

// Ok, now try grabbing data until we find one which matches.

1189

std::tuple<monotonic_clock::time_point, uint32_t, const MessageHeader *>

1190

oldest_message_ref = oldest_message();

1191

1192

// Time at which the message was sent (this message is written from the

1193

// sending node's perspective.

1194

monotonic_clock::time_point remote_monotonic_time(chrono::nanoseconds(

1195

std::get<2>(oldest_message_ref)->monotonic_sent_time()));

1196

1197

if (remote_monotonic_time < remote_timestamp_monotonic_time) {

Austin Schuh

2020-08-24 16:06:09 -0700

[diff] [blame]

1198

LOG(WARNING) << configuration_->nodes()

->Get(node_index_)

->name()

->string_view()

<< " Undelivered message, skipping. Remote time is "

1203

<< remote_monotonic_time << " timestamp is "

1204

<< remote_timestamp_monotonic_time << " on channel "

1205

<< configuration::StrippedChannelToString(

1206

configuration_->channels()->Get(channel_index_))

1207

<< " (" << channel_index_ << ")";

Austin Schuh

2020-02-02 17:30:07 -0800

[diff] [blame]

1208

PopMessageHeap();

1209

continue;

1210

} else if (remote_monotonic_time > remote_timestamp_monotonic_time) {

Austin Schuh

2020-08-24 16:06:09 -0700

[diff] [blame]

1211

LOG(WARNING) << configuration_->nodes()

->Get(node_index_)

->name()

->string_view()

<< " Data not found. Remote time should be "

1216

<< remote_timestamp_monotonic_time

1217

<< ", message time is " << remote_monotonic_time

1218

<< " on channel "

1219

<< configuration::StrippedChannelToString(

1220

configuration_->channels()->Get(channel_index_))

Austin Schuh

2020-09-01 22:38:28 -0700

[diff] [blame]

1221

<< " (" << channel_index_ << ")"

1222

<< (VLOG_IS_ON(1) ? DebugString() : "");

Austin Schuh

2020-02-02 17:30:07 -0800

[diff] [blame]

1223

return std::make_tuple(timestamp,

1224

std::move(std::get<2>(oldest_timestamp)));

1225

}

1226

1227

timestamp.monotonic_remote_time = remote_monotonic_time;

1228

}

1229

Austin Schuh

2020-09-01 22:38:28 -0700

[diff] [blame]

1230

VLOG(1) << "Found matching data "

1231

<< configuration::StrippedChannelToString(

1232

configuration_->channels()->Get(channel_index_))

1233

<< " (" << channel_index_ << ")";

Austin Schuh

2020-01-26 20:34:50 -0800

[diff] [blame]

1234

std::tuple<monotonic_clock::time_point, uint32_t,

1235

FlatbufferVector<MessageHeader>>

1236

oldest_message = PopMessageHeap();

1237

Austin Schuh

2020-01-26 20:34:50 -0800

[diff] [blame]

1238

timestamp.realtime_remote_time =

1239

realtime_clock::time_point(chrono::nanoseconds(

1240

std::get<2>(oldest_message).message().realtime_sent_time()));

1241

timestamp.remote_queue_index =

1242

std::get<2>(oldest_message).message().queue_index();

1243

Austin Schuh

2020-02-02 17:30:07 -0800

[diff] [blame]

1244

CHECK_EQ(timestamp.monotonic_remote_time,

1245

remote_timestamp_monotonic_time);

1246

1247

CHECK_EQ(timestamp.remote_queue_index,

1248

std::get<2>(oldest_timestamp).message().remote_queue_index())

1249

<< ": " << FlatbufferToJson(&std::get<2>(oldest_timestamp).message())

1250

<< " data "

1251

<< FlatbufferToJson(&std::get<2>(oldest_message).message());

Austin Schuh

2020-01-26 20:34:50 -0800

[diff] [blame]

1252

Austin Schuh

30dd5c5

2020-08-01 14:43:44 -0700

[diff] [blame]

1253

return std::make_tuple(timestamp, std::move(std::get<2>(oldest_message)));

Austin Schuh

2020-01-26 20:34:50 -0800

[diff] [blame]

1254

}

1255

} else {

1256

std::tuple<monotonic_clock::time_point, uint32_t,

1257

FlatbufferVector<MessageHeader>>

1258

oldest_message = PopMessageHeap();

1259

1260

TimestampMerger::DeliveryTimestamp timestamp;

1261

timestamp.monotonic_event_time =

1262

monotonic_clock::time_point(chrono::nanoseconds(

1263

std::get<2>(oldest_message).message().monotonic_sent_time()));

1264

timestamp.realtime_event_time =

1265

realtime_clock::time_point(chrono::nanoseconds(

1266

std::get<2>(oldest_message).message().realtime_sent_time()));

Austin Schuh

8d7e0bb

2020-10-02 17:57:00 -0700

[diff] [blame]

1267

timestamp.queue_index = std::get<2>(oldest_message).message().queue_index();

Austin Schuh

2020-01-26 20:34:50 -0800

[diff] [blame]

1268

timestamp.remote_queue_index = 0xffffffff;

1269

1270

CHECK_EQ(std::get<0>(oldest_message), timestamp.monotonic_event_time);

1271

CHECK_EQ(std::get<1>(oldest_message),

1272

std::get<2>(oldest_message).message().queue_index());

1273

Austin Schuh

30dd5c5

2020-08-01 14:43:44 -0700

[diff] [blame]

1274

return std::make_tuple(timestamp, std::move(std::get<2>(oldest_message)));

Austin Schuh

2020-01-26 20:34:50 -0800

[diff] [blame]

}

}

Austin Schuh

2020-02-13 21:18:22 -0800

[diff] [blame]

1278

// Forwards the at-end notification to channel_merger_.
void TimestampMerger::NoticeAtEnd() {
  channel_merger_->NoticeAtEnd();
}

1279

Austin Schuh

2020-01-26 20:34:50 -0800

[diff] [blame]

1280

namespace {

1281

std::vector<std::unique_ptr<SplitMessageReader>> MakeSplitMessageReaders(

1282

const std::vector<std::vector<std::string>> &filenames) {

1283

CHECK_GT(filenames.size(), 0u);

1284

// Build up all the SplitMessageReaders.

1285

std::vector<std::unique_ptr<SplitMessageReader>> result;

1286

for (const std::vector<std::string> &filenames : filenames) {

1287

result.emplace_back(std::make_unique<SplitMessageReader>(filenames));

}

return result;

}

} // namespace

// Creates a SplitMessageReader for each group of file names, takes the raw log
// file header of the first reader as log_file_header_, and then requires every
// reader to carry a configuration equal to that header's configuration.
ChannelMerger::ChannelMerger(
    const std::vector<std::vector<std::string>> &filenames)
    : split_message_readers_(MakeSplitMessageReaders(filenames)),
      log_file_header_(split_message_readers_[0]->raw_log_file_header()) {
  // A mismatch in configuration between any two log files is fatal.
  for (const auto &reader : split_message_readers_) {
    CHECK(CompareFlatBuffer(log_file_header_.message().configuration(),
                            reader->log_file_header()->configuration()))
        << ": Replaying log files with different configurations isn't "
           "supported";
  }

  // Remember the nodes reported by configuration::GetNodes().
  nodes_ = configuration::GetNodes(configuration());
}

1309

1310

bool ChannelMerger::SetNode(const Node *target_node) {

1311

std::vector<SplitMessageReader *> split_message_readers;

1312

for (const std::unique_ptr<SplitMessageReader> &reader :

1313

split_message_readers_) {

1314

split_message_readers.emplace_back(reader.get());

1315

}

1316

1317

// Go find a log_file_header for this node.

1318

{

1319

bool found_node = false;

1320

1321

for (const std::unique_ptr<SplitMessageReader> &reader :

1322

split_message_readers_) {

James Kuszmaul

fc273dc

2020-05-09 17:56:19 -0700

[diff] [blame]

1323

// In order to identify which logfile(s) map to the target node, do a

1324

// logical comparison of the nodes, by confirming that we are either in a

1325

// single-node setup (where the nodes will both be nullptr) or that the

1326

// node names match (but the other node fields--e.g., hostname lists--may

1327

// not).

1328

const bool both_null =

1329

reader->node() == nullptr && target_node == nullptr;

1330

const bool both_have_name =

1331

(reader->node() != nullptr) && (target_node != nullptr) &&

1332

(reader->node()->has_name() && target_node->has_name());

1333

const bool node_names_identical =

Brian Silverman

d90905f

2020-09-23 14:42:56 -0700

[diff] [blame]

1334

both_have_name && (reader->node()->name()->string_view() ==

1335

target_node->name()->string_view());

James Kuszmaul

fc273dc

2020-05-09 17:56:19 -0700

[diff] [blame]

1336

if (both_null || node_names_identical) {

Austin Schuh

2020-01-26 20:34:50 -0800

[diff] [blame]

1337

if (!found_node) {

1338

found_node = true;

1339

log_file_header_ = CopyFlatBuffer(reader->log_file_header());

Austin Schuh

2020-02-02 17:30:07 -0800

[diff] [blame]

1340

VLOG(1) << "Found log file " << reader->filename() << " with node "

1341

<< FlatbufferToJson(reader->node()) << " start_time "

1342

<< monotonic_start_time();

Austin Schuh

2020-01-26 20:34:50 -0800

[diff] [blame]

1343

} else {

Austin Schuh

2020-09-01 22:38:28 -0700

[diff] [blame]

1344

// Find the earliest start time. That way, if we get a full log file

1345

// directly from the node, and a partial later, we start with the

1346

// full. Update our header to match that.

1347

const monotonic_clock::time_point new_monotonic_start_time(

1348

chrono::nanoseconds(

1349

reader->log_file_header()->monotonic_start_time()));

1350

const realtime_clock::time_point new_realtime_start_time(

1351

chrono::nanoseconds(

1352

reader->log_file_header()->realtime_start_time()));

1353

1354

if (monotonic_start_time() == monotonic_clock::min_time ||

1355

(new_monotonic_start_time != monotonic_clock::min_time &&

1356

new_monotonic_start_time < monotonic_start_time())) {

1357

log_file_header_.mutable_message()->mutate_monotonic_start_time(

1358

new_monotonic_start_time.time_since_epoch().count());

1359

log_file_header_.mutable_message()->mutate_realtime_start_time(

1360

new_realtime_start_time.time_since_epoch().count());

1361

VLOG(1) << "Updated log file " << reader->filename()

1362

<< " with node " << FlatbufferToJson(reader->node())

1363

<< " start_time " << new_monotonic_start_time;

1364

}

Austin Schuh

2020-01-26 20:34:50 -0800

[diff] [blame]

}

}

}

if (!found_node) {

LOG(WARNING) << "Failed to find log file for node "

1371

<< FlatbufferToJson(target_node);

return false;

}

}

// Build up all the timestamp mergers. This connects up all the

1377

// SplitMessageReaders.

1378

timestamp_mergers_.reserve(configuration()->channels()->size());

1379

for (size_t channel_index = 0;

1380

channel_index < configuration()->channels()->size(); ++channel_index) {

1381

timestamp_mergers_.emplace_back(

1382

configuration(), split_message_readers, channel_index,

1383

configuration::GetNode(configuration(), target_node), this);

1384

}

1385

1386

// And prime everything.

Austin Schuh

2020-01-26 20:34:50 -0800

[diff] [blame]

1387

for (std::unique_ptr<SplitMessageReader> &split_message_reader :

1388

split_message_readers_) {

Austin Schuh

2020-02-02 17:30:07 -0800

[diff] [blame]

1389

split_message_reader->QueueMessages(

1390

split_message_reader->monotonic_start_time());

Austin Schuh

2020-01-26 20:34:50 -0800

[diff] [blame]

1391

}

1392

1393

node_ = configuration::GetNodeOrDie(configuration(), target_node);

return true;

}

Austin Schuh

2020-08-31 16:56:12 -0700

[diff] [blame]

1397

// Returns the timestamp stored at the front of channel_heap_, or
// monotonic_clock::max_time when the heap is empty.
monotonic_clock::time_point ChannelMerger::OldestMessageTime() const {
  if (!channel_heap_.empty()) {
    return channel_heap_.front().first;
  }
  return monotonic_clock::max_time;
}

1403

1404

void ChannelMerger::PushChannelHeap(monotonic_clock::time_point timestamp,

1405

int channel_index) {

1406

// Pop and recreate the heap if it has already been pushed. And since we are

1407

// pushing again, we don't need to clear pushed.

1408

if (timestamp_mergers_[channel_index].pushed()) {

Brian Silverman

2020-08-12 12:02:38 -0700

[diff] [blame]

1409

const auto channel_iterator = std::find_if(

Austin Schuh

2020-01-26 20:34:50 -0800

[diff] [blame]

1410

channel_heap_.begin(), channel_heap_.end(),

1411

[channel_index](const std::pair<monotonic_clock::time_point, int> x) {

1412

return x.second == channel_index;

Brian Silverman

2020-08-12 12:02:38 -0700

[diff] [blame]

1413

});

1414

DCHECK(channel_iterator != channel_heap_.end());

1415

if (std::get<0>(*channel_iterator) == timestamp) {

1416

// It's already in the heap, in the correct spot, so nothing

1417

// more for us to do here.

1418

return;

1419

}

1420

channel_heap_.erase(channel_iterator);

Austin Schuh

2020-01-26 20:34:50 -0800

[diff] [blame]

1421

std::make_heap(channel_heap_.begin(), channel_heap_.end(),

ChannelHeapCompare);

}

Austin Schuh

2020-09-01 22:38:28 -0700

[diff] [blame]

1425

if (timestamp == monotonic_clock::min_time) {

1426

timestamp_mergers_[channel_index].set_pushed(false);

return;

}

Austin Schuh

2020-01-01 17:11:17 -0800

[diff] [blame]

1430

channel_heap_.push_back(std::make_pair(timestamp, channel_index));

1431

1432

// The default sort puts the newest message first. Use a custom comparator to

1433

// put the oldest message first.

1434

std::push_heap(channel_heap_.begin(), channel_heap_.end(),

ChannelHeapCompare);

}

Austin Schuh

2020-09-01 22:38:28 -0700

[diff] [blame]

1438

void ChannelMerger::VerifyHeaps() {

Austin Schuh

661a8d8

2020-09-13 17:25:56 -0700

[diff] [blame]

1439

std::vector<std::pair<monotonic_clock::time_point, int>> channel_heap =

1440

channel_heap_;

1441

std::make_heap(channel_heap.begin(), channel_heap.end(), &ChannelHeapCompare);

Austin Schuh

2020-09-01 22:38:28 -0700

[diff] [blame]

1442

Austin Schuh

661a8d8

2020-09-13 17:25:56 -0700

[diff] [blame]

1443

for (size_t i = 0; i < channel_heap_.size(); ++i) {

1444

CHECK(channel_heap_[i] == channel_heap[i]) << ": Heaps diverged...";

1445

CHECK_EQ(

1446

std::get<0>(channel_heap[i]),

1447

timestamp_mergers_[std::get<1>(channel_heap[i])].channel_merger_time());

Austin Schuh

2020-09-01 22:38:28 -0700

[diff] [blame]

}

}

Austin Schuh

2020-01-26 20:34:50 -0800

[diff] [blame]

1451

std::tuple<TimestampMerger::DeliveryTimestamp, int,

1452

FlatbufferVector<MessageHeader>>

1453

ChannelMerger::PopOldest() {

Austin Schuh

2020-02-13 21:18:22 -0800

[diff] [blame]

1454

CHECK_GT(channel_heap_.size(), 0u);

Austin Schuh

2020-01-01 17:11:17 -0800

[diff] [blame]

1455

std::pair<monotonic_clock::time_point, int> oldest_channel_data =

1456

channel_heap_.front();

Austin Schuh

2020-01-26 20:34:50 -0800

[diff] [blame]

1457

int channel_index = oldest_channel_data.second;

Austin Schuh

2020-01-01 17:11:17 -0800

[diff] [blame]

1458

std::pop_heap(channel_heap_.begin(), channel_heap_.end(),

1459

&ChannelHeapCompare);

1460

channel_heap_.pop_back();

Austin Schuh

2020-02-13 21:18:22 -0800

[diff] [blame]

1461

Austin Schuh

2020-01-26 20:34:50 -0800

[diff] [blame]

1462

timestamp_mergers_[channel_index].set_pushed(false);

Austin Schuh

2020-01-01 17:11:17 -0800

[diff] [blame]

1463

Austin Schuh

2020-01-26 20:34:50 -0800

[diff] [blame]

1464

TimestampMerger *merger = &timestamp_mergers_[channel_index];

Austin Schuh

2020-01-01 17:11:17 -0800

[diff] [blame]

1465

Austin Schuh

2020-02-02 17:30:07 -0800

[diff] [blame]

1466

// Merger handles any queueing needed from here.

Austin Schuh

2020-01-26 20:34:50 -0800

[diff] [blame]

1467

std::tuple<TimestampMerger::DeliveryTimestamp,

1468

FlatbufferVector<MessageHeader>>

1469

message = merger->PopOldest();

Brian Silverman

2020-08-12 12:02:38 -0700

[diff] [blame]

1470

DCHECK_EQ(std::get<0>(message).monotonic_event_time,

1471

oldest_channel_data.first)

1472

<< ": channel_heap_ was corrupted for " << channel_index << ": "

1473

<< DebugString();

Austin Schuh

2020-01-01 17:11:17 -0800

[diff] [blame]

1474

Austin Schuh

2020-09-01 22:38:28 -0700

[diff] [blame]

1475

CHECK_GE(std::get<0>(message).monotonic_event_time, last_popped_time_)

1476

<< ": " << MaybeNodeName(log_file_header()->node())

1477

<< "Messages came off the queue out of order. " << DebugString();

1478

last_popped_time_ = std::get<0>(message).monotonic_event_time;

1479

1480

VLOG(1) << "Popped " << last_popped_time_ << " "

1481

<< configuration::StrippedChannelToString(

1482

configuration()->channels()->Get(channel_index))

1483

<< " (" << channel_index << ")";

1484

Austin Schuh

2020-01-26 20:34:50 -0800

[diff] [blame]

1485

return std::make_tuple(std::get<0>(message), channel_index,

1486

std::move(std::get<1>(message)));

1487

}

1488

Austin Schuh

2020-02-02 17:30:07 -0800

[diff] [blame]

1489

std::string SplitMessageReader::MessageHeaderQueue::DebugString() const {

1490

std::stringstream ss;

1491

for (size_t i = 0; i < data_.size(); ++i) {

Austin Schuh

2020-09-01 22:38:28 -0700

[diff] [blame]

1492

if (i < 5 || i + 5 > data_.size()) {

if (timestamps) {

ss << " msg: ";

} else {

ss << " timestamp: ";

1497

}

1498

ss << monotonic_clock::time_point(

1499

chrono::nanoseconds(data_[i].message().monotonic_sent_time()))

Austin Schuh

2020-02-02 17:30:07 -0800

[diff] [blame]

1500

<< " ("

Austin Schuh

2020-09-01 22:38:28 -0700

[diff] [blame]

1501

<< realtime_clock::time_point(

1502

chrono::nanoseconds(data_[i].message().realtime_sent_time()))

1503

<< ") " << data_[i].message().queue_index();

1504

if (timestamps) {

1505

ss << " <- remote "

1506

<< monotonic_clock::time_point(chrono::nanoseconds(

1507

data_[i].message().monotonic_remote_time()))

1508

<< " ("

1509

<< realtime_clock::time_point(chrono::nanoseconds(

1510

data_[i].message().realtime_remote_time()))

<< ")";

}

ss << "\n";

} else if (i == 5) {

ss << " ...\n";

Austin Schuh

2020-01-26 20:34:50 -0800

[diff] [blame]

1516

}

Austin Schuh

2020-02-02 17:30:07 -0800

[diff] [blame]

1517

}

Austin Schuh

2020-01-26 20:34:50 -0800

[diff] [blame]

1518

Austin Schuh

2020-02-02 17:30:07 -0800

[diff] [blame]

1519

return ss.str();

1520

}

Austin Schuh

2020-01-26 20:34:50 -0800

[diff] [blame]

1521

Austin Schuh

2020-02-02 17:30:07 -0800

[diff] [blame]

1522

std::string SplitMessageReader::DebugString(int channel) const {

1523

std::stringstream ss;

1524

ss << "[\n";

1525

ss << channels_[channel].data.DebugString();

1526

ss << " ]";

1527

return ss.str();

1528

}

Austin Schuh

2020-01-26 20:34:50 -0800

[diff] [blame]

1529

Austin Schuh

2020-02-02 17:30:07 -0800

[diff] [blame]

1530

std::string SplitMessageReader::DebugString(int channel, int node_index) const {

1531

std::stringstream ss;

1532

ss << "[\n";

1533

ss << channels_[channel].timestamps[node_index].DebugString();

ss << " ]";

return ss.str();

}

std::string TimestampMerger::DebugString() const {

1539

std::stringstream ss;

1540

1541

if (timestamp_heap_.size() > 0) {

1542

ss << " timestamp_heap {\n";

1543

std::vector<

1544

std::tuple<monotonic_clock::time_point, uint32_t, SplitMessageReader *>>

1545

timestamp_heap = timestamp_heap_;

1546

while (timestamp_heap.size() > 0u) {

1547

std::tuple<monotonic_clock::time_point, uint32_t, SplitMessageReader *>

1548

oldest_timestamp_reader = timestamp_heap.front();

1549

1550

ss << " " << std::get<2>(oldest_timestamp_reader) << " "

1551

<< std::get<0>(oldest_timestamp_reader) << " queue_index ("

1552

<< std::get<1>(oldest_timestamp_reader) << ") ttq "

1553

<< std::get<2>(oldest_timestamp_reader)->time_to_queue() << " "

1554

<< std::get<2>(oldest_timestamp_reader)->filename() << " -> "

1555

<< std::get<2>(oldest_timestamp_reader)

1556

->DebugString(channel_index_, node_index_)

1557

<< "\n";

1558

1559

std::pop_heap(timestamp_heap.begin(), timestamp_heap.end(),

1560

&SplitMessageReaderHeapCompare);

1561

timestamp_heap.pop_back();

}

ss << " }\n";

}

ss << " message_heap {\n";

1567

{

1568

std::vector<

1569

std::tuple<monotonic_clock::time_point, uint32_t, SplitMessageReader *>>

1570

message_heap = message_heap_;

Brian Silverman

2020-08-12 12:02:38 -0700

[diff] [blame]

1571

while (!message_heap.empty()) {

Austin Schuh

2020-02-02 17:30:07 -0800

[diff] [blame]

1572

std::tuple<monotonic_clock::time_point, uint32_t, SplitMessageReader *>

1573

oldest_message_reader = message_heap.front();

1574

1575

ss << " " << std::get<2>(oldest_message_reader) << " "

1576

<< std::get<0>(oldest_message_reader) << " queue_index ("

1577

<< std::get<1>(oldest_message_reader) << ") ttq "

1578

<< std::get<2>(oldest_message_reader)->time_to_queue() << " "

1579

<< std::get<2>(oldest_message_reader)->filename() << " -> "

1580

<< std::get<2>(oldest_message_reader)->DebugString(channel_index_)

1581

<< "\n";

1582

1583

std::pop_heap(message_heap.begin(), message_heap.end(),

1584

&SplitMessageReaderHeapCompare);

1585

message_heap.pop_back();

Austin Schuh

2020-01-26 20:34:50 -0800

[diff] [blame]

1586

}

Austin Schuh

2020-01-01 17:11:17 -0800

[diff] [blame]

1587

}

Austin Schuh

2020-02-02 17:30:07 -0800

[diff] [blame]

ss << " }";

return ss.str();

}

std::string ChannelMerger::DebugString() const {

1594

std::stringstream ss;

1595

ss << "start_time " << realtime_start_time() << " " << monotonic_start_time()

1596

<< "\n";

1597

ss << "channel_heap {\n";

1598

std::vector<std::pair<monotonic_clock::time_point, int>> channel_heap =

1599

channel_heap_;

Brian Silverman

2020-08-12 12:02:38 -0700

[diff] [blame]

1600

while (!channel_heap.empty()) {

Austin Schuh

2020-02-02 17:30:07 -0800

[diff] [blame]

1601

std::tuple<monotonic_clock::time_point, int> channel = channel_heap.front();

1602

ss << " " << std::get<0>(channel) << " (" << std::get<1>(channel) << ") "

1603

<< configuration::CleanedChannelToString(

1604

configuration()->channels()->Get(std::get<1>(channel)))

1605

<< "\n";

1606

1607

ss << timestamp_mergers_[std::get<1>(channel)].DebugString() << "\n";

1608

1609

std::pop_heap(channel_heap.begin(), channel_heap.end(),

1610

&ChannelHeapCompare);

1611

channel_heap.pop_back();

}

ss << "}";

return ss.str();

Austin Schuh

2020-01-01 17:11:17 -0800

[diff] [blame]

1616

}

1617

Austin Schuh