Blame - aos/events/logging/lzma_encoder.cc - RealtimeRoboticsGroup/test

blob: f354638c1ba67bf00b1a0c38a55dcdf9785326ce [file] [log] [blame]

Brian Silverman	f59fe3f	2020-09-22 21:04:09 -0700	[diff] [blame]	1	#include "aos/events/logging/lzma_encoder.h"
				2
				3	#include "glog/logging.h"
				4
Austin Schuh	be91b34	2022-06-27 00:53:45 -0700	[diff] [blame]	5	DEFINE_int32(lzma_threads, 1, "Number of threads to use for encoding");
				6
Brian Silverman	f59fe3f	2020-09-22 21:04:09 -0700	[diff] [blame]	7	namespace aos::logger {
				8	namespace {
				9
Austin Schuh	3bd4c40	2020-11-06 18:19:06 -0800	[diff] [blame]	10	// Returns true if `status` is not an error code, false if it is recoverable, or
				11	// otherwise logs the appropriate error message and crashes.
Austin Schuh	ed292dc	2020-12-22 22:32:59 -0800	[diff] [blame]	12	bool LzmaCodeIsOk(lzma_ret status, std::string_view filename = "") {
Brian Silverman	f59fe3f	2020-09-22 21:04:09 -0700	[diff] [blame]	13	switch (status) {
				14	case LZMA_OK:
				15	case LZMA_STREAM_END:
Austin Schuh	3bd4c40	2020-11-06 18:19:06 -0800	[diff] [blame]	16	return true;
Brian Silverman	f59fe3f	2020-09-22 21:04:09 -0700	[diff] [blame]	17	case LZMA_MEM_ERROR:
				18	LOG(FATAL) << "Memory allocation failed:" << status;
				19	case LZMA_OPTIONS_ERROR:
				20	LOG(FATAL) << "The given compression preset or decompression options are "
				21	"not supported: "
				22	<< status;
				23	case LZMA_UNSUPPORTED_CHECK:
				24	LOG(FATAL) << "The given check type is not supported: " << status;
				25	case LZMA_PROG_ERROR:
				26	LOG(FATAL) << "One or more of the parameters have values that will never "
				27	"be valid: "
				28	<< status;
				29	case LZMA_MEMLIMIT_ERROR:
				30	LOG(FATAL) << "Decoder needs more memory than allowed by the specified "
				31	"memory usage limit: "
				32	<< status;
				33	case LZMA_FORMAT_ERROR:
Austin Schuh	ed292dc	2020-12-22 22:32:59 -0800	[diff] [blame]	34	if (filename.empty()) {
				35	LOG(FATAL) << "File format not recognized: " << status;
				36	} else {
				37	LOG(FATAL) << "File format of " << filename
				38	<< " not recognized: " << status;
				39	}
Brian Silverman	f59fe3f	2020-09-22 21:04:09 -0700	[diff] [blame]	40	case LZMA_DATA_ERROR:
Brian Silverman	517431e	2021-11-10 12:48:58 -0800	[diff] [blame]	41	VLOG(1) << "Compressed file is corrupt: " << status;
Austin Schuh	3bd4c40	2020-11-06 18:19:06 -0800	[diff] [blame]	42	return false;
Brian Silverman	f59fe3f	2020-09-22 21:04:09 -0700	[diff] [blame]	43	case LZMA_BUF_ERROR:
Brian Silverman	517431e	2021-11-10 12:48:58 -0800	[diff] [blame]	44	VLOG(1) << "Compressed file is truncated or corrupt: " << status;
Austin Schuh	3bd4c40	2020-11-06 18:19:06 -0800	[diff] [blame]	45	return false;
Brian Silverman	f59fe3f	2020-09-22 21:04:09 -0700	[diff] [blame]	46	default:
				47	LOG(FATAL) << "Unexpected return value: " << status;
				48	}
				49	}
				50
				51	} // namespace
				52
Austin Schuh	48d10d6	2022-10-16 22:19:23 -0700	[diff] [blame]	53	LzmaEncoder::LzmaEncoder(size_t max_message_size,
				54	const uint32_t compression_preset, size_t block_size)
Brian Silverman	f59fe3f	2020-09-22 21:04:09 -0700	[diff] [blame]	55	: stream_(LZMA_STREAM_INIT), compression_preset_(compression_preset) {
				56	CHECK_GE(compression_preset_, 0u)
				57	<< ": Compression preset must be in the range [0, 9].";
				58	CHECK_LE(compression_preset_, 9u)
				59	<< ": Compression preset must be in the range [0, 9].";
				60
Austin Schuh	be91b34	2022-06-27 00:53:45 -0700	[diff] [blame]	61	if (FLAGS_lzma_threads <= 1) {
				62	lzma_ret status =
				63	lzma_easy_encoder(&stream_, compression_preset_, LZMA_CHECK_CRC64);
				64	CHECK(LzmaCodeIsOk(status));
				65	} else {
				66	lzma_mt mt_options;
				67	memset(&mt_options, 0, sizeof(mt_options));
				68	mt_options.threads = FLAGS_lzma_threads;
				69	mt_options.block_size = block_size;
				70	// Compress for at most 100 ms before relinquishing control back to the main
				71	// thread.
				72	mt_options.timeout = 100;
				73	mt_options.preset = compression_preset_;
				74	mt_options.filters = nullptr;
				75	mt_options.check = LZMA_CHECK_CRC64;
				76	lzma_ret status = lzma_stream_encoder_mt(&stream_, &mt_options);
				77	CHECK(LzmaCodeIsOk(status));
				78	}
				79
Brian Silverman	f59fe3f	2020-09-22 21:04:09 -0700	[diff] [blame]	80	stream_.avail_out = 0;
				81	VLOG(2) << "LzmaEncoder: Initialization succeeded.";
Austin Schuh	48d10d6	2022-10-16 22:19:23 -0700	[diff] [blame]	82
				83	// TODO(austin): We don't write the biggest messages very often. Is it more
				84	// efficient to allocate if we go over a threshold to keep the static memory
				85	// in use smaller, or just allocate the worst case like we are doing here?
				86	input_buffer_.resize(max_message_size);
Brian Silverman	f59fe3f	2020-09-22 21:04:09 -0700	[diff] [blame]	87	}
				88
				89	LzmaEncoder::~LzmaEncoder() { lzma_end(&stream_); }
				90
Austin Schuh	8bdfc49	2023-02-11 12:53:13 -0800	[diff] [blame]	91	size_t LzmaEncoder::Encode(Copier *copy, size_t start_byte) {
Austin Schuh	48d10d6	2022-10-16 22:19:23 -0700	[diff] [blame]	92	const size_t copy_size = copy->size();
				93	// LZMA compresses the data as it goes along, copying the compressed results
				94	// into another buffer. So, there's no need to store more than one message
				95	// since lzma is going to take it from here.
				96	CHECK_LE(copy_size, input_buffer_.size());
Brian Silverman	f59fe3f	2020-09-22 21:04:09 -0700	[diff] [blame]	97
Austin Schuh	8bdfc49	2023-02-11 12:53:13 -0800	[diff] [blame]	98	CHECK_EQ(copy->Copy(input_buffer_.data(), start_byte, copy_size - start_byte),
				99	copy_size - start_byte);
Austin Schuh	48d10d6	2022-10-16 22:19:23 -0700	[diff] [blame]	100
				101	stream_.next_in = input_buffer_.data();
				102	stream_.avail_in = copy_size;
Brian Silverman	f59fe3f	2020-09-22 21:04:09 -0700	[diff] [blame]	103
				104	RunLzmaCode(LZMA_RUN);
Austin Schuh	8bdfc49	2023-02-11 12:53:13 -0800	[diff] [blame]	105
				106	return copy_size - start_byte;
Brian Silverman	f59fe3f	2020-09-22 21:04:09 -0700	[diff] [blame]	107	}
				108
				109	void LzmaEncoder::Finish() { RunLzmaCode(LZMA_FINISH); }
				110
				111	void LzmaEncoder::Clear(const int n) {
				112	CHECK_GE(n, 0);
				113	CHECK_LE(static_cast<size_t>(n), queue_size());
				114	queue_.erase(queue_.begin(), queue_.begin() + n);
				115	if (queue_.empty()) {
				116	stream_.next_out = nullptr;
				117	stream_.avail_out = 0;
				118	}
				119	}
				120
Austin Schuh	48d10d6	2022-10-16 22:19:23 -0700	[diff] [blame]	121	absl::Span<const absl::Span<const uint8_t>> LzmaEncoder::queue() {
				122	return_queue_.clear();
Brian Silverman	f59fe3f	2020-09-22 21:04:09 -0700	[diff] [blame]	123	if (queue_.empty()) {
Austin Schuh	48d10d6	2022-10-16 22:19:23 -0700	[diff] [blame]	124	return return_queue_;
Brian Silverman	f59fe3f	2020-09-22 21:04:09 -0700	[diff] [blame]	125	}
Austin Schuh	48d10d6	2022-10-16 22:19:23 -0700	[diff] [blame]	126	return_queue_.reserve(queue_.size());
Brian Silverman	f59fe3f	2020-09-22 21:04:09 -0700	[diff] [blame]	127	for (size_t i = 0; i < queue_.size() - 1; ++i) {
Austin Schuh	48d10d6	2022-10-16 22:19:23 -0700	[diff] [blame]	128	return_queue_.emplace_back(
Brian Silverman	f59fe3f	2020-09-22 21:04:09 -0700	[diff] [blame]	129	absl::MakeConstSpan(queue_.at(i).data(), queue_.at(i).size()));
				130	}
				131	// For the last buffer in the queue, we must account for the possibility that
				132	// the buffer isn't full yet.
Austin Schuh	48d10d6	2022-10-16 22:19:23 -0700	[diff] [blame]	133	return_queue_.emplace_back(absl::MakeConstSpan(
Brian Silverman	f59fe3f	2020-09-22 21:04:09 -0700	[diff] [blame]	134	queue_.back().data(), queue_.back().size() - stream_.avail_out));
Austin Schuh	48d10d6	2022-10-16 22:19:23 -0700	[diff] [blame]	135	return return_queue_;
Brian Silverman	f59fe3f	2020-09-22 21:04:09 -0700	[diff] [blame]	136	}
				137
				138	size_t LzmaEncoder::queued_bytes() const {
				139	size_t bytes = queue_size() * kEncodedBufferSizeBytes;
				140	// Subtract the bytes that the encoder hasn't filled yet.
				141	bytes -= stream_.avail_out;
				142	return bytes;
				143	}
				144
				145	void LzmaEncoder::RunLzmaCode(lzma_action action) {
				146	CHECK(!finished_);
				147
				148	// This is to keep track of how many bytes resulted from encoding this input
				149	// buffer.
				150	size_t last_avail_out = stream_.avail_out;
				151
				152	while (stream_.avail_in > 0 \|\| action == LZMA_FINISH) {
				153	// If output buffer is full, create a new one, queue it up, and resume
				154	// encoding. This could happen in the first call to Encode after
				155	// construction or a Reset, or when an input buffer is large enough to fill
				156	// more than one output buffer.
				157	if (stream_.avail_out == 0) {
				158	queue_.emplace_back();
				159	queue_.back().resize(kEncodedBufferSizeBytes);
				160	stream_.next_out = queue_.back().data();
				161	stream_.avail_out = kEncodedBufferSizeBytes;
				162	// Update the byte count.
				163	total_bytes_ += last_avail_out;
				164	last_avail_out = stream_.avail_out;
				165	}
				166
				167	// Encode the data.
				168	lzma_ret status = lzma_code(&stream_, action);
Austin Schuh	3bd4c40	2020-11-06 18:19:06 -0800	[diff] [blame]	169	CHECK(LzmaCodeIsOk(status));
Brian Silverman	f59fe3f	2020-09-22 21:04:09 -0700	[diff] [blame]	170	if (action == LZMA_FINISH) {
				171	if (status == LZMA_STREAM_END) {
				172	// This is returned when lzma_code is all done.
				173	finished_ = true;
				174	break;
				175	}
				176	} else {
				177	CHECK(status != LZMA_STREAM_END);
				178	}
				179	VLOG(2) << "LzmaEncoder: Encoded chunk.";
				180	}
				181
				182	// Update the number of resulting encoded bytes.
				183	total_bytes_ += last_avail_out - stream_.avail_out;
				184	}
				185
Austin Schuh	cd36842	2021-11-22 21:23:29 -0800	[diff] [blame]	186	LzmaDecoder::LzmaDecoder(std::unique_ptr<DataDecoder> underlying_decoder,
				187	bool quiet)
Tyler Chatow	2015bc6	2021-08-04 21:15:09 -0700	[diff] [blame]	188	: underlying_decoder_(std::move(underlying_decoder)),
Austin Schuh	cd36842	2021-11-22 21:23:29 -0800	[diff] [blame]	189	stream_(LZMA_STREAM_INIT),
				190	quiet_(quiet) {
Brian Silverman	f59fe3f	2020-09-22 21:04:09 -0700	[diff] [blame]	191	compressed_data_.resize(kBufSize);
				192
				193	lzma_ret status =
				194	lzma_stream_decoder(&stream_, UINT64_MAX, LZMA_CONCATENATED);
Austin Schuh	3bd4c40	2020-11-06 18:19:06 -0800	[diff] [blame]	195	CHECK(LzmaCodeIsOk(status)) << "Failed initializing LZMA stream decoder.";
Brian Silverman	f59fe3f	2020-09-22 21:04:09 -0700	[diff] [blame]	196	stream_.avail_out = 0;
				197	VLOG(2) << "LzmaDecoder: Initialization succeeded.";
				198	}
				199
				200	LzmaDecoder::~LzmaDecoder() { lzma_end(&stream_); }
				201
				202	size_t LzmaDecoder::Read(uint8_t begin, uint8_t end) {
				203	if (finished_) {
				204	return 0;
				205	}
				206
				207	// Write into the given range.
				208	stream_.next_out = begin;
				209	stream_.avail_out = end - begin;
				210	// Keep decompressing until we run out of buffer space.
				211	while (stream_.avail_out > 0) {
				212	if (action_ == LZMA_RUN && stream_.avail_in == 0) {
				213	// Read more bytes from the file if we're all out.
Tyler Chatow	2015bc6	2021-08-04 21:15:09 -0700	[diff] [blame]	214	const size_t count = underlying_decoder_->Read(compressed_data_.begin(),
				215	compressed_data_.end());
Brian Silverman	f59fe3f	2020-09-22 21:04:09 -0700	[diff] [blame]	216	if (count == 0) {
				217	// No more data to read in the file, begin the finishing operation.
				218	action_ = LZMA_FINISH;
				219	} else {
				220	stream_.next_in = compressed_data_.data();
				221	stream_.avail_in = count;
				222	}
				223	}
				224	// Decompress the data.
				225	const lzma_ret status = lzma_code(&stream_, action_);
				226	// Return if we're done.
				227	if (status == LZMA_STREAM_END) {
				228	CHECK_EQ(action_, LZMA_FINISH)
				229	<< ": Got LZMA_STREAM_END when action wasn't LZMA_FINISH";
				230	finished_ = true;
				231	return (end - begin) - stream_.avail_out;
				232	}
Austin Schuh	3bd4c40	2020-11-06 18:19:06 -0800	[diff] [blame]	233
				234	// If we fail to decompress, give up. Return everything that has been
				235	// produced so far.
Tyler Chatow	2015bc6	2021-08-04 21:15:09 -0700	[diff] [blame]	236	if (!LzmaCodeIsOk(status, filename())) {
Austin Schuh	3bd4c40	2020-11-06 18:19:06 -0800	[diff] [blame]	237	finished_ = true;
Brian Silverman	517431e	2021-11-10 12:48:58 -0800	[diff] [blame]	238	if (status == LZMA_DATA_ERROR) {
Austin Schuh	cd36842	2021-11-22 21:23:29 -0800	[diff] [blame]	239	if (!quiet_ \|\| VLOG_IS_ON(1)) {
				240	LOG(WARNING) << filename() << " is corrupted.";
				241	}
Brian Silverman	517431e	2021-11-10 12:48:58 -0800	[diff] [blame]	242	} else if (status == LZMA_BUF_ERROR) {
Austin Schuh	cd36842	2021-11-22 21:23:29 -0800	[diff] [blame]	243	if (!quiet_ \|\| VLOG_IS_ON(1)) {
				244	LOG(WARNING) << filename() << " is truncated or corrupted.";
				245	}
Brian Silverman	517431e	2021-11-10 12:48:58 -0800	[diff] [blame]	246	} else {
				247	LOG(FATAL) << "Unknown error " << status << " when reading "
				248	<< filename();
				249	}
Austin Schuh	3bd4c40	2020-11-06 18:19:06 -0800	[diff] [blame]	250	return (end - begin) - stream_.avail_out;
				251	}
Brian Silverman	f59fe3f	2020-09-22 21:04:09 -0700	[diff] [blame]	252	}
				253	return end - begin;
				254	}
				255
Tyler Chatow	2015bc6	2021-08-04 21:15:09 -0700	[diff] [blame]	256	ThreadedLzmaDecoder::ThreadedLzmaDecoder(
Austin Schuh	cd36842	2021-11-22 21:23:29 -0800	[diff] [blame]	257	std::unique_ptr<DataDecoder> underlying_decoder, bool quiet)
				258	: decoder_(std::move(underlying_decoder), quiet), decode_thread_([this] {
Tyler Chatow	7df6083	2021-07-15 21:18:36 -0700	[diff] [blame]	259	std::unique_lock lock(decode_mutex_);
				260	while (true) {
				261	// Wake if the queue is too small or we are finished.
				262	continue_decoding_.wait(lock, [this] {
				263	return decoded_queue_.size() < kQueueSize \|\| finished_;
				264	});
				265
				266	if (finished_) {
				267	return;
				268	}
				269
				270	while (true) {
				271	CHECK(!finished_);
				272	// Release our lock on the queue before doing decompression work.
				273	lock.unlock();
				274
				275	ResizeableBuffer buffer;
				276	buffer.resize(kBufSize);
				277
				278	const size_t bytes_read =
				279	decoder_.Read(buffer.begin(), buffer.end());
				280	buffer.resize(bytes_read);
				281
				282	// Relock the queue and move the new buffer to the end. This should
				283	// be fast. We also need to stay locked when we wait().
				284	lock.lock();
				285	if (bytes_read > 0) {
				286	decoded_queue_.emplace_back(std::move(buffer));
				287	} else {
				288	finished_ = true;
				289	}
				290
				291	// If we've filled the queue or are out of data, go back to sleep.
				292	if (decoded_queue_.size() >= kQueueSize \|\| finished_) {
				293	break;
				294	}
				295	}
				296
				297	// Notify main thread in case it was waiting for us to queue more
				298	// data.
				299	queue_filled_.notify_one();
				300	}
				301	}) {}
				302
				303	ThreadedLzmaDecoder::~ThreadedLzmaDecoder() {
				304	// Wake up decode thread so it can return.
				305	{
				306	std::scoped_lock lock(decode_mutex_);
				307	finished_ = true;
				308	}
				309	continue_decoding_.notify_one();
				310	decode_thread_.join();
				311	}
				312
				313	size_t ThreadedLzmaDecoder::Read(uint8_t begin, uint8_t end) {
				314	std::unique_lock lock(decode_mutex_);
				315
				316	// Strip any empty buffers
				317	for (auto iter = decoded_queue_.begin(); iter != decoded_queue_.end();) {
				318	if (iter->size() == 0) {
				319	iter = decoded_queue_.erase(iter);
				320	} else {
				321	++iter;
				322	}
				323	}
				324
				325	// If the queue is empty, sleep until the decoder thread has produced another
				326	// buffer.
				327	if (decoded_queue_.empty()) {
				328	continue_decoding_.notify_one();
				329	queue_filled_.wait(lock,
				330	[this] { return finished_ \|\| !decoded_queue_.empty(); });
				331	if (finished_ && decoded_queue_.empty()) {
				332	return 0;
				333	}
				334	}
				335	// Sanity check if the queue is empty and we're not finished.
				336	CHECK(!decoded_queue_.empty()) << "Decoded queue unexpectedly empty";
				337
				338	ResizeableBuffer &front_buffer = decoded_queue_.front();
				339
				340	// Copy some data from our working buffer to the requested destination.
				341	const std::size_t bytes_requested = end - begin;
				342	const std::size_t bytes_to_copy =
				343	std::min(bytes_requested, front_buffer.size());
				344	memcpy(begin, front_buffer.data(), bytes_to_copy);
				345	front_buffer.erase_front(bytes_to_copy);
				346
				347	// Ensure the decoding thread wakes up if the queue isn't full.
				348	if (!finished_ && decoded_queue_.size() < kQueueSize) {
				349	continue_decoding_.notify_one();
				350	}
				351
				352	return bytes_to_copy;
				353	}
				354
Brian Silverman	f59fe3f	2020-09-22 21:04:09 -0700	[diff] [blame]	355	} // namespace aos::logger